xref: /freebsd/sys/compat/linux/linux_misc.c (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2002 Doug Rabson
5  * Copyright (c) 1994-1995 Søren Schmidt
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer
13  *    in this position and unchanged.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. The name of the author may not be used to endorse or promote products
18  *    derived from this software without specific prior written permission
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/fcntl.h>
37 #include <sys/jail.h>
38 #include <sys/imgact.h>
39 #include <sys/limits.h>
40 #include <sys/lock.h>
41 #include <sys/msgbuf.h>
42 #include <sys/mutex.h>
43 #include <sys/poll.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/procctl.h>
47 #include <sys/reboot.h>
48 #include <sys/random.h>
49 #include <sys/resourcevar.h>
50 #include <sys/sched.h>
51 #include <sys/smp.h>
52 #include <sys/stat.h>
53 #include <sys/syscallsubr.h>
54 #include <sys/sysctl.h>
55 #include <sys/sysent.h>
56 #include <sys/sysproto.h>
57 #include <sys/time.h>
58 #include <sys/vmmeter.h>
59 #include <sys/vnode.h>
60 
61 #include <security/audit/audit.h>
62 #include <security/mac/mac_framework.h>
63 
64 #include <vm/pmap.h>
65 #include <vm/vm_map.h>
66 #include <vm/swap_pager.h>
67 
68 #ifdef COMPAT_LINUX32
69 #include <machine/../linux32/linux.h>
70 #include <machine/../linux32/linux32_proto.h>
71 #else
72 #include <machine/../linux/linux.h>
73 #include <machine/../linux/linux_proto.h>
74 #endif
75 
76 #include <compat/linux/linux_common.h>
77 #include <compat/linux/linux_dtrace.h>
78 #include <compat/linux/linux_file.h>
79 #include <compat/linux/linux_mib.h>
80 #include <compat/linux/linux_signal.h>
81 #include <compat/linux/linux_time.h>
82 #include <compat/linux/linux_util.h>
83 #include <compat/linux/linux_sysproto.h>
84 #include <compat/linux/linux_emul.h>
85 #include <compat/linux/linux_misc.h>
86 
/*
 * Statistics clock frequency.  CONVNTCK() below uses it as the number of
 * clock ticks per second when converting times for Linux binaries.
 */
int stclohz;				/* Statistics clock frequency */

/*
 * Translation table indexed by Linux resource-limit number; each entry is
 * the native RLIMIT_* value handed to the native rlimit routines.  Users
 * of the table check the index against LINUX_RLIM_NLIMITS first.
 */
static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
	RLIMIT_MEMLOCK, RLIMIT_AS
};
94 
/*
 * Structure copied out to userland by linux_sysinfo().  Memory and swap
 * sizes are expressed in units of mem_unit bytes; linux_sysinfo() sets
 * mem_unit to 1, i.e. plain bytes.
 */
struct l_sysinfo {
	l_long		uptime;		/* Seconds since boot */
	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
/* Fixed-point scale applied to the load averages stored in loads[]. */
#define LINUX_SYSINFO_LOADS_SCALE 65536
	l_ulong		totalram;	/* Total usable main memory size */
	l_ulong		freeram;	/* Available memory size */
	l_ulong		sharedram;	/* Amount of shared memory */
	l_ulong		bufferram;	/* Memory used by buffers */
	l_ulong		totalswap;	/* Total swap space size */
	l_ulong		freeswap;	/* swap space still available */
	l_ushort	procs;		/* Number of current processes */
	l_ushort	pads;		/* Never set explicitly; left zero */
	l_ulong		totalhigh;	/* High memory size; always 0 here */
	l_ulong		freehigh;	/* Free high memory; always 0 here */
	l_uint		mem_unit;	/* Size in bytes of one memory unit */
	char		_f[20-2*sizeof(l_long)-sizeof(l_int)];	/* padding */
};
112 
/*
 * User pointer plus length pair.
 * NOTE(review): presumably the signal-set descriptor passed as the last
 * argument of pselect6; its consumer is outside this section -- confirm.
 */
struct l_pselect6arg {
	l_uintptr_t	ss;		/* User address (presumably of the sigset) */
	l_size_t	ss_len;		/* Length in bytes of the object at ss */
};
117 
/*
 * Forward declarations of file-local helpers.  The utimensat helpers are
 * defined further down; the pselect6/ppoll/poll helpers are defined later
 * in the file.
 */
static int	linux_utimensat_lts_to_ts(struct l_timespec *,
			struct timespec *);
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* Only built where the 64-bit time_t variant of utimensat exists. */
static int	linux_utimensat_lts64_to_ts(struct l_timespec64 *,
			struct timespec *);
#endif
static int	linux_common_utimensat(struct thread *, int,
			const char *, struct timespec *, int);
static int	linux_common_pselect6(struct thread *, l_int,
			l_fd_set *, l_fd_set *, l_fd_set *,
			struct timespec *, l_uintptr_t *);
static int	linux_common_ppoll(struct thread *, struct pollfd *,
			uint32_t, struct timespec *, l_sigset_t *,
			l_size_t);
static int	linux_pollin(struct thread *, struct pollfd *,
			struct pollfd *, u_int);
static int	linux_pollout(struct thread *, struct pollfd *,
			struct pollfd *, u_int);
136 
137 int
138 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
139 {
140 	struct l_sysinfo sysinfo;
141 	int i, j;
142 	struct timespec ts;
143 
144 	bzero(&sysinfo, sizeof(sysinfo));
145 	getnanouptime(&ts);
146 	if (ts.tv_nsec != 0)
147 		ts.tv_sec++;
148 	sysinfo.uptime = ts.tv_sec;
149 
150 	/* Use the information from the mib to get our load averages */
151 	for (i = 0; i < 3; i++)
152 		sysinfo.loads[i] = averunnable.ldavg[i] *
153 		    LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;
154 
155 	sysinfo.totalram = physmem * PAGE_SIZE;
156 	sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE;
157 
158 	/*
159 	 * sharedram counts pages allocated to named, swap-backed objects such
160 	 * as shared memory segments and tmpfs files.  There is no cheap way to
161 	 * compute this, so just leave the field unpopulated.  Linux itself only
162 	 * started setting this field in the 3.x timeframe.
163 	 */
164 	sysinfo.sharedram = 0;
165 	sysinfo.bufferram = 0;
166 
167 	swap_pager_status(&i, &j);
168 	sysinfo.totalswap = i * PAGE_SIZE;
169 	sysinfo.freeswap = (i - j) * PAGE_SIZE;
170 
171 	sysinfo.procs = nprocs;
172 
173 	/*
174 	 * Platforms supported by the emulation layer do not have a notion of
175 	 * high memory.
176 	 */
177 	sysinfo.totalhigh = 0;
178 	sysinfo.freehigh = 0;
179 
180 	sysinfo.mem_unit = 1;
181 
182 	return (copyout(&sysinfo, args->info, sizeof(sysinfo)));
183 }
184 
185 #ifdef LINUX_LEGACY_SYSCALLS
186 int
187 linux_alarm(struct thread *td, struct linux_alarm_args *args)
188 {
189 	struct itimerval it, old_it;
190 	u_int secs;
191 	int error __diagused;
192 
193 	secs = args->secs;
194 	/*
195 	 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2
196 	 * to match kern_setitimer()'s limit to avoid error from it.
197 	 *
198 	 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit
199 	 * platforms.
200 	 */
201 	if (secs > INT32_MAX / 2)
202 		secs = INT32_MAX / 2;
203 
204 	it.it_value.tv_sec = secs;
205 	it.it_value.tv_usec = 0;
206 	timevalclear(&it.it_interval);
207 	error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
208 	KASSERT(error == 0, ("kern_setitimer returns %d", error));
209 
210 	if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) ||
211 	    old_it.it_value.tv_usec >= 500000)
212 		old_it.it_value.tv_sec++;
213 	td->td_retval[0] = old_it.it_value.tv_sec;
214 	return (0);
215 }
216 #endif
217 
218 int
219 linux_brk(struct thread *td, struct linux_brk_args *args)
220 {
221 	struct vmspace *vm = td->td_proc->p_vmspace;
222 	uintptr_t new, old;
223 
224 	old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize);
225 	new = (uintptr_t)args->dsend;
226 	if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new))
227 		td->td_retval[0] = (register_t)new;
228 	else
229 		td->td_retval[0] = (register_t)old;
230 
231 	return (0);
232 }
233 
234 #ifdef LINUX_LEGACY_SYSCALLS
235 int
236 linux_select(struct thread *td, struct linux_select_args *args)
237 {
238 	l_timeval ltv;
239 	struct timeval tv0, tv1, utv, *tvp;
240 	int error;
241 
242 	/*
243 	 * Store current time for computation of the amount of
244 	 * time left.
245 	 */
246 	if (args->timeout) {
247 		if ((error = copyin(args->timeout, &ltv, sizeof(ltv))))
248 			goto select_out;
249 		utv.tv_sec = ltv.tv_sec;
250 		utv.tv_usec = ltv.tv_usec;
251 
252 		if (itimerfix(&utv)) {
253 			/*
254 			 * The timeval was invalid.  Convert it to something
255 			 * valid that will act as it does under Linux.
256 			 */
257 			utv.tv_sec += utv.tv_usec / 1000000;
258 			utv.tv_usec %= 1000000;
259 			if (utv.tv_usec < 0) {
260 				utv.tv_sec -= 1;
261 				utv.tv_usec += 1000000;
262 			}
263 			if (utv.tv_sec < 0)
264 				timevalclear(&utv);
265 		}
266 		microtime(&tv0);
267 		tvp = &utv;
268 	} else
269 		tvp = NULL;
270 
271 	error = kern_select(td, args->nfds, args->readfds, args->writefds,
272 	    args->exceptfds, tvp, LINUX_NFDBITS);
273 	if (error)
274 		goto select_out;
275 
276 	if (args->timeout) {
277 		if (td->td_retval[0]) {
278 			/*
279 			 * Compute how much time was left of the timeout,
280 			 * by subtracting the current time and the time
281 			 * before we started the call, and subtracting
282 			 * that result from the user-supplied value.
283 			 */
284 			microtime(&tv1);
285 			timevalsub(&tv1, &tv0);
286 			timevalsub(&utv, &tv1);
287 			if (utv.tv_sec < 0)
288 				timevalclear(&utv);
289 		} else
290 			timevalclear(&utv);
291 		ltv.tv_sec = utv.tv_sec;
292 		ltv.tv_usec = utv.tv_usec;
293 		if ((error = copyout(&ltv, args->timeout, sizeof(ltv))))
294 			goto select_out;
295 	}
296 
297 select_out:
298 	return (error);
299 }
300 #endif
301 
302 int
303 linux_mremap(struct thread *td, struct linux_mremap_args *args)
304 {
305 	uintptr_t addr;
306 	size_t len;
307 	int error = 0;
308 
309 	if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) {
310 		td->td_retval[0] = 0;
311 		return (EINVAL);
312 	}
313 
314 	/*
315 	 * Check for the page alignment.
316 	 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK.
317 	 */
318 	if (args->addr & PAGE_MASK) {
319 		td->td_retval[0] = 0;
320 		return (EINVAL);
321 	}
322 
323 	args->new_len = round_page(args->new_len);
324 	args->old_len = round_page(args->old_len);
325 
326 	if (args->new_len > args->old_len) {
327 		td->td_retval[0] = 0;
328 		return (ENOMEM);
329 	}
330 
331 	if (args->new_len < args->old_len) {
332 		addr = args->addr + args->new_len;
333 		len = args->old_len - args->new_len;
334 		error = kern_munmap(td, addr, len);
335 	}
336 
337 	td->td_retval[0] = error ? 0 : (uintptr_t)args->addr;
338 	return (error);
339 }
340 
341 #define LINUX_MS_ASYNC       0x0001
342 #define LINUX_MS_INVALIDATE  0x0002
343 #define LINUX_MS_SYNC        0x0004
344 
345 int
346 linux_msync(struct thread *td, struct linux_msync_args *args)
347 {
348 
349 	return (kern_msync(td, args->addr, args->len,
350 	    args->fl & ~LINUX_MS_SYNC));
351 }
352 
353 #ifdef LINUX_LEGACY_SYSCALLS
354 int
355 linux_time(struct thread *td, struct linux_time_args *args)
356 {
357 	struct timeval tv;
358 	l_time_t tm;
359 	int error;
360 
361 	microtime(&tv);
362 	tm = tv.tv_sec;
363 	if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
364 		return (error);
365 	td->td_retval[0] = tm;
366 	return (0);
367 }
368 #endif
369 
/*
 * Result buffer of linux_times(); every field is a time converted to
 * clock ticks by CONVTCK().
 */
struct l_times_argv {
	l_clock_t	tms_utime;	/* User time of the process */
	l_clock_t	tms_stime;	/* System time of the process */
	l_clock_t	tms_cutime;	/* User time of the children */
	l_clock_t	tms_cstime;	/* System time of the children */
};
376 
/*
 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value.
 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK
 * auxiliary vector entry.
 */
#define	CLK_TCK		100

/*
 * Convert a struct timeval (passed by value, not by pointer) to clock
 * ticks.  CONVOTCK() uses the legacy fixed rate, CONVNTCK() uses the
 * statistics clock rate, and CONVTCK() picks between them based on the
 * emulated kernel version of the thread `td', which must be in scope at
 * the point of use.  The argument is parenthesised so that expressions
 * such as `*ptr' or `array[i]' expand correctly.
 */
#define	CONVOTCK(r)	((r).tv_sec * CLK_TCK +				\
			    (r).tv_usec / (1000000 / CLK_TCK))
#define	CONVNTCK(r)	((r).tv_sec * stclohz +				\
			    (r).tv_usec / (1000000 / stclohz))

#define	CONVTCK(r)	(linux_kernver(td) >= LINUX_KERNVER(2,4,0) ?	\
			    CONVNTCK(r) : CONVOTCK(r))
389 
390 int
391 linux_times(struct thread *td, struct linux_times_args *args)
392 {
393 	struct timeval tv, utime, stime, cutime, cstime;
394 	struct l_times_argv tms;
395 	struct proc *p;
396 	int error;
397 
398 	if (args->buf != NULL) {
399 		p = td->td_proc;
400 		PROC_LOCK(p);
401 		PROC_STATLOCK(p);
402 		calcru(p, &utime, &stime);
403 		PROC_STATUNLOCK(p);
404 		calccru(p, &cutime, &cstime);
405 		PROC_UNLOCK(p);
406 
407 		tms.tms_utime = CONVTCK(utime);
408 		tms.tms_stime = CONVTCK(stime);
409 
410 		tms.tms_cutime = CONVTCK(cutime);
411 		tms.tms_cstime = CONVTCK(cstime);
412 
413 		if ((error = copyout(&tms, args->buf, sizeof(tms))))
414 			return (error);
415 	}
416 
417 	microuptime(&tv);
418 	td->td_retval[0] = (int)CONVTCK(tv);
419 	return (0);
420 }
421 
422 int
423 linux_newuname(struct thread *td, struct linux_newuname_args *args)
424 {
425 	struct l_new_utsname utsname;
426 	char osname[LINUX_MAX_UTSNAME];
427 	char osrelease[LINUX_MAX_UTSNAME];
428 	char *p;
429 
430 	linux_get_osname(td, osname);
431 	linux_get_osrelease(td, osrelease);
432 
433 	bzero(&utsname, sizeof(utsname));
434 	strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
435 	getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
436 	getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME);
437 	strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
438 	strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
439 	for (p = utsname.version; *p != '\0'; ++p)
440 		if (*p == '\n') {
441 			*p = '\0';
442 			break;
443 		}
444 #if defined(__amd64__)
445 	/*
446 	 * On amd64, Linux uname(2) needs to return "x86_64"
447 	 * for both 64-bit and 32-bit applications.  On 32-bit,
448 	 * the string returned by getauxval(AT_PLATFORM) needs
449 	 * to remain "i686", though.
450 	 */
451 #if defined(COMPAT_LINUX32)
452 	if (linux32_emulate_i386)
453 		strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME);
454 	else
455 #endif
456 	strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME);
457 #elif defined(__aarch64__)
458 	strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME);
459 #elif defined(__i386__)
460 	strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME);
461 #endif
462 
463 	return (copyout(&utsname, args->buf, sizeof(utsname)));
464 }
465 
/*
 * Argument structure read from userland by linux_utime(); both times are
 * in whole seconds.
 */
struct l_utimbuf {
	l_time_t l_actime;	/* Becomes the first (access) timestamp */
	l_time_t l_modtime;	/* Becomes the second (modification) timestamp */
};
470 
471 #ifdef LINUX_LEGACY_SYSCALLS
472 int
473 linux_utime(struct thread *td, struct linux_utime_args *args)
474 {
475 	struct timeval tv[2], *tvp;
476 	struct l_utimbuf lut;
477 	int error;
478 
479 	if (args->times) {
480 		if ((error = copyin(args->times, &lut, sizeof lut)) != 0)
481 			return (error);
482 		tv[0].tv_sec = lut.l_actime;
483 		tv[0].tv_usec = 0;
484 		tv[1].tv_sec = lut.l_modtime;
485 		tv[1].tv_usec = 0;
486 		tvp = tv;
487 	} else
488 		tvp = NULL;
489 
490 	return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
491 	    tvp, UIO_SYSSPACE));
492 }
493 #endif
494 
495 #ifdef LINUX_LEGACY_SYSCALLS
496 int
497 linux_utimes(struct thread *td, struct linux_utimes_args *args)
498 {
499 	l_timeval ltv[2];
500 	struct timeval tv[2], *tvp = NULL;
501 	int error;
502 
503 	if (args->tptr != NULL) {
504 		if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0)
505 			return (error);
506 		tv[0].tv_sec = ltv[0].tv_sec;
507 		tv[0].tv_usec = ltv[0].tv_usec;
508 		tv[1].tv_sec = ltv[1].tv_sec;
509 		tv[1].tv_usec = ltv[1].tv_usec;
510 		tvp = tv;
511 	}
512 
513 	return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
514 	    tvp, UIO_SYSSPACE));
515 }
516 #endif
517 
518 static int
519 linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times)
520 {
521 
522 	if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
523 	    l_times->tv_nsec != LINUX_UTIME_NOW &&
524 	    (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
525 		return (EINVAL);
526 
527 	times->tv_sec = l_times->tv_sec;
528 	switch (l_times->tv_nsec)
529 	{
530 	case LINUX_UTIME_OMIT:
531 		times->tv_nsec = UTIME_OMIT;
532 		break;
533 	case LINUX_UTIME_NOW:
534 		times->tv_nsec = UTIME_NOW;
535 		break;
536 	default:
537 		times->tv_nsec = l_times->tv_nsec;
538 	}
539 
540 	return (0);
541 }
542 
543 static int
544 linux_common_utimensat(struct thread *td, int ldfd, const char *pathname,
545     struct timespec *timesp, int lflags)
546 {
547 	int dfd, flags = 0;
548 
549 	dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd;
550 
551 	if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH))
552 		return (EINVAL);
553 
554 	if (timesp != NULL) {
555 		/* This breaks POSIX, but is what the Linux kernel does
556 		 * _on purpose_ (documented in the man page for utimensat(2)),
557 		 * so we must follow that behaviour. */
558 		if (timesp[0].tv_nsec == UTIME_OMIT &&
559 		    timesp[1].tv_nsec == UTIME_OMIT)
560 			return (0);
561 	}
562 
563 	if (lflags & LINUX_AT_SYMLINK_NOFOLLOW)
564 		flags |= AT_SYMLINK_NOFOLLOW;
565 	if (lflags & LINUX_AT_EMPTY_PATH)
566 		flags |= AT_EMPTY_PATH;
567 
568 	if (pathname != NULL)
569 		return (kern_utimensat(td, dfd, pathname,
570 		    UIO_USERSPACE, timesp, UIO_SYSSPACE, flags));
571 
572 	if (lflags != 0)
573 		return (EINVAL);
574 
575 	return (kern_futimens(td, dfd, timesp, UIO_SYSSPACE));
576 }
577 
578 int
579 linux_utimensat(struct thread *td, struct linux_utimensat_args *args)
580 {
581 	struct l_timespec l_times[2];
582 	struct timespec times[2], *timesp;
583 	int error;
584 
585 	if (args->times != NULL) {
586 		error = copyin(args->times, l_times, sizeof(l_times));
587 		if (error != 0)
588 			return (error);
589 
590 		error = linux_utimensat_lts_to_ts(&l_times[0], &times[0]);
591 		if (error != 0)
592 			return (error);
593 		error = linux_utimensat_lts_to_ts(&l_times[1], &times[1]);
594 		if (error != 0)
595 			return (error);
596 		timesp = times;
597 	} else
598 		timesp = NULL;
599 
600 	return (linux_common_utimensat(td, args->dfd, args->pathname,
601 	    timesp, args->flags));
602 }
603 
604 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
605 static int
606 linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times)
607 {
608 
609 	/* Zero out the padding in compat mode. */
610 	l_times->tv_nsec &= 0xFFFFFFFFUL;
611 
612 	if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
613 	    l_times->tv_nsec != LINUX_UTIME_NOW &&
614 	    (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
615 		return (EINVAL);
616 
617 	times->tv_sec = l_times->tv_sec;
618 	switch (l_times->tv_nsec)
619 	{
620 	case LINUX_UTIME_OMIT:
621 		times->tv_nsec = UTIME_OMIT;
622 		break;
623 	case LINUX_UTIME_NOW:
624 		times->tv_nsec = UTIME_NOW;
625 		break;
626 	default:
627 		times->tv_nsec = l_times->tv_nsec;
628 	}
629 
630 	return (0);
631 }
632 
633 int
634 linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args)
635 {
636 	struct l_timespec64 l_times[2];
637 	struct timespec times[2], *timesp;
638 	int error;
639 
640 	if (args->times64 != NULL) {
641 		error = copyin(args->times64, l_times, sizeof(l_times));
642 		if (error != 0)
643 			return (error);
644 
645 		error = linux_utimensat_lts64_to_ts(&l_times[0], &times[0]);
646 		if (error != 0)
647 			return (error);
648 		error = linux_utimensat_lts64_to_ts(&l_times[1], &times[1]);
649 		if (error != 0)
650 			return (error);
651 		timesp = times;
652 	} else
653 		timesp = NULL;
654 
655 	return (linux_common_utimensat(td, args->dfd, args->pathname,
656 	    timesp, args->flags));
657 }
658 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
659 
660 #ifdef LINUX_LEGACY_SYSCALLS
661 int
662 linux_futimesat(struct thread *td, struct linux_futimesat_args *args)
663 {
664 	l_timeval ltv[2];
665 	struct timeval tv[2], *tvp = NULL;
666 	int error, dfd;
667 
668 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
669 
670 	if (args->utimes != NULL) {
671 		if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0)
672 			return (error);
673 		tv[0].tv_sec = ltv[0].tv_sec;
674 		tv[0].tv_usec = ltv[0].tv_usec;
675 		tv[1].tv_sec = ltv[1].tv_sec;
676 		tv[1].tv_usec = ltv[1].tv_usec;
677 		tvp = tv;
678 	}
679 
680 	return (kern_utimesat(td, dfd, args->filename, UIO_USERSPACE,
681 	    tvp, UIO_SYSSPACE));
682 }
683 #endif
684 
685 static int
686 linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp,
687     int options, void *rup, l_siginfo_t *infop)
688 {
689 	l_siginfo_t lsi;
690 	siginfo_t siginfo;
691 	struct __wrusage wru;
692 	int error, status, tmpstat, sig;
693 
694 	error = kern_wait6(td, idtype, id, &status, options,
695 	    rup != NULL ? &wru : NULL, &siginfo);
696 
697 	if (error == 0 && statusp) {
698 		tmpstat = status & 0xffff;
699 		if (WIFSIGNALED(tmpstat)) {
700 			tmpstat = (tmpstat & 0xffffff80) |
701 			    bsd_to_linux_signal(WTERMSIG(tmpstat));
702 		} else if (WIFSTOPPED(tmpstat)) {
703 			tmpstat = (tmpstat & 0xffff00ff) |
704 			    (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8);
705 #if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32))
706 			if (WSTOPSIG(status) == SIGTRAP) {
707 				tmpstat = linux_ptrace_status(td,
708 				    siginfo.si_pid, tmpstat);
709 			}
710 #endif
711 		} else if (WIFCONTINUED(tmpstat)) {
712 			tmpstat = 0xffff;
713 		}
714 		error = copyout(&tmpstat, statusp, sizeof(int));
715 	}
716 	if (error == 0 && rup != NULL)
717 		error = linux_copyout_rusage(&wru.wru_self, rup);
718 	if (error == 0 && infop != NULL && td->td_retval[0] != 0) {
719 		sig = bsd_to_linux_signal(siginfo.si_signo);
720 		siginfo_to_lsiginfo(&siginfo, &lsi, sig);
721 		error = copyout(&lsi, infop, sizeof(lsi));
722 	}
723 
724 	return (error);
725 }
726 
727 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
728 int
729 linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
730 {
731 	struct linux_wait4_args wait4_args = {
732 		.pid = args->pid,
733 		.status = args->status,
734 		.options = args->options,
735 		.rusage = NULL,
736 	};
737 
738 	return (linux_wait4(td, &wait4_args));
739 }
740 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
741 
742 int
743 linux_wait4(struct thread *td, struct linux_wait4_args *args)
744 {
745 	struct proc *p;
746 	int options, id, idtype;
747 
748 	if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG |
749 	    LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
750 		return (EINVAL);
751 
752 	/* -INT_MIN is not defined. */
753 	if (args->pid == INT_MIN)
754 		return (ESRCH);
755 
756 	options = 0;
757 	linux_to_bsd_waitopts(args->options, &options);
758 
759 	/*
760 	 * For backward compatibility we implicitly add flags WEXITED
761 	 * and WTRAPPED here.
762 	 */
763 	options |= WEXITED | WTRAPPED;
764 
765 	if (args->pid == WAIT_ANY) {
766 		idtype = P_ALL;
767 		id = 0;
768 	} else if (args->pid < 0) {
769 		idtype = P_PGID;
770 		id = (id_t)-args->pid;
771 	} else if (args->pid == 0) {
772 		idtype = P_PGID;
773 		p = td->td_proc;
774 		PROC_LOCK(p);
775 		id = p->p_pgid;
776 		PROC_UNLOCK(p);
777 	} else {
778 		idtype = P_PID;
779 		id = (id_t)args->pid;
780 	}
781 
782 	return (linux_common_wait(td, idtype, id, args->status, options,
783 	    args->rusage, NULL));
784 }
785 
786 int
787 linux_waitid(struct thread *td, struct linux_waitid_args *args)
788 {
789 	idtype_t idtype;
790 	int error, options;
791 	struct proc *p;
792 	pid_t id;
793 
794 	if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED |
795 	    LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
796 		return (EINVAL);
797 
798 	options = 0;
799 	linux_to_bsd_waitopts(args->options, &options);
800 
801 	id = args->id;
802 	switch (args->idtype) {
803 	case LINUX_P_ALL:
804 		idtype = P_ALL;
805 		break;
806 	case LINUX_P_PID:
807 		if (args->id <= 0)
808 			return (EINVAL);
809 		idtype = P_PID;
810 		break;
811 	case LINUX_P_PGID:
812 		if (linux_kernver(td) >= LINUX_KERNVER(5,4,0) && args->id == 0) {
813 			p = td->td_proc;
814 			PROC_LOCK(p);
815 			id = p->p_pgid;
816 			PROC_UNLOCK(p);
817 		} else if (args->id <= 0)
818 			return (EINVAL);
819 		idtype = P_PGID;
820 		break;
821 	case LINUX_P_PIDFD:
822 		LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype");
823 		return (ENOSYS);
824 	default:
825 		return (EINVAL);
826 	}
827 
828 	error = linux_common_wait(td, idtype, id, NULL, options,
829 	    args->rusage, args->info);
830 	td->td_retval[0] = 0;
831 
832 	return (error);
833 }
834 
835 #ifdef LINUX_LEGACY_SYSCALLS
836 int
837 linux_mknod(struct thread *td, struct linux_mknod_args *args)
838 {
839 	int error;
840 
841 	switch (args->mode & S_IFMT) {
842 	case S_IFIFO:
843 	case S_IFSOCK:
844 		error = kern_mkfifoat(td, AT_FDCWD, args->path, UIO_USERSPACE,
845 		    args->mode);
846 		break;
847 
848 	case S_IFCHR:
849 	case S_IFBLK:
850 		error = kern_mknodat(td, AT_FDCWD, args->path, UIO_USERSPACE,
851 		    args->mode, linux_decode_dev(args->dev));
852 		break;
853 
854 	case S_IFDIR:
855 		error = EPERM;
856 		break;
857 
858 	case 0:
859 		args->mode |= S_IFREG;
860 		/* FALLTHROUGH */
861 	case S_IFREG:
862 		error = kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE,
863 		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
864 		if (error == 0)
865 			kern_close(td, td->td_retval[0]);
866 		break;
867 
868 	default:
869 		error = EINVAL;
870 		break;
871 	}
872 	return (error);
873 }
874 #endif
875 
876 int
877 linux_mknodat(struct thread *td, struct linux_mknodat_args *args)
878 {
879 	int error, dfd;
880 
881 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
882 
883 	switch (args->mode & S_IFMT) {
884 	case S_IFIFO:
885 	case S_IFSOCK:
886 		error = kern_mkfifoat(td, dfd, args->filename, UIO_USERSPACE,
887 		    args->mode);
888 		break;
889 
890 	case S_IFCHR:
891 	case S_IFBLK:
892 		error = kern_mknodat(td, dfd, args->filename, UIO_USERSPACE,
893 		    args->mode, linux_decode_dev(args->dev));
894 		break;
895 
896 	case S_IFDIR:
897 		error = EPERM;
898 		break;
899 
900 	case 0:
901 		args->mode |= S_IFREG;
902 		/* FALLTHROUGH */
903 	case S_IFREG:
904 		error = kern_openat(td, dfd, args->filename, UIO_USERSPACE,
905 		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
906 		if (error == 0)
907 			kern_close(td, td->td_retval[0]);
908 		break;
909 
910 	default:
911 		error = EINVAL;
912 		break;
913 	}
914 	return (error);
915 }
916 
917 /*
918  * UGH! This is just about the dumbest idea I've ever heard!!
919  */
920 int
921 linux_personality(struct thread *td, struct linux_personality_args *args)
922 {
923 	struct linux_pemuldata *pem;
924 	struct proc *p = td->td_proc;
925 	uint32_t old;
926 
927 	PROC_LOCK(p);
928 	pem = pem_find(p);
929 	old = pem->persona;
930 	if (args->per != 0xffffffff)
931 		pem->persona = args->per;
932 	PROC_UNLOCK(p);
933 
934 	td->td_retval[0] = old;
935 	return (0);
936 }
937 
/*
 * Linux layout of an interval timer value, as copied to and from userland
 * by linux_setitimer() and linux_getitimer().
 */
struct l_itimerval {
	l_timeval it_interval;	/* Copied to/from itimerval.it_interval */
	l_timeval it_value;	/* Copied to/from itimerval.it_value */
};
942 
/*
 * Copy an interval timer value field by field from *lip to *bip.  The two
 * arguments may point to different structure types (native and Linux) as
 * long as both have it_interval and it_value members with tv_sec and
 * tv_usec fields; the macro is used for conversions in both directions.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and stays correct inside an unbraced if/else.
 */
#define	B2L_ITIMERVAL(bip, lip) do {					\
	(bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;		\
	(bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec;	\
	(bip)->it_value.tv_sec = (lip)->it_value.tv_sec;		\
	(bip)->it_value.tv_usec = (lip)->it_value.tv_usec;		\
} while (0)
948 
949 int
950 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap)
951 {
952 	int error;
953 	struct l_itimerval ls;
954 	struct itimerval aitv, oitv;
955 
956 	if (uap->itv == NULL) {
957 		uap->itv = uap->oitv;
958 		return (linux_getitimer(td, (struct linux_getitimer_args *)uap));
959 	}
960 
961 	error = copyin(uap->itv, &ls, sizeof(ls));
962 	if (error != 0)
963 		return (error);
964 	B2L_ITIMERVAL(&aitv, &ls);
965 	error = kern_setitimer(td, uap->which, &aitv, &oitv);
966 	if (error != 0 || uap->oitv == NULL)
967 		return (error);
968 	B2L_ITIMERVAL(&ls, &oitv);
969 
970 	return (copyout(&ls, uap->oitv, sizeof(ls)));
971 }
972 
973 int
974 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
975 {
976 	int error;
977 	struct l_itimerval ls;
978 	struct itimerval aitv;
979 
980 	error = kern_getitimer(td, uap->which, &aitv);
981 	if (error != 0)
982 		return (error);
983 	B2L_ITIMERVAL(&ls, &aitv);
984 	return (copyout(&ls, uap->itv, sizeof(ls)));
985 }
986 
987 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
988 int
989 linux_nice(struct thread *td, struct linux_nice_args *args)
990 {
991 
992 	return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc));
993 }
994 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
995 
996 int
997 linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
998 {
999 	struct ucred *newcred, *oldcred;
1000 	l_gid_t *linux_gidset;
1001 	gid_t *bsd_gidset;
1002 	int ngrp, error;
1003 	struct proc *p;
1004 
1005 	ngrp = args->gidsetsize;
1006 	if (ngrp < 0 || ngrp >= ngroups_max + 1)
1007 		return (EINVAL);
1008 	linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
1009 	error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
1010 	if (error)
1011 		goto out;
1012 	newcred = crget();
1013 	crextend(newcred, ngrp + 1);
1014 	p = td->td_proc;
1015 	PROC_LOCK(p);
1016 	oldcred = p->p_ucred;
1017 	crcopy(newcred, oldcred);
1018 
1019 	/*
1020 	 * cr_groups[0] holds egid. Setting the whole set from
1021 	 * the supplied set will cause egid to be changed too.
1022 	 * Keep cr_groups[0] unchanged to prevent that.
1023 	 */
1024 
1025 	if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) {
1026 		PROC_UNLOCK(p);
1027 		crfree(newcred);
1028 		goto out;
1029 	}
1030 
1031 	if (ngrp > 0) {
1032 		newcred->cr_ngroups = ngrp + 1;
1033 
1034 		bsd_gidset = newcred->cr_groups;
1035 		ngrp--;
1036 		while (ngrp >= 0) {
1037 			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1038 			ngrp--;
1039 		}
1040 	} else
1041 		newcred->cr_ngroups = 1;
1042 
1043 	setsugid(p);
1044 	proc_set_cred(p, newcred);
1045 	PROC_UNLOCK(p);
1046 	crfree(oldcred);
1047 	error = 0;
1048 out:
1049 	free(linux_gidset, M_LINUX);
1050 	return (error);
1051 }
1052 
1053 int
1054 linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
1055 {
1056 	struct ucred *cred;
1057 	l_gid_t *linux_gidset;
1058 	gid_t *bsd_gidset;
1059 	int bsd_gidsetsz, ngrp, error;
1060 
1061 	cred = td->td_ucred;
1062 	bsd_gidset = cred->cr_groups;
1063 	bsd_gidsetsz = cred->cr_ngroups - 1;
1064 
1065 	/*
1066 	 * cr_groups[0] holds egid. Returning the whole set
1067 	 * here will cause a duplicate. Exclude cr_groups[0]
1068 	 * to prevent that.
1069 	 */
1070 
1071 	if ((ngrp = args->gidsetsize) == 0) {
1072 		td->td_retval[0] = bsd_gidsetsz;
1073 		return (0);
1074 	}
1075 
1076 	if (ngrp < bsd_gidsetsz)
1077 		return (EINVAL);
1078 
1079 	ngrp = 0;
1080 	linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
1081 	    M_LINUX, M_WAITOK);
1082 	while (ngrp < bsd_gidsetsz) {
1083 		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1084 		ngrp++;
1085 	}
1086 
1087 	error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t));
1088 	free(linux_gidset, M_LINUX);
1089 	if (error)
1090 		return (error);
1091 
1092 	td->td_retval[0] = ngrp;
1093 	return (0);
1094 }
1095 
1096 static bool
1097 linux_get_dummy_limit(l_uint resource, struct rlimit *rlim)
1098 {
1099 
1100 	if (linux_dummy_rlimits == 0)
1101 		return (false);
1102 
1103 	switch (resource) {
1104 	case LINUX_RLIMIT_LOCKS:
1105 	case LINUX_RLIMIT_SIGPENDING:
1106 	case LINUX_RLIMIT_MSGQUEUE:
1107 	case LINUX_RLIMIT_RTTIME:
1108 		rlim->rlim_cur = LINUX_RLIM_INFINITY;
1109 		rlim->rlim_max = LINUX_RLIM_INFINITY;
1110 		return (true);
1111 	case LINUX_RLIMIT_NICE:
1112 	case LINUX_RLIMIT_RTPRIO:
1113 		rlim->rlim_cur = 0;
1114 		rlim->rlim_max = 0;
1115 		return (true);
1116 	default:
1117 		return (false);
1118 	}
1119 }
1120 
1121 int
1122 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
1123 {
1124 	struct rlimit bsd_rlim;
1125 	struct l_rlimit rlim;
1126 	u_int which;
1127 	int error;
1128 
1129 	if (args->resource >= LINUX_RLIM_NLIMITS)
1130 		return (EINVAL);
1131 
1132 	which = linux_to_bsd_resource[args->resource];
1133 	if (which == -1)
1134 		return (EINVAL);
1135 
1136 	error = copyin(args->rlim, &rlim, sizeof(rlim));
1137 	if (error)
1138 		return (error);
1139 
1140 	bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
1141 	bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
1142 	return (kern_setrlimit(td, which, &bsd_rlim));
1143 }
1144 
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Old-style Linux getrlimit().
 *
 * Dummy limits (see linux_get_dummy_limit()) are passed through as is.
 * Real limits are truncated to the width of the old interface, and a
 * resulting all-ones value is replaced by the largest signed value of that
 * width (INT_MAX for COMPAT_LINUX32, LONG_MAX otherwise).
 */
int
linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
{
	struct rlimit brl;
	struct l_rlimit lrl;
	u_int res;

	if (linux_get_dummy_limit(args->resource, &brl)) {
		lrl.rlim_cur = brl.rlim_cur;
		lrl.rlim_max = brl.rlim_max;
	} else {
		if (args->resource >= LINUX_RLIM_NLIMITS)
			return (EINVAL);

		res = linux_to_bsd_resource[args->resource];
		if (res == -1)
			return (EINVAL);

		lim_rlimit(td, res, &brl);

#ifdef COMPAT_LINUX32
		lrl.rlim_cur = (unsigned int)brl.rlim_cur;
		lrl.rlim_max = (unsigned int)brl.rlim_max;
		if (lrl.rlim_cur == UINT_MAX)
			lrl.rlim_cur = INT_MAX;
		if (lrl.rlim_max == UINT_MAX)
			lrl.rlim_max = INT_MAX;
#else
		lrl.rlim_cur = (unsigned long)brl.rlim_cur;
		lrl.rlim_max = (unsigned long)brl.rlim_max;
		if (lrl.rlim_cur == ULONG_MAX)
			lrl.rlim_cur = LONG_MAX;
		if (lrl.rlim_max == ULONG_MAX)
			lrl.rlim_max = LONG_MAX;
#endif
	}

	return (copyout(&lrl, args->rlim, sizeof(lrl)));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1186 
1187 int
1188 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
1189 {
1190 	struct l_rlimit rlim;
1191 	struct rlimit bsd_rlim;
1192 	u_int which;
1193 
1194 	if (linux_get_dummy_limit(args->resource, &bsd_rlim)) {
1195 		rlim.rlim_cur = bsd_rlim.rlim_cur;
1196 		rlim.rlim_max = bsd_rlim.rlim_max;
1197 		return (copyout(&rlim, args->rlim, sizeof(rlim)));
1198 	}
1199 
1200 	if (args->resource >= LINUX_RLIM_NLIMITS)
1201 		return (EINVAL);
1202 
1203 	which = linux_to_bsd_resource[args->resource];
1204 	if (which == -1)
1205 		return (EINVAL);
1206 
1207 	lim_rlimit(td, which, &bsd_rlim);
1208 
1209 	rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur;
1210 	rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max;
1211 	return (copyout(&rlim, args->rlim, sizeof(rlim)));
1212 }
1213 
1214 int
1215 linux_sched_setscheduler(struct thread *td,
1216     struct linux_sched_setscheduler_args *args)
1217 {
1218 	struct sched_param sched_param;
1219 	struct thread *tdt;
1220 	int error, policy;
1221 
1222 	switch (args->policy) {
1223 	case LINUX_SCHED_OTHER:
1224 		policy = SCHED_OTHER;
1225 		break;
1226 	case LINUX_SCHED_FIFO:
1227 		policy = SCHED_FIFO;
1228 		break;
1229 	case LINUX_SCHED_RR:
1230 		policy = SCHED_RR;
1231 		break;
1232 	default:
1233 		return (EINVAL);
1234 	}
1235 
1236 	error = copyin(args->param, &sched_param, sizeof(sched_param));
1237 	if (error)
1238 		return (error);
1239 
1240 	if (linux_map_sched_prio) {
1241 		switch (policy) {
1242 		case SCHED_OTHER:
1243 			if (sched_param.sched_priority != 0)
1244 				return (EINVAL);
1245 
1246 			sched_param.sched_priority =
1247 			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
1248 			break;
1249 		case SCHED_FIFO:
1250 		case SCHED_RR:
1251 			if (sched_param.sched_priority < 1 ||
1252 			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO)
1253 				return (EINVAL);
1254 
1255 			/*
1256 			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
1257 			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
1258 			 */
1259 			sched_param.sched_priority =
1260 			    (sched_param.sched_priority - 1) *
1261 			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
1262 			    (LINUX_MAX_RT_PRIO - 1);
1263 			break;
1264 		}
1265 	}
1266 
1267 	tdt = linux_tdfind(td, args->pid, -1);
1268 	if (tdt == NULL)
1269 		return (ESRCH);
1270 
1271 	error = kern_sched_setscheduler(td, tdt, policy, &sched_param);
1272 	PROC_UNLOCK(tdt->td_proc);
1273 	return (error);
1274 }
1275 
1276 int
1277 linux_sched_getscheduler(struct thread *td,
1278     struct linux_sched_getscheduler_args *args)
1279 {
1280 	struct thread *tdt;
1281 	int error, policy;
1282 
1283 	tdt = linux_tdfind(td, args->pid, -1);
1284 	if (tdt == NULL)
1285 		return (ESRCH);
1286 
1287 	error = kern_sched_getscheduler(td, tdt, &policy);
1288 	PROC_UNLOCK(tdt->td_proc);
1289 
1290 	switch (policy) {
1291 	case SCHED_OTHER:
1292 		td->td_retval[0] = LINUX_SCHED_OTHER;
1293 		break;
1294 	case SCHED_FIFO:
1295 		td->td_retval[0] = LINUX_SCHED_FIFO;
1296 		break;
1297 	case SCHED_RR:
1298 		td->td_retval[0] = LINUX_SCHED_RR;
1299 		break;
1300 	}
1301 	return (error);
1302 }
1303 
1304 int
1305 linux_sched_get_priority_max(struct thread *td,
1306     struct linux_sched_get_priority_max_args *args)
1307 {
1308 	struct sched_get_priority_max_args bsd;
1309 
1310 	if (linux_map_sched_prio) {
1311 		switch (args->policy) {
1312 		case LINUX_SCHED_OTHER:
1313 			td->td_retval[0] = 0;
1314 			return (0);
1315 		case LINUX_SCHED_FIFO:
1316 		case LINUX_SCHED_RR:
1317 			td->td_retval[0] = LINUX_MAX_RT_PRIO - 1;
1318 			return (0);
1319 		default:
1320 			return (EINVAL);
1321 		}
1322 	}
1323 
1324 	switch (args->policy) {
1325 	case LINUX_SCHED_OTHER:
1326 		bsd.policy = SCHED_OTHER;
1327 		break;
1328 	case LINUX_SCHED_FIFO:
1329 		bsd.policy = SCHED_FIFO;
1330 		break;
1331 	case LINUX_SCHED_RR:
1332 		bsd.policy = SCHED_RR;
1333 		break;
1334 	default:
1335 		return (EINVAL);
1336 	}
1337 	return (sys_sched_get_priority_max(td, &bsd));
1338 }
1339 
1340 int
1341 linux_sched_get_priority_min(struct thread *td,
1342     struct linux_sched_get_priority_min_args *args)
1343 {
1344 	struct sched_get_priority_min_args bsd;
1345 
1346 	if (linux_map_sched_prio) {
1347 		switch (args->policy) {
1348 		case LINUX_SCHED_OTHER:
1349 			td->td_retval[0] = 0;
1350 			return (0);
1351 		case LINUX_SCHED_FIFO:
1352 		case LINUX_SCHED_RR:
1353 			td->td_retval[0] = 1;
1354 			return (0);
1355 		default:
1356 			return (EINVAL);
1357 		}
1358 	}
1359 
1360 	switch (args->policy) {
1361 	case LINUX_SCHED_OTHER:
1362 		bsd.policy = SCHED_OTHER;
1363 		break;
1364 	case LINUX_SCHED_FIFO:
1365 		bsd.policy = SCHED_FIFO;
1366 		break;
1367 	case LINUX_SCHED_RR:
1368 		bsd.policy = SCHED_RR;
1369 		break;
1370 	default:
1371 		return (EINVAL);
1372 	}
1373 	return (sys_sched_get_priority_min(td, &bsd));
1374 }
1375 
1376 #define REBOOT_CAD_ON	0x89abcdef
1377 #define REBOOT_CAD_OFF	0
1378 #define REBOOT_HALT	0xcdef0123
1379 #define REBOOT_RESTART	0x01234567
1380 #define REBOOT_RESTART2	0xA1B2C3D4
1381 #define REBOOT_POWEROFF	0x4321FEDC
1382 #define REBOOT_MAGIC1	0xfee1dead
1383 #define REBOOT_MAGIC2	0x28121969
1384 #define REBOOT_MAGIC2A	0x05121996
1385 #define REBOOT_MAGIC2B	0x16041998
1386 
1387 int
1388 linux_reboot(struct thread *td, struct linux_reboot_args *args)
1389 {
1390 	struct reboot_args bsd_args;
1391 
1392 	if (args->magic1 != REBOOT_MAGIC1)
1393 		return (EINVAL);
1394 
1395 	switch (args->magic2) {
1396 	case REBOOT_MAGIC2:
1397 	case REBOOT_MAGIC2A:
1398 	case REBOOT_MAGIC2B:
1399 		break;
1400 	default:
1401 		return (EINVAL);
1402 	}
1403 
1404 	switch (args->cmd) {
1405 	case REBOOT_CAD_ON:
1406 	case REBOOT_CAD_OFF:
1407 		return (priv_check(td, PRIV_REBOOT));
1408 	case REBOOT_HALT:
1409 		bsd_args.opt = RB_HALT;
1410 		break;
1411 	case REBOOT_RESTART:
1412 	case REBOOT_RESTART2:
1413 		bsd_args.opt = 0;
1414 		break;
1415 	case REBOOT_POWEROFF:
1416 		bsd_args.opt = RB_POWEROFF;
1417 		break;
1418 	default:
1419 		return (EINVAL);
1420 	}
1421 	return (sys_reboot(td, &bsd_args));
1422 }
1423 
1424 int
1425 linux_getpid(struct thread *td, struct linux_getpid_args *args)
1426 {
1427 
1428 	td->td_retval[0] = td->td_proc->p_pid;
1429 
1430 	return (0);
1431 }
1432 
1433 int
1434 linux_gettid(struct thread *td, struct linux_gettid_args *args)
1435 {
1436 	struct linux_emuldata *em;
1437 
1438 	em = em_find(td);
1439 	KASSERT(em != NULL, ("gettid: emuldata not found.\n"));
1440 
1441 	td->td_retval[0] = em->em_tid;
1442 
1443 	return (0);
1444 }
1445 
1446 int
1447 linux_getppid(struct thread *td, struct linux_getppid_args *args)
1448 {
1449 
1450 	td->td_retval[0] = kern_getppid(td);
1451 	return (0);
1452 }
1453 
1454 int
1455 linux_getgid(struct thread *td, struct linux_getgid_args *args)
1456 {
1457 
1458 	td->td_retval[0] = td->td_ucred->cr_rgid;
1459 	return (0);
1460 }
1461 
1462 int
1463 linux_getuid(struct thread *td, struct linux_getuid_args *args)
1464 {
1465 
1466 	td->td_retval[0] = td->td_ucred->cr_ruid;
1467 	return (0);
1468 }
1469 
1470 int
1471 linux_getsid(struct thread *td, struct linux_getsid_args *args)
1472 {
1473 
1474 	return (kern_getsid(td, args->pid));
1475 }
1476 
1477 int
1478 linux_nosys(struct thread *td, struct nosys_args *ignore)
1479 {
1480 
1481 	return (ENOSYS);
1482 }
1483 
1484 int
1485 linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
1486 {
1487 	int error;
1488 
1489 	error = kern_getpriority(td, args->which, args->who);
1490 	td->td_retval[0] = 20 - td->td_retval[0];
1491 	return (error);
1492 }
1493 
1494 int
1495 linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
1496 {
1497 	int name[2];
1498 
1499 	name[0] = CTL_KERN;
1500 	name[1] = KERN_HOSTNAME;
1501 	return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname,
1502 	    args->len, 0, 0));
1503 }
1504 
1505 int
1506 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
1507 {
1508 	int name[2];
1509 
1510 	name[0] = CTL_KERN;
1511 	name[1] = KERN_NISDOMAINNAME;
1512 	return (userland_sysctl(td, name, 2, 0, 0, 0, args->name,
1513 	    args->len, 0, 0));
1514 }
1515 
1516 int
1517 linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
1518 {
1519 
1520 	LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid,
1521 	    args->error_code);
1522 
1523 	/*
1524 	 * XXX: we should send a signal to the parent if
1525 	 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?)
1526 	 * as it doesnt occur often.
1527 	 */
1528 	exit1(td, args->error_code, 0);
1529 		/* NOTREACHED */
1530 }
1531 
/*
 * Values of l_user_cap_header.version recognized by linux_capget() and
 * linux_capset().  Version 1 is handled with one l_user_cap_data element,
 * versions 2 and 3 with two.
 */
#define _LINUX_CAPABILITY_VERSION_1  0x19980330
#define _LINUX_CAPABILITY_VERSION_2  0x20071026
#define _LINUX_CAPABILITY_VERSION_3  0x20080522

/* Header structure copied in from (and, for version fix-up, out to) userland. */
struct l_user_cap_header {
	l_int	version;	/* one of _LINUX_CAPABILITY_VERSION_* */
	l_int	pid;		/* target; capget/capset reject non-zero with EPERM */
};

/* One element of the capability data array exchanged with userland. */
struct l_user_cap_data {
	l_int	effective;
	l_int	permitted;
	l_int	inheritable;
};
1546 
1547 int
1548 linux_capget(struct thread *td, struct linux_capget_args *uap)
1549 {
1550 	struct l_user_cap_header luch;
1551 	struct l_user_cap_data lucd[2];
1552 	int error, u32s;
1553 
1554 	if (uap->hdrp == NULL)
1555 		return (EFAULT);
1556 
1557 	error = copyin(uap->hdrp, &luch, sizeof(luch));
1558 	if (error != 0)
1559 		return (error);
1560 
1561 	switch (luch.version) {
1562 	case _LINUX_CAPABILITY_VERSION_1:
1563 		u32s = 1;
1564 		break;
1565 	case _LINUX_CAPABILITY_VERSION_2:
1566 	case _LINUX_CAPABILITY_VERSION_3:
1567 		u32s = 2;
1568 		break;
1569 	default:
1570 		luch.version = _LINUX_CAPABILITY_VERSION_1;
1571 		error = copyout(&luch, uap->hdrp, sizeof(luch));
1572 		if (error)
1573 			return (error);
1574 		return (EINVAL);
1575 	}
1576 
1577 	if (luch.pid)
1578 		return (EPERM);
1579 
1580 	if (uap->datap) {
1581 		/*
1582 		 * The current implementation doesn't support setting
1583 		 * a capability (it's essentially a stub) so indicate
1584 		 * that no capabilities are currently set or available
1585 		 * to request.
1586 		 */
1587 		memset(&lucd, 0, u32s * sizeof(lucd[0]));
1588 		error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0]));
1589 	}
1590 
1591 	return (error);
1592 }
1593 
1594 int
1595 linux_capset(struct thread *td, struct linux_capset_args *uap)
1596 {
1597 	struct l_user_cap_header luch;
1598 	struct l_user_cap_data lucd[2];
1599 	int error, i, u32s;
1600 
1601 	if (uap->hdrp == NULL || uap->datap == NULL)
1602 		return (EFAULT);
1603 
1604 	error = copyin(uap->hdrp, &luch, sizeof(luch));
1605 	if (error != 0)
1606 		return (error);
1607 
1608 	switch (luch.version) {
1609 	case _LINUX_CAPABILITY_VERSION_1:
1610 		u32s = 1;
1611 		break;
1612 	case _LINUX_CAPABILITY_VERSION_2:
1613 	case _LINUX_CAPABILITY_VERSION_3:
1614 		u32s = 2;
1615 		break;
1616 	default:
1617 		luch.version = _LINUX_CAPABILITY_VERSION_1;
1618 		error = copyout(&luch, uap->hdrp, sizeof(luch));
1619 		if (error)
1620 			return (error);
1621 		return (EINVAL);
1622 	}
1623 
1624 	if (luch.pid)
1625 		return (EPERM);
1626 
1627 	error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0]));
1628 	if (error != 0)
1629 		return (error);
1630 
1631 	/* We currently don't support setting any capabilities. */
1632 	for (i = 0; i < u32s; i++) {
1633 		if (lucd[i].effective || lucd[i].permitted ||
1634 		    lucd[i].inheritable) {
1635 			linux_msg(td,
1636 			    "capset[%d] effective=0x%x, permitted=0x%x, "
1637 			    "inheritable=0x%x is not implemented", i,
1638 			    (int)lucd[i].effective, (int)lucd[i].permitted,
1639 			    (int)lucd[i].inheritable);
1640 			return (EPERM);
1641 		}
1642 	}
1643 
1644 	return (0);
1645 }
1646 
1647 int
1648 linux_prctl(struct thread *td, struct linux_prctl_args *args)
1649 {
1650 	int error = 0, max_size, arg;
1651 	struct proc *p = td->td_proc;
1652 	char comm[LINUX_MAX_COMM_LEN];
1653 	int pdeath_signal, trace_state;
1654 
1655 	switch (args->option) {
1656 	case LINUX_PR_SET_PDEATHSIG:
1657 		if (!LINUX_SIG_VALID(args->arg2))
1658 			return (EINVAL);
1659 		pdeath_signal = linux_to_bsd_signal(args->arg2);
1660 		return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL,
1661 		    &pdeath_signal));
1662 	case LINUX_PR_GET_PDEATHSIG:
1663 		error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS,
1664 		    &pdeath_signal);
1665 		if (error != 0)
1666 			return (error);
1667 		pdeath_signal = bsd_to_linux_signal(pdeath_signal);
1668 		return (copyout(&pdeath_signal,
1669 		    (void *)(register_t)args->arg2,
1670 		    sizeof(pdeath_signal)));
1671 	/*
1672 	 * In Linux, this flag controls if set[gu]id processes can coredump.
1673 	 * There are additional semantics imposed on processes that cannot
1674 	 * coredump:
1675 	 * - Such processes can not be ptraced.
1676 	 * - There are some semantics around ownership of process-related files
1677 	 *   in the /proc namespace.
1678 	 *
1679 	 * In FreeBSD, we can (and by default, do) disable setuid coredump
1680 	 * system-wide with 'sugid_coredump.'  We control tracability on a
1681 	 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag).
1682 	 * By happy coincidence, P2_NOTRACE also prevents coredumping.  So the
1683 	 * procctl is roughly analogous to Linux's DUMPABLE.
1684 	 *
1685 	 * So, proxy these knobs to the corresponding PROC_TRACE setting.
1686 	 */
1687 	case LINUX_PR_GET_DUMPABLE:
1688 		error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS,
1689 		    &trace_state);
1690 		if (error != 0)
1691 			return (error);
1692 		td->td_retval[0] = (trace_state != -1);
1693 		return (0);
1694 	case LINUX_PR_SET_DUMPABLE:
1695 		/*
1696 		 * It is only valid for userspace to set one of these two
1697 		 * flags, and only one at a time.
1698 		 */
1699 		switch (args->arg2) {
1700 		case LINUX_SUID_DUMP_DISABLE:
1701 			trace_state = PROC_TRACE_CTL_DISABLE_EXEC;
1702 			break;
1703 		case LINUX_SUID_DUMP_USER:
1704 			trace_state = PROC_TRACE_CTL_ENABLE;
1705 			break;
1706 		default:
1707 			return (EINVAL);
1708 		}
1709 		return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL,
1710 		    &trace_state));
1711 	case LINUX_PR_GET_KEEPCAPS:
1712 		/*
1713 		 * Indicate that we always clear the effective and
1714 		 * permitted capability sets when the user id becomes
1715 		 * non-zero (actually the capability sets are simply
1716 		 * always zero in the current implementation).
1717 		 */
1718 		td->td_retval[0] = 0;
1719 		break;
1720 	case LINUX_PR_SET_KEEPCAPS:
1721 		/*
1722 		 * Ignore requests to keep the effective and permitted
1723 		 * capability sets when the user id becomes non-zero.
1724 		 */
1725 		break;
1726 	case LINUX_PR_SET_NAME:
1727 		/*
1728 		 * To be on the safe side we need to make sure to not
1729 		 * overflow the size a Linux program expects. We already
1730 		 * do this here in the copyin, so that we don't need to
1731 		 * check on copyout.
1732 		 */
1733 		max_size = MIN(sizeof(comm), sizeof(p->p_comm));
1734 		error = copyinstr((void *)(register_t)args->arg2, comm,
1735 		    max_size, NULL);
1736 
1737 		/* Linux silently truncates the name if it is too long. */
1738 		if (error == ENAMETOOLONG) {
1739 			/*
1740 			 * XXX: copyinstr() isn't documented to populate the
1741 			 * array completely, so do a copyin() to be on the
1742 			 * safe side. This should be changed in case
1743 			 * copyinstr() is changed to guarantee this.
1744 			 */
1745 			error = copyin((void *)(register_t)args->arg2, comm,
1746 			    max_size - 1);
1747 			comm[max_size - 1] = '\0';
1748 		}
1749 		if (error)
1750 			return (error);
1751 
1752 		PROC_LOCK(p);
1753 		strlcpy(p->p_comm, comm, sizeof(p->p_comm));
1754 		PROC_UNLOCK(p);
1755 		break;
1756 	case LINUX_PR_GET_NAME:
1757 		PROC_LOCK(p);
1758 		strlcpy(comm, p->p_comm, sizeof(comm));
1759 		PROC_UNLOCK(p);
1760 		error = copyout(comm, (void *)(register_t)args->arg2,
1761 		    strlen(comm) + 1);
1762 		break;
1763 	case LINUX_PR_GET_SECCOMP:
1764 	case LINUX_PR_SET_SECCOMP:
1765 		/*
1766 		 * Same as returned by Linux without CONFIG_SECCOMP enabled.
1767 		 */
1768 		error = EINVAL;
1769 		break;
1770 	case LINUX_PR_CAPBSET_READ:
1771 #if 0
1772 		/*
1773 		 * This makes too much noise with Ubuntu Focal.
1774 		 */
1775 		linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d",
1776 		    (int)args->arg2);
1777 #endif
1778 		error = EINVAL;
1779 		break;
1780 	case LINUX_PR_SET_NO_NEW_PRIVS:
1781 		arg = args->arg2 == 1 ?
1782 		    PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE;
1783 		error = kern_procctl(td, P_PID, p->p_pid,
1784 		    PROC_NO_NEW_PRIVS_CTL, &arg);
1785 		break;
1786 	case LINUX_PR_SET_PTRACER:
1787 		linux_msg(td, "unsupported prctl PR_SET_PTRACER");
1788 		error = EINVAL;
1789 		break;
1790 	default:
1791 		linux_msg(td, "unsupported prctl option %d", args->option);
1792 		error = EINVAL;
1793 		break;
1794 	}
1795 
1796 	return (error);
1797 }
1798 
1799 int
1800 linux_sched_setparam(struct thread *td,
1801     struct linux_sched_setparam_args *uap)
1802 {
1803 	struct sched_param sched_param;
1804 	struct thread *tdt;
1805 	int error, policy;
1806 
1807 	error = copyin(uap->param, &sched_param, sizeof(sched_param));
1808 	if (error)
1809 		return (error);
1810 
1811 	tdt = linux_tdfind(td, uap->pid, -1);
1812 	if (tdt == NULL)
1813 		return (ESRCH);
1814 
1815 	if (linux_map_sched_prio) {
1816 		error = kern_sched_getscheduler(td, tdt, &policy);
1817 		if (error)
1818 			goto out;
1819 
1820 		switch (policy) {
1821 		case SCHED_OTHER:
1822 			if (sched_param.sched_priority != 0) {
1823 				error = EINVAL;
1824 				goto out;
1825 			}
1826 			sched_param.sched_priority =
1827 			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
1828 			break;
1829 		case SCHED_FIFO:
1830 		case SCHED_RR:
1831 			if (sched_param.sched_priority < 1 ||
1832 			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO) {
1833 				error = EINVAL;
1834 				goto out;
1835 			}
1836 			/*
1837 			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
1838 			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
1839 			 */
1840 			sched_param.sched_priority =
1841 			    (sched_param.sched_priority - 1) *
1842 			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
1843 			    (LINUX_MAX_RT_PRIO - 1);
1844 			break;
1845 		}
1846 	}
1847 
1848 	error = kern_sched_setparam(td, tdt, &sched_param);
1849 out:	PROC_UNLOCK(tdt->td_proc);
1850 	return (error);
1851 }
1852 
1853 int
1854 linux_sched_getparam(struct thread *td,
1855     struct linux_sched_getparam_args *uap)
1856 {
1857 	struct sched_param sched_param;
1858 	struct thread *tdt;
1859 	int error, policy;
1860 
1861 	tdt = linux_tdfind(td, uap->pid, -1);
1862 	if (tdt == NULL)
1863 		return (ESRCH);
1864 
1865 	error = kern_sched_getparam(td, tdt, &sched_param);
1866 	if (error) {
1867 		PROC_UNLOCK(tdt->td_proc);
1868 		return (error);
1869 	}
1870 
1871 	if (linux_map_sched_prio) {
1872 		error = kern_sched_getscheduler(td, tdt, &policy);
1873 		PROC_UNLOCK(tdt->td_proc);
1874 		if (error)
1875 			return (error);
1876 
1877 		switch (policy) {
1878 		case SCHED_OTHER:
1879 			sched_param.sched_priority = 0;
1880 			break;
1881 		case SCHED_FIFO:
1882 		case SCHED_RR:
1883 			/*
1884 			 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to
1885 			 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up).
1886 			 */
1887 			sched_param.sched_priority =
1888 			    (sched_param.sched_priority *
1889 			    (LINUX_MAX_RT_PRIO - 1) +
1890 			    (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) /
1891 			    (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1;
1892 			break;
1893 		}
1894 	} else
1895 		PROC_UNLOCK(tdt->td_proc);
1896 
1897 	error = copyout(&sched_param, uap->param, sizeof(sched_param));
1898 	return (error);
1899 }
1900 
1901 /*
1902  * Get affinity of a process.
1903  */
1904 int
1905 linux_sched_getaffinity(struct thread *td,
1906     struct linux_sched_getaffinity_args *args)
1907 {
1908 	struct thread *tdt;
1909 	cpuset_t *mask;
1910 	size_t size;
1911 	int error;
1912 	id_t tid;
1913 
1914 	tdt = linux_tdfind(td, args->pid, -1);
1915 	if (tdt == NULL)
1916 		return (ESRCH);
1917 	tid = tdt->td_tid;
1918 	PROC_UNLOCK(tdt->td_proc);
1919 
1920 	mask = malloc(sizeof(cpuset_t), M_LINUX, M_WAITOK | M_ZERO);
1921 	size = min(args->len, sizeof(cpuset_t));
1922 	error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
1923 	    tid, size, mask);
1924 	if (error == ERANGE)
1925 		error = EINVAL;
1926  	if (error == 0)
1927 		error = copyout(mask, args->user_mask_ptr, size);
1928 	if (error == 0)
1929 		td->td_retval[0] = size;
1930 	free(mask, M_LINUX);
1931 	return (error);
1932 }
1933 
1934 /*
1935  *  Set affinity of a process.
1936  */
1937 int
1938 linux_sched_setaffinity(struct thread *td,
1939     struct linux_sched_setaffinity_args *args)
1940 {
1941 	struct thread *tdt;
1942 	cpuset_t *mask;
1943 	int cpu, error;
1944 	size_t len;
1945 	id_t tid;
1946 
1947 	tdt = linux_tdfind(td, args->pid, -1);
1948 	if (tdt == NULL)
1949 		return (ESRCH);
1950 	tid = tdt->td_tid;
1951 	PROC_UNLOCK(tdt->td_proc);
1952 
1953 	len = min(args->len, sizeof(cpuset_t));
1954 	mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);;
1955 	error = copyin(args->user_mask_ptr, mask, len);
1956 	if (error != 0)
1957 		goto out;
1958 	/* Linux ignore high bits */
1959 	CPU_FOREACH_ISSET(cpu, mask)
1960 		if (cpu > mp_maxid)
1961 			CPU_CLR(cpu, mask);
1962 
1963 	error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
1964 	    tid, mask);
1965 	if (error == EDEADLK)
1966 		error = EINVAL;
1967 out:
1968 	free(mask, M_TEMP);
1969 	return (error);
1970 }
1971 
/*
 * Limit pair with unsigned 64-bit fields, as copied out to userland by
 * linux_prlimit64().
 */
struct linux_rlimit64 {
	uint64_t	rlim_cur;	/* current limit */
	uint64_t	rlim_max;	/* maximum limit */
};
1976 
1977 int
1978 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args)
1979 {
1980 	struct rlimit rlim, nrlim;
1981 	struct linux_rlimit64 lrlim;
1982 	struct proc *p;
1983 	u_int which;
1984 	int flags;
1985 	int error;
1986 
1987 	if (args->new == NULL && args->old != NULL) {
1988 		if (linux_get_dummy_limit(args->resource, &rlim)) {
1989 			lrlim.rlim_cur = rlim.rlim_cur;
1990 			lrlim.rlim_max = rlim.rlim_max;
1991 			return (copyout(&lrlim, args->old, sizeof(lrlim)));
1992 		}
1993 	}
1994 
1995 	if (args->resource >= LINUX_RLIM_NLIMITS)
1996 		return (EINVAL);
1997 
1998 	which = linux_to_bsd_resource[args->resource];
1999 	if (which == -1)
2000 		return (EINVAL);
2001 
2002 	if (args->new != NULL) {
2003 		/*
2004 		 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux
2005 		 * rlim is unsigned 64-bit. FreeBSD treats negative limits
2006 		 * as INFINITY so we do not need a conversion even.
2007 		 */
2008 		error = copyin(args->new, &nrlim, sizeof(nrlim));
2009 		if (error != 0)
2010 			return (error);
2011 	}
2012 
2013 	flags = PGET_HOLD | PGET_NOTWEXIT;
2014 	if (args->new != NULL)
2015 		flags |= PGET_CANDEBUG;
2016 	else
2017 		flags |= PGET_CANSEE;
2018 	if (args->pid == 0) {
2019 		p = td->td_proc;
2020 		PHOLD(p);
2021 	} else {
2022 		error = pget(args->pid, flags, &p);
2023 		if (error != 0)
2024 			return (error);
2025 	}
2026 	if (args->old != NULL) {
2027 		PROC_LOCK(p);
2028 		lim_rlimit_proc(p, which, &rlim);
2029 		PROC_UNLOCK(p);
2030 		if (rlim.rlim_cur == RLIM_INFINITY)
2031 			lrlim.rlim_cur = LINUX_RLIM_INFINITY;
2032 		else
2033 			lrlim.rlim_cur = rlim.rlim_cur;
2034 		if (rlim.rlim_max == RLIM_INFINITY)
2035 			lrlim.rlim_max = LINUX_RLIM_INFINITY;
2036 		else
2037 			lrlim.rlim_max = rlim.rlim_max;
2038 		error = copyout(&lrlim, args->old, sizeof(lrlim));
2039 		if (error != 0)
2040 			goto out;
2041 	}
2042 
2043 	if (args->new != NULL)
2044 		error = kern_proc_setrlimit(td, p, which, &nrlim);
2045 
2046  out:
2047 	PRELE(p);
2048 	return (error);
2049 }
2050 
2051 int
2052 linux_pselect6(struct thread *td, struct linux_pselect6_args *args)
2053 {
2054 	struct timespec ts, *tsp;
2055 	int error;
2056 
2057 	if (args->tsp != NULL) {
2058 		error = linux_get_timespec(&ts, args->tsp);
2059 		if (error != 0)
2060 			return (error);
2061 		tsp = &ts;
2062 	} else
2063 		tsp = NULL;
2064 
2065 	error = linux_common_pselect6(td, args->nfds, args->readfds,
2066 	    args->writefds, args->exceptfds, tsp, args->sig);
2067 
2068 	if (args->tsp != NULL)
2069 		linux_put_timespec(&ts, args->tsp);
2070 	return (error);
2071 }
2072 
2073 static int
2074 linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds,
2075     l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp,
2076     l_uintptr_t *sig)
2077 {
2078 	struct timeval utv, tv0, tv1, *tvp;
2079 	struct l_pselect6arg lpse6;
2080 	sigset_t *ssp;
2081 	sigset_t ss;
2082 	int error;
2083 
2084 	ssp = NULL;
2085 	if (sig != NULL) {
2086 		error = copyin(sig, &lpse6, sizeof(lpse6));
2087 		if (error != 0)
2088 			return (error);
2089 		error = linux_copyin_sigset(td, PTRIN(lpse6.ss),
2090 		    lpse6.ss_len, &ss, &ssp);
2091 		if (error != 0)
2092 		    return (error);
2093 	} else
2094 		ssp = NULL;
2095 
2096 	/*
2097 	 * Currently glibc changes nanosecond number to microsecond.
2098 	 * This mean losing precision but for now it is hardly seen.
2099 	 */
2100 	if (tsp != NULL) {
2101 		TIMESPEC_TO_TIMEVAL(&utv, tsp);
2102 		if (itimerfix(&utv))
2103 			return (EINVAL);
2104 
2105 		microtime(&tv0);
2106 		tvp = &utv;
2107 	} else
2108 		tvp = NULL;
2109 
2110 	error = kern_pselect(td, nfds, readfds, writefds,
2111 	    exceptfds, tvp, ssp, LINUX_NFDBITS);
2112 
2113 	if (tsp != NULL) {
2114 		/*
2115 		 * Compute how much time was left of the timeout,
2116 		 * by subtracting the current time and the time
2117 		 * before we started the call, and subtracting
2118 		 * that result from the user-supplied value.
2119 		 */
2120 		microtime(&tv1);
2121 		timevalsub(&tv1, &tv0);
2122 		timevalsub(&utv, &tv1);
2123 		if (utv.tv_sec < 0)
2124 			timevalclear(&utv);
2125 		TIMEVAL_TO_TIMESPEC(&utv, tsp);
2126 	}
2127 	return (error);
2128 }
2129 
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Variant of linux_pselect6() whose timeout is exchanged with userland
 * through linux_get_timespec64() / linux_put_timespec64().
 *
 * The timeout is written back whenever one was supplied; the status of
 * that write-back is not reported.
 */
int
linux_pselect6_time64(struct thread *td,
    struct linux_pselect6_time64_args *args)
{
	struct timespec timeout, *tsp;
	int error;

	tsp = NULL;
	if (args->tsp != NULL) {
		error = linux_get_timespec64(&timeout, args->tsp);
		if (error != 0)
			return (error);
		tsp = &timeout;
	}

	error = linux_common_pselect6(td, args->nfds, args->readfds,
	    args->writefds, args->exceptfds, tsp, args->sig);

	if (args->tsp != NULL)
		linux_put_timespec64(&timeout, args->tsp);
	return (error);
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
2154 
2155 int
2156 linux_ppoll(struct thread *td, struct linux_ppoll_args *args)
2157 {
2158 	struct timespec uts, *tsp;
2159 	int error;
2160 
2161 	if (args->tsp != NULL) {
2162 		error = linux_get_timespec(&uts, args->tsp);
2163 		if (error != 0)
2164 			return (error);
2165 		tsp = &uts;
2166 	} else
2167 		tsp = NULL;
2168 
2169 	error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
2170 	    args->sset, args->ssize);
2171 	if (error == 0 && args->tsp != NULL)
2172 		error = linux_put_timespec(&uts, args->tsp);
2173 	return (error);
2174 }
2175 
2176 static int
2177 linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds,
2178     struct timespec *tsp, l_sigset_t *sset, l_size_t ssize)
2179 {
2180 	struct timespec ts0, ts1;
2181 	struct pollfd stackfds[32];
2182 	struct pollfd *kfds;
2183  	sigset_t *ssp;
2184  	sigset_t ss;
2185  	int error;
2186 
2187 	if (kern_poll_maxfds(nfds))
2188 		return (EINVAL);
2189 	if (sset != NULL) {
2190 		error = linux_copyin_sigset(td, sset, ssize, &ss, &ssp);
2191 		if (error != 0)
2192 		    return (error);
2193 	} else
2194 		ssp = NULL;
2195 	if (tsp != NULL)
2196 		nanotime(&ts0);
2197 
2198 	if (nfds > nitems(stackfds))
2199 		kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK);
2200 	else
2201 		kfds = stackfds;
2202 	error = linux_pollin(td, kfds, fds, nfds);
2203 	if (error != 0)
2204 		goto out;
2205 
2206 	error = kern_poll_kfds(td, kfds, nfds, tsp, ssp);
2207 	if (error == 0)
2208 		error = linux_pollout(td, kfds, fds, nfds);
2209 
2210 	if (error == 0 && tsp != NULL) {
2211 		if (td->td_retval[0]) {
2212 			nanotime(&ts1);
2213 			timespecsub(&ts1, &ts0, &ts1);
2214 			timespecsub(tsp, &ts1, tsp);
2215 			if (tsp->tv_sec < 0)
2216 				timespecclear(tsp);
2217 		} else
2218 			timespecclear(tsp);
2219 	}
2220 
2221 out:
2222 	if (nfds > nitems(stackfds))
2223 		free(kfds, M_TEMP);
2224 	return (error);
2225 }
2226 
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Variant of linux_ppoll() whose timeout is exchanged with userland
 * through linux_get_timespec64() / linux_put_timespec64().
 */
int
linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args)
{
	struct timespec timeout, *tsp;
	int error;

	tsp = NULL;
	if (args->tsp != NULL) {
		error = linux_get_timespec64(&timeout, args->tsp);
		if (error != 0)
			return (error);
		tsp = &timeout;
	}

	error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
	    args->sset, args->ssize);
	if (error != 0 || args->tsp == NULL)
		return (error);

	return (linux_put_timespec64(&timeout, args->tsp));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
2248 
2249 static int
2250 linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd)
2251 {
2252 	int error;
2253 	u_int i;
2254 
2255 	error = copyin(ufds, fds, nfd * sizeof(*fds));
2256 	if (error != 0)
2257 		return (error);
2258 
2259 	for (i = 0; i < nfd; i++) {
2260 		if (fds->events != 0)
2261 			linux_to_bsd_poll_events(td, fds->fd,
2262 			    fds->events, &fds->events);
2263 		fds++;
2264 	}
2265 	return (0);
2266 }
2267 
2268 static int
2269 linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd)
2270 {
2271 	int error = 0;
2272 	u_int i, n = 0;
2273 
2274 	for (i = 0; i < nfd; i++) {
2275 		if (fds->revents != 0) {
2276 			bsd_to_linux_poll_events(fds->revents,
2277 			    &fds->revents);
2278 			n++;
2279 		}
2280 		error = copyout(&fds->revents, &ufds->revents,
2281 		    sizeof(ufds->revents));
2282 		if (error)
2283 			return (error);
2284 		fds++;
2285 		ufds++;
2286 	}
2287 	td->td_retval[0] = n;
2288 	return (0);
2289 }
2290 
2291 static int
2292 linux_sched_rr_get_interval_common(struct thread *td, pid_t pid,
2293     struct timespec *ts)
2294 {
2295 	struct thread *tdt;
2296 	int error;
2297 
2298 	/*
2299 	 * According to man in case the invalid pid specified
2300 	 * EINVAL should be returned.
2301 	 */
2302 	if (pid < 0)
2303 		return (EINVAL);
2304 
2305 	tdt = linux_tdfind(td, pid, -1);
2306 	if (tdt == NULL)
2307 		return (ESRCH);
2308 
2309 	error = kern_sched_rr_get_interval_td(td, tdt, ts);
2310 	PROC_UNLOCK(tdt->td_proc);
2311 	return (error);
2312 }
2313 
2314 int
2315 linux_sched_rr_get_interval(struct thread *td,
2316     struct linux_sched_rr_get_interval_args *uap)
2317 {
2318 	struct timespec ts;
2319 	int error;
2320 
2321 	error = linux_sched_rr_get_interval_common(td, uap->pid, &ts);
2322 	if (error != 0)
2323 		return (error);
2324 	return (linux_put_timespec(&ts, uap->interval));
2325 }
2326 
2327 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
2328 int
2329 linux_sched_rr_get_interval_time64(struct thread *td,
2330     struct linux_sched_rr_get_interval_time64_args *uap)
2331 {
2332 	struct timespec ts;
2333 	int error;
2334 
2335 	error = linux_sched_rr_get_interval_common(td, uap->pid, &ts);
2336 	if (error != 0)
2337 		return (error);
2338 	return (linux_put_timespec64(&ts, uap->interval));
2339 }
2340 #endif
2341 
2342 /*
2343  * In case when the Linux thread is the initial thread in
2344  * the thread group thread id is equal to the process id.
2345  * Glibc depends on this magic (assert in pthread_getattr_np.c).
2346  */
2347 struct thread *
2348 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid)
2349 {
2350 	struct linux_emuldata *em;
2351 	struct thread *tdt;
2352 	struct proc *p;
2353 
2354 	tdt = NULL;
2355 	if (tid == 0 || tid == td->td_tid) {
2356 		if (pid != -1 && td->td_proc->p_pid != pid)
2357 			return (NULL);
2358 		PROC_LOCK(td->td_proc);
2359 		return (td);
2360 	} else if (tid > PID_MAX)
2361 		return (tdfind(tid, pid));
2362 
2363 	/*
2364 	 * Initial thread where the tid equal to the pid.
2365 	 */
2366 	p = pfind(tid);
2367 	if (p != NULL) {
2368 		if (SV_PROC_ABI(p) != SV_ABI_LINUX ||
2369 		    (pid != -1 && tid != pid)) {
2370 			/*
2371 			 * p is not a Linuxulator process.
2372 			 */
2373 			PROC_UNLOCK(p);
2374 			return (NULL);
2375 		}
2376 		FOREACH_THREAD_IN_PROC(p, tdt) {
2377 			em = em_find(tdt);
2378 			if (tid == em->em_tid)
2379 				return (tdt);
2380 		}
2381 		PROC_UNLOCK(p);
2382 	}
2383 	return (NULL);
2384 }
2385 
2386 void
2387 linux_to_bsd_waitopts(int options, int *bsdopts)
2388 {
2389 
2390 	if (options & LINUX_WNOHANG)
2391 		*bsdopts |= WNOHANG;
2392 	if (options & LINUX_WUNTRACED)
2393 		*bsdopts |= WUNTRACED;
2394 	if (options & LINUX_WEXITED)
2395 		*bsdopts |= WEXITED;
2396 	if (options & LINUX_WCONTINUED)
2397 		*bsdopts |= WCONTINUED;
2398 	if (options & LINUX_WNOWAIT)
2399 		*bsdopts |= WNOWAIT;
2400 
2401 	if (options & __WCLONE)
2402 		*bsdopts |= WLINUXCLONE;
2403 }
2404 
2405 int
2406 linux_getrandom(struct thread *td, struct linux_getrandom_args *args)
2407 {
2408 	struct uio uio;
2409 	struct iovec iov;
2410 	int error;
2411 
2412 	if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM))
2413 		return (EINVAL);
2414 	if (args->count > INT_MAX)
2415 		args->count = INT_MAX;
2416 
2417 	iov.iov_base = args->buf;
2418 	iov.iov_len = args->count;
2419 
2420 	uio.uio_iov = &iov;
2421 	uio.uio_iovcnt = 1;
2422 	uio.uio_resid = iov.iov_len;
2423 	uio.uio_segflg = UIO_USERSPACE;
2424 	uio.uio_rw = UIO_READ;
2425 	uio.uio_td = td;
2426 
2427 	error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK);
2428 	if (error == 0)
2429 		td->td_retval[0] = args->count - uio.uio_resid;
2430 	return (error);
2431 }
2432 
2433 int
2434 linux_mincore(struct thread *td, struct linux_mincore_args *args)
2435 {
2436 
2437 	/* Needs to be page-aligned */
2438 	if (args->start & PAGE_MASK)
2439 		return (EINVAL);
2440 	return (kern_mincore(td, args->start, args->len, args->vec));
2441 }
2442 
2443 #define	SYSLOG_TAG	"<6>"
2444 
2445 int
2446 linux_syslog(struct thread *td, struct linux_syslog_args *args)
2447 {
2448 	char buf[128], *src, *dst;
2449 	u_int seq;
2450 	int buflen, error;
2451 
2452 	if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) {
2453 		linux_msg(td, "syslog unsupported type 0x%x", args->type);
2454 		return (EINVAL);
2455 	}
2456 
2457 	if (args->len < 6) {
2458 		td->td_retval[0] = 0;
2459 		return (0);
2460 	}
2461 
2462 	error = priv_check(td, PRIV_MSGBUF);
2463 	if (error)
2464 		return (error);
2465 
2466 	mtx_lock(&msgbuf_lock);
2467 	msgbuf_peekbytes(msgbufp, NULL, 0, &seq);
2468 	mtx_unlock(&msgbuf_lock);
2469 
2470 	dst = args->buf;
2471 	error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG));
2472 	/* The -1 is to skip the trailing '\0'. */
2473 	dst += sizeof(SYSLOG_TAG) - 1;
2474 
2475 	while (error == 0) {
2476 		mtx_lock(&msgbuf_lock);
2477 		buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq);
2478 		mtx_unlock(&msgbuf_lock);
2479 
2480 		if (buflen == 0)
2481 			break;
2482 
2483 		for (src = buf; src < buf + buflen && error == 0; src++) {
2484 			if (*src == '\0')
2485 				continue;
2486 
2487 			if (dst >= args->buf + args->len)
2488 				goto out;
2489 
2490 			error = copyout(src, dst, 1);
2491 			dst++;
2492 
2493 			if (*src == '\n' && *(src + 1) != '<' &&
2494 			    dst + sizeof(SYSLOG_TAG) < args->buf + args->len) {
2495 				error = copyout(&SYSLOG_TAG,
2496 				    dst, sizeof(SYSLOG_TAG));
2497 				dst += sizeof(SYSLOG_TAG) - 1;
2498 			}
2499 		}
2500 	}
2501 out:
2502 	td->td_retval[0] = dst - args->buf;
2503 	return (error);
2504 }
2505 
2506 int
2507 linux_getcpu(struct thread *td, struct linux_getcpu_args *args)
2508 {
2509 	int cpu, error, node;
2510 
2511 	cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */
2512 	error = 0;
2513 	node = cpuid_to_pcpu[cpu]->pc_domain;
2514 
2515 	if (args->cpu != NULL)
2516 		error = copyout(&cpu, args->cpu, sizeof(l_int));
2517 	if (args->node != NULL)
2518 		error = copyout(&node, args->node, sizeof(l_int));
2519 	return (error);
2520 }
2521 
2522 #if defined(__i386__) || defined(__amd64__)
2523 int
2524 linux_poll(struct thread *td, struct linux_poll_args *args)
2525 {
2526 	struct timespec ts, *tsp;
2527 
2528 	if (args->timeout != INFTIM) {
2529 		if (args->timeout < 0)
2530 			return (EINVAL);
2531 		ts.tv_sec = args->timeout / 1000;
2532 		ts.tv_nsec = (args->timeout % 1000) * 1000000;
2533 		tsp = &ts;
2534 	} else
2535 		tsp = NULL;
2536 
2537 	return (linux_common_ppoll(td, args->fds, args->nfds,
2538 	    tsp, NULL, 0));
2539 }
2540 #endif /* __i386__ || __amd64__ */
2541 
2542 int
2543 linux_seccomp(struct thread *td, struct linux_seccomp_args *args)
2544 {
2545 
2546 	switch (args->op) {
2547 	case LINUX_SECCOMP_GET_ACTION_AVAIL:
2548 		return (EOPNOTSUPP);
2549 	default:
2550 		/*
2551 		 * Ignore unknown operations, just like Linux kernel built
2552 		 * without CONFIG_SECCOMP.
2553 		 */
2554 		return (EINVAL);
2555 	}
2556 }
2557 
2558 /*
2559  * Custom version of exec_copyin_args(), to copy out argument and environment
2560  * strings from the old process address space into the temporary string buffer.
2561  * Based on freebsd32_exec_copyin_args.
2562  */
2563 static int
2564 linux_exec_copyin_args(struct image_args *args, const char *fname,
2565     enum uio_seg segflg, l_uintptr_t *argv, l_uintptr_t *envv)
2566 {
2567 	char *argp, *envp;
2568 	l_uintptr_t *ptr, arg;
2569 	int error;
2570 
2571 	bzero(args, sizeof(*args));
2572 	if (argv == NULL)
2573 		return (EFAULT);
2574 
2575 	/*
2576 	 * Allocate demand-paged memory for the file name, argument, and
2577 	 * environment strings.
2578 	 */
2579 	error = exec_alloc_args(args);
2580 	if (error != 0)
2581 		return (error);
2582 
2583 	/*
2584 	 * Copy the file name.
2585 	 */
2586 	error = exec_args_add_fname(args, fname, segflg);
2587 	if (error != 0)
2588 		goto err_exit;
2589 
2590 	/*
2591 	 * extract arguments first
2592 	 */
2593 	ptr = argv;
2594 	for (;;) {
2595 		error = copyin(ptr++, &arg, sizeof(arg));
2596 		if (error)
2597 			goto err_exit;
2598 		if (arg == 0)
2599 			break;
2600 		argp = PTRIN(arg);
2601 		error = exec_args_add_arg(args, argp, UIO_USERSPACE);
2602 		if (error != 0)
2603 			goto err_exit;
2604 	}
2605 
2606 	/*
2607 	 * This comment is from Linux do_execveat_common:
2608 	 * When argv is empty, add an empty string ("") as argv[0] to
2609 	 * ensure confused userspace programs that start processing
2610 	 * from argv[1] won't end up walking envp.
2611 	 */
2612 	if (args->argc == 0 &&
2613 	    (error = exec_args_add_arg(args, "", UIO_SYSSPACE) != 0))
2614 		goto err_exit;
2615 
2616 	/*
2617 	 * extract environment strings
2618 	 */
2619 	if (envv) {
2620 		ptr = envv;
2621 		for (;;) {
2622 			error = copyin(ptr++, &arg, sizeof(arg));
2623 			if (error)
2624 				goto err_exit;
2625 			if (arg == 0)
2626 				break;
2627 			envp = PTRIN(arg);
2628 			error = exec_args_add_env(args, envp, UIO_USERSPACE);
2629 			if (error != 0)
2630 				goto err_exit;
2631 		}
2632 	}
2633 
2634 	return (0);
2635 
2636 err_exit:
2637 	exec_free_args(args);
2638 	return (error);
2639 }
2640 
2641 int
2642 linux_execve(struct thread *td, struct linux_execve_args *args)
2643 {
2644 	struct image_args eargs;
2645 	int error;
2646 
2647 	LINUX_CTR(execve);
2648 
2649 	error = linux_exec_copyin_args(&eargs, args->path, UIO_USERSPACE,
2650 	    args->argp, args->envp);
2651 	if (error == 0)
2652 		error = linux_common_execve(td, &eargs);
2653 	AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td);
2654 	return (error);
2655 }
2656