xref: /freebsd/sys/compat/linux/linux_file.c (revision dd41de95a84d979615a2ef11df6850622bf6184e)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1994-1995 Søren Schmidt
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_compat.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/capsicum.h>
37 #include <sys/conf.h>
38 #include <sys/dirent.h>
39 #include <sys/fcntl.h>
40 #include <sys/file.h>
41 #include <sys/filedesc.h>
42 #include <sys/lock.h>
43 #include <sys/malloc.h>
44 #include <sys/mman.h>
45 #include <sys/mount.h>
46 #include <sys/mutex.h>
47 #include <sys/namei.h>
48 #include <sys/selinfo.h>
49 #include <sys/pipe.h>
50 #include <sys/proc.h>
51 #include <sys/stat.h>
52 #include <sys/sx.h>
53 #include <sys/syscallsubr.h>
54 #include <sys/sysproto.h>
55 #include <sys/tty.h>
56 #include <sys/unistd.h>
57 #include <sys/vnode.h>
58 
59 #ifdef COMPAT_LINUX32
60 #include <compat/freebsd32/freebsd32_misc.h>
61 #include <machine/../linux32/linux.h>
62 #include <machine/../linux32/linux32_proto.h>
63 #else
64 #include <machine/../linux/linux.h>
65 #include <machine/../linux/linux_proto.h>
66 #endif
67 #include <compat/linux/linux_misc.h>
68 #include <compat/linux/linux_util.h>
69 #include <compat/linux/linux_file.h>
70 
/* Forward declarations of file-local helpers that are defined further down. */
static int	linux_common_open(struct thread *td, int dirfd,
		    const char *path, int l_flags, int mode,
		    enum uio_seg seg);
static int	linux_getdents_error(struct thread *td, int fd, int err);
74 
/*
 * Translation table for the F_SEAL_* flags.  Every entry is declared
 * through BITMAP_1t1_LINUX().
 * NOTE(review): presumably that macro pairs a native flag with its
 * LINUX_-prefixed counterpart -- confirm against its definition.
 */
static struct bsd_to_linux_bitmap seal_bitmap[] = {
	BITMAP_1t1_LINUX(F_SEAL_SEAL),
	BITMAP_1t1_LINUX(F_SEAL_SHRINK),
	BITMAP_1t1_LINUX(F_SEAL_GROW),
	BITMAP_1t1_LINUX(F_SEAL_WRITE),
};
81 
82 #define	MFD_HUGETLB_ENTRY(_size)					\
83 	{								\
84 		.bsd_value = MFD_HUGE_##_size,				\
85 		.linux_value = LINUX_HUGETLB_FLAG_ENCODE_##_size	\
86 	}
87 static struct bsd_to_linux_bitmap mfd_bitmap[] = {
88 	BITMAP_1t1_LINUX(MFD_CLOEXEC),
89 	BITMAP_1t1_LINUX(MFD_ALLOW_SEALING),
90 	BITMAP_1t1_LINUX(MFD_HUGETLB),
91 	MFD_HUGETLB_ENTRY(64KB),
92 	MFD_HUGETLB_ENTRY(512KB),
93 	MFD_HUGETLB_ENTRY(1MB),
94 	MFD_HUGETLB_ENTRY(2MB),
95 	MFD_HUGETLB_ENTRY(8MB),
96 	MFD_HUGETLB_ENTRY(16MB),
97 	MFD_HUGETLB_ENTRY(32MB),
98 	MFD_HUGETLB_ENTRY(256MB),
99 	MFD_HUGETLB_ENTRY(512MB),
100 	MFD_HUGETLB_ENTRY(1GB),
101 	MFD_HUGETLB_ENTRY(2GB),
102 	MFD_HUGETLB_ENTRY(16GB),
103 };
104 #undef MFD_HUGETLB_ENTRY
105 
106 #ifdef LINUX_LEGACY_SYSCALLS
107 int
108 linux_creat(struct thread *td, struct linux_creat_args *args)
109 {
110 	char *path;
111 	int error;
112 
113 	if (!LUSECONVPATH(td)) {
114 		return (kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE,
115 		    O_WRONLY | O_CREAT | O_TRUNC, args->mode));
116 	}
117 	LCONVPATHEXIST(td, args->path, &path);
118 	error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE,
119 	    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
120 	LFREEPATH(path);
121 	return (error);
122 }
123 #endif
124 
125 static int
126 linux_common_openflags(int l_flags)
127 {
128 	int bsd_flags;
129 
130 	bsd_flags = 0;
131 	switch (l_flags & LINUX_O_ACCMODE) {
132 	case LINUX_O_WRONLY:
133 		bsd_flags |= O_WRONLY;
134 		break;
135 	case LINUX_O_RDWR:
136 		bsd_flags |= O_RDWR;
137 		break;
138 	default:
139 		bsd_flags |= O_RDONLY;
140 	}
141 	if (l_flags & LINUX_O_NDELAY)
142 		bsd_flags |= O_NONBLOCK;
143 	if (l_flags & LINUX_O_APPEND)
144 		bsd_flags |= O_APPEND;
145 	if (l_flags & LINUX_O_SYNC)
146 		bsd_flags |= O_FSYNC;
147 	if (l_flags & LINUX_O_CLOEXEC)
148 		bsd_flags |= O_CLOEXEC;
149 	if (l_flags & LINUX_O_NONBLOCK)
150 		bsd_flags |= O_NONBLOCK;
151 	if (l_flags & LINUX_O_ASYNC)
152 		bsd_flags |= O_ASYNC;
153 	if (l_flags & LINUX_O_CREAT)
154 		bsd_flags |= O_CREAT;
155 	if (l_flags & LINUX_O_TRUNC)
156 		bsd_flags |= O_TRUNC;
157 	if (l_flags & LINUX_O_EXCL)
158 		bsd_flags |= O_EXCL;
159 	if (l_flags & LINUX_O_NOCTTY)
160 		bsd_flags |= O_NOCTTY;
161 	if (l_flags & LINUX_O_DIRECT)
162 		bsd_flags |= O_DIRECT;
163 	if (l_flags & LINUX_O_NOFOLLOW)
164 		bsd_flags |= O_NOFOLLOW;
165 	if (l_flags & LINUX_O_DIRECTORY)
166 		bsd_flags |= O_DIRECTORY;
167 	if (l_flags & LINUX_O_PATH)
168 		bsd_flags |= O_PATH;
169 	/* XXX LINUX_O_NOATIME: unable to be easily implemented. */
170 	return (bsd_flags);
171 }
172 
173 static int
174 linux_common_open(struct thread *td, int dirfd, const char *path, int l_flags,
175     int mode, enum uio_seg seg)
176 {
177 	struct proc *p = td->td_proc;
178 	struct file *fp;
179 	int fd;
180 	int bsd_flags, error;
181 
182 	bsd_flags = linux_common_openflags(l_flags);
183 	error = kern_openat(td, dirfd, path, seg, bsd_flags, mode);
184 	if (error != 0) {
185 		if (error == EMLINK)
186 			error = ELOOP;
187 		goto done;
188 	}
189 	if (p->p_flag & P_CONTROLT)
190 		goto done;
191 	if (bsd_flags & O_NOCTTY)
192 		goto done;
193 
194 	/*
195 	 * XXX In between kern_openat() and fget(), another process
196 	 * having the same filedesc could use that fd without
197 	 * checking below.
198 	*/
199 	fd = td->td_retval[0];
200 	if (fget(td, fd, &cap_ioctl_rights, &fp) == 0) {
201 		if (fp->f_type != DTYPE_VNODE) {
202 			fdrop(fp, td);
203 			goto done;
204 		}
205 		sx_slock(&proctree_lock);
206 		PROC_LOCK(p);
207 		if (SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) {
208 			PROC_UNLOCK(p);
209 			sx_sunlock(&proctree_lock);
210 			/* XXXPJD: Verify if TIOCSCTTY is allowed. */
211 			(void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0,
212 			    td->td_ucred, td);
213 		} else {
214 			PROC_UNLOCK(p);
215 			sx_sunlock(&proctree_lock);
216 		}
217 		fdrop(fp, td);
218 	}
219 
220 done:
221 	return (error);
222 }
223 
224 int
225 linux_openat(struct thread *td, struct linux_openat_args *args)
226 {
227 	char *path;
228 	int dfd, error;
229 
230 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
231 	if (!LUSECONVPATH(td)) {
232 		return (linux_common_open(td, dfd, args->filename, args->flags,
233 		    args->mode, UIO_USERSPACE));
234 	}
235 	if (args->flags & LINUX_O_CREAT)
236 		LCONVPATH_AT(td, args->filename, &path, 1, dfd);
237 	else
238 		LCONVPATH_AT(td, args->filename, &path, 0, dfd);
239 
240 	error = linux_common_open(td, dfd, path, args->flags, args->mode,
241 	    UIO_SYSSPACE);
242 	LFREEPATH(path);
243 	return (error);
244 }
245 
246 #ifdef LINUX_LEGACY_SYSCALLS
247 int
248 linux_open(struct thread *td, struct linux_open_args *args)
249 {
250 	char *path;
251 	int error;
252 
253 	if (!LUSECONVPATH(td)) {
254 		return (linux_common_open(td, AT_FDCWD, args->path, args->flags,
255 		    args->mode, UIO_USERSPACE));
256 	}
257 	if (args->flags & LINUX_O_CREAT)
258 		LCONVPATHCREAT(td, args->path, &path);
259 	else
260 		LCONVPATHEXIST(td, args->path, &path);
261 
262 	error = linux_common_open(td, AT_FDCWD, path, args->flags, args->mode,
263 	    UIO_SYSSPACE);
264 	LFREEPATH(path);
265 	return (error);
266 }
267 #endif
268 
269 int
270 linux_name_to_handle_at(struct thread *td,
271     struct linux_name_to_handle_at_args *args)
272 {
273 	static const l_int valid_flags = (LINUX_AT_SYMLINK_FOLLOW |
274 	    LINUX_AT_EMPTY_PATH);
275 	static const l_uint fh_size = sizeof(fhandle_t);
276 
277 	fhandle_t fh;
278 	l_uint fh_bytes;
279 	l_int mount_id;
280 	int error, fd, bsd_flags;
281 
282 	if (args->flags & ~valid_flags)
283 		return (EINVAL);
284 	if (args->flags & LINUX_AT_EMPTY_PATH)
285 		/* XXX: not supported yet */
286 		return (EOPNOTSUPP);
287 
288 	fd = args->dirfd;
289 	if (fd == LINUX_AT_FDCWD)
290 		fd = AT_FDCWD;
291 
292 	bsd_flags = 0;
293 	if (!(args->flags & LINUX_AT_SYMLINK_FOLLOW))
294 		bsd_flags |= AT_SYMLINK_NOFOLLOW;
295 
296 	if (!LUSECONVPATH(td)) {
297 		error = kern_getfhat(td, bsd_flags, fd, args->name,
298 		    UIO_USERSPACE, &fh, UIO_SYSSPACE);
299 	} else {
300 		char *path;
301 
302 		LCONVPATH_AT(td, args->name, &path, 0, fd);
303 		error = kern_getfhat(td, bsd_flags, fd, path, UIO_SYSSPACE,
304 		    &fh, UIO_SYSSPACE);
305 		LFREEPATH(path);
306 	}
307 	if (error != 0)
308 		return (error);
309 
310 	/* Emit mount_id -- required before EOVERFLOW case. */
311 	mount_id = (fh.fh_fsid.val[0] ^ fh.fh_fsid.val[1]);
312 	error = copyout(&mount_id, args->mnt_id, sizeof(mount_id));
313 	if (error != 0)
314 		return (error);
315 
316 	/* Check if there is room for handle. */
317 	error = copyin(&args->handle->handle_bytes, &fh_bytes,
318 	    sizeof(fh_bytes));
319 	if (error != 0)
320 		return (error);
321 
322 	if (fh_bytes < fh_size) {
323 		error = copyout(&fh_size, &args->handle->handle_bytes,
324 		    sizeof(fh_size));
325 		if (error == 0)
326 			error = EOVERFLOW;
327 		return (error);
328 	}
329 
330 	/* Emit handle. */
331 	mount_id = 0;
332 	/*
333 	 * We don't use handle_type for anything yet, but initialize a known
334 	 * value.
335 	 */
336 	error = copyout(&mount_id, &args->handle->handle_type,
337 	    sizeof(mount_id));
338 	if (error != 0)
339 		return (error);
340 
341 	error = copyout(&fh, &args->handle->f_handle,
342 	    sizeof(fh));
343 	return (error);
344 }
345 
346 int
347 linux_open_by_handle_at(struct thread *td,
348     struct linux_open_by_handle_at_args *args)
349 {
350 	l_uint fh_bytes;
351 	int bsd_flags, error;
352 
353 	error = copyin(&args->handle->handle_bytes, &fh_bytes,
354 	    sizeof(fh_bytes));
355 	if (error != 0)
356 		return (error);
357 
358 	if (fh_bytes < sizeof(fhandle_t))
359 		return (EINVAL);
360 
361 	bsd_flags = linux_common_openflags(args->flags);
362 	return (kern_fhopen(td, (void *)&args->handle->f_handle, bsd_flags));
363 }
364 
365 int
366 linux_lseek(struct thread *td, struct linux_lseek_args *args)
367 {
368 
369 	return (kern_lseek(td, args->fdes, args->off, args->whence));
370 }
371 
372 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
373 int
374 linux_llseek(struct thread *td, struct linux_llseek_args *args)
375 {
376 	int error;
377 	off_t off;
378 
379 	off = (args->olow) | (((off_t) args->ohigh) << 32);
380 
381 	error = kern_lseek(td, args->fd, off, args->whence);
382 	if (error != 0)
383 		return (error);
384 
385 	error = copyout(td->td_retval, args->res, sizeof(off_t));
386 	if (error != 0)
387 		return (error);
388 
389 	td->td_retval[0] = 0;
390 	return (0);
391 }
392 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
393 
/*
 * Note that linux_getdents(2) and linux_getdents64(2) have the same
 * arguments. They differ only in the definition of struct dirent they
 * operate on.
 * Note that linux_readdir(2) is a special case of linux_getdents(2)
 * where count is always equal to 1, meaning that the buffer is one
 * dirent structure in size and that the code can't handle more anyway.
 * Note that linux_readdir(2) can't be implemented by means of
 * linux_getdents(2): when the *dent buffer size is equal to 1,
 * linux_getdents(2) would trash the user stack.
 */
405 
406 static int
407 linux_getdents_error(struct thread *td, int fd, int err)
408 {
409 	struct vnode *vp;
410 	struct file *fp;
411 	int error;
412 
413 	/* Linux return ENOTDIR in case when fd is not a directory. */
414 	error = getvnode(td, fd, &cap_read_rights, &fp);
415 	if (error != 0)
416 		return (error);
417 	vp = fp->f_vnode;
418 	if (vp->v_type != VDIR) {
419 		fdrop(fp, td);
420 		return (ENOTDIR);
421 	}
422 	fdrop(fp, td);
423 	return (err);
424 }
425 
/*
 * Linux-format directory record produced by linux_getdents() and
 * linux_readdir().  Only the used part of d_name is copied out; the record
 * length is computed with LINUX_RECLEN().
 */
struct l_dirent {
	l_ulong		d_ino;		/* filled from the native d_fileno */
	l_off_t		d_off;		/* offset value reported to Linux */
	l_ushort	d_reclen;	/* length value reported to Linux */
	char		d_name[LINUX_NAME_MAX + 1];	/* NUL-terminated name */
};
432 
/*
 * Linux-format directory record produced by linux_getdents64().  Unlike
 * struct l_dirent it has fixed 64-bit d_ino/d_off fields and an explicit
 * d_type member.  The record length is computed with LINUX_RECLEN64().
 */
struct l_dirent64 {
	uint64_t	d_ino;		/* filled from the native d_fileno */
	int64_t		d_off;		/* offset value reported to Linux */
	l_ushort	d_reclen;	/* length of this record */
	u_char		d_type;		/* copied from the native d_type */
	char		d_name[LINUX_NAME_MAX + 1];	/* NUL-terminated name */
};
440 
/*
 * Linux uses the last byte in the dirent buffer to store d_type,
 * at least glibc-2.7 requires it. That is why l_dirent is padded with 2 bytes.
 *
 * LINUX_RECLEN(namlen): size of a struct l_dirent record holding a name of
 * namlen characters -- header, name, plus 2 extra bytes, rounded up to a
 * multiple of sizeof(l_ulong).  linux_getdents() stores d_type in the last
 * byte of the record.
 */
#define LINUX_RECLEN(namlen)						\
    roundup(offsetof(struct l_dirent, d_name) + (namlen) + 2, sizeof(l_ulong))

/*
 * LINUX_RECLEN64(namlen): size of a struct l_dirent64 record holding a name
 * of namlen characters -- header, name, plus 1 extra byte, rounded up to a
 * multiple of sizeof(uint64_t).  d_type has its own member here, so no
 * trailing type byte is reserved.
 */
#define LINUX_RECLEN64(namlen)						\
    roundup(offsetof(struct l_dirent64, d_name) + (namlen) + 1,		\
    sizeof(uint64_t))
451 
452 #ifdef LINUX_LEGACY_SYSCALLS
453 int
454 linux_getdents(struct thread *td, struct linux_getdents_args *args)
455 {
456 	struct dirent *bdp;
457 	caddr_t inp, buf;		/* BSD-format */
458 	int len, reclen;		/* BSD-format */
459 	caddr_t outp;			/* Linux-format */
460 	int resid, linuxreclen;		/* Linux-format */
461 	caddr_t lbuf;			/* Linux-format */
462 	off_t base;
463 	struct l_dirent *linux_dirent;
464 	int buflen, error;
465 	size_t retval;
466 
467 	buflen = min(args->count, MAXBSIZE);
468 	buf = malloc(buflen, M_TEMP, M_WAITOK);
469 
470 	error = kern_getdirentries(td, args->fd, buf, buflen,
471 	    &base, NULL, UIO_SYSSPACE);
472 	if (error != 0) {
473 		error = linux_getdents_error(td, args->fd, error);
474 		goto out1;
475 	}
476 
477 	lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
478 
479 	len = td->td_retval[0];
480 	inp = buf;
481 	outp = (caddr_t)args->dent;
482 	resid = args->count;
483 	retval = 0;
484 
485 	while (len > 0) {
486 		bdp = (struct dirent *) inp;
487 		reclen = bdp->d_reclen;
488 		linuxreclen = LINUX_RECLEN(bdp->d_namlen);
489 		/*
490 		 * No more space in the user supplied dirent buffer.
491 		 * Return EINVAL.
492 		 */
493 		if (resid < linuxreclen) {
494 			error = EINVAL;
495 			goto out;
496 		}
497 
498 		linux_dirent = (struct l_dirent*)lbuf;
499 		linux_dirent->d_ino = bdp->d_fileno;
500 		linux_dirent->d_off = base + reclen;
501 		linux_dirent->d_reclen = linuxreclen;
502 		/*
503 		 * Copy d_type to last byte of l_dirent buffer
504 		 */
505 		lbuf[linuxreclen - 1] = bdp->d_type;
506 		strlcpy(linux_dirent->d_name, bdp->d_name,
507 		    linuxreclen - offsetof(struct l_dirent, d_name)-1);
508 		error = copyout(linux_dirent, outp, linuxreclen);
509 		if (error != 0)
510 			goto out;
511 
512 		inp += reclen;
513 		base += reclen;
514 		len -= reclen;
515 
516 		retval += linuxreclen;
517 		outp += linuxreclen;
518 		resid -= linuxreclen;
519 	}
520 	td->td_retval[0] = retval;
521 
522 out:
523 	free(lbuf, M_TEMP);
524 out1:
525 	free(buf, M_TEMP);
526 	return (error);
527 }
528 #endif
529 
530 int
531 linux_getdents64(struct thread *td, struct linux_getdents64_args *args)
532 {
533 	struct dirent *bdp;
534 	caddr_t inp, buf;		/* BSD-format */
535 	int len, reclen;		/* BSD-format */
536 	caddr_t outp;			/* Linux-format */
537 	int resid, linuxreclen;		/* Linux-format */
538 	caddr_t lbuf;			/* Linux-format */
539 	off_t base;
540 	struct l_dirent64 *linux_dirent64;
541 	int buflen, error;
542 	size_t retval;
543 
544 	buflen = min(args->count, MAXBSIZE);
545 	buf = malloc(buflen, M_TEMP, M_WAITOK);
546 
547 	error = kern_getdirentries(td, args->fd, buf, buflen,
548 	    &base, NULL, UIO_SYSSPACE);
549 	if (error != 0) {
550 		error = linux_getdents_error(td, args->fd, error);
551 		goto out1;
552 	}
553 
554 	lbuf = malloc(LINUX_RECLEN64(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
555 
556 	len = td->td_retval[0];
557 	inp = buf;
558 	outp = (caddr_t)args->dirent;
559 	resid = args->count;
560 	retval = 0;
561 
562 	while (len > 0) {
563 		bdp = (struct dirent *) inp;
564 		reclen = bdp->d_reclen;
565 		linuxreclen = LINUX_RECLEN64(bdp->d_namlen);
566 		/*
567 		 * No more space in the user supplied dirent buffer.
568 		 * Return EINVAL.
569 		 */
570 		if (resid < linuxreclen) {
571 			error = EINVAL;
572 			goto out;
573 		}
574 
575 		linux_dirent64 = (struct l_dirent64*)lbuf;
576 		linux_dirent64->d_ino = bdp->d_fileno;
577 		linux_dirent64->d_off = base + reclen;
578 		linux_dirent64->d_reclen = linuxreclen;
579 		linux_dirent64->d_type = bdp->d_type;
580 		strlcpy(linux_dirent64->d_name, bdp->d_name,
581 		    linuxreclen - offsetof(struct l_dirent64, d_name));
582 		error = copyout(linux_dirent64, outp, linuxreclen);
583 		if (error != 0)
584 			goto out;
585 
586 		inp += reclen;
587 		base += reclen;
588 		len -= reclen;
589 
590 		retval += linuxreclen;
591 		outp += linuxreclen;
592 		resid -= linuxreclen;
593 	}
594 	td->td_retval[0] = retval;
595 
596 out:
597 	free(lbuf, M_TEMP);
598 out1:
599 	free(buf, M_TEMP);
600 	return (error);
601 }
602 
603 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
604 int
605 linux_readdir(struct thread *td, struct linux_readdir_args *args)
606 {
607 	struct dirent *bdp;
608 	caddr_t buf;			/* BSD-format */
609 	int linuxreclen;		/* Linux-format */
610 	caddr_t lbuf;			/* Linux-format */
611 	off_t base;
612 	struct l_dirent *linux_dirent;
613 	int buflen, error;
614 
615 	buflen = LINUX_RECLEN(LINUX_NAME_MAX);
616 	buf = malloc(buflen, M_TEMP, M_WAITOK);
617 
618 	error = kern_getdirentries(td, args->fd, buf, buflen,
619 	    &base, NULL, UIO_SYSSPACE);
620 	if (error != 0) {
621 		error = linux_getdents_error(td, args->fd, error);
622 		goto out;
623 	}
624 	if (td->td_retval[0] == 0)
625 		goto out;
626 
627 	lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
628 
629 	bdp = (struct dirent *) buf;
630 	linuxreclen = LINUX_RECLEN(bdp->d_namlen);
631 
632 	linux_dirent = (struct l_dirent*)lbuf;
633 	linux_dirent->d_ino = bdp->d_fileno;
634 	linux_dirent->d_off = linuxreclen;
635 	linux_dirent->d_reclen = bdp->d_namlen;
636 	strlcpy(linux_dirent->d_name, bdp->d_name,
637 	    linuxreclen - offsetof(struct l_dirent, d_name));
638 	error = copyout(linux_dirent, args->dent, linuxreclen);
639 	if (error == 0)
640 		td->td_retval[0] = linuxreclen;
641 
642 	free(lbuf, M_TEMP);
643 out:
644 	free(buf, M_TEMP);
645 	return (error);
646 }
647 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
648 
649 /*
650  * These exist mainly for hooks for doing /compat/linux translation.
651  */
652 
653 #ifdef LINUX_LEGACY_SYSCALLS
654 int
655 linux_access(struct thread *td, struct linux_access_args *args)
656 {
657 	char *path;
658 	int error;
659 
660 	/* Linux convention. */
661 	if (args->amode & ~(F_OK | X_OK | W_OK | R_OK))
662 		return (EINVAL);
663 
664 	if (!LUSECONVPATH(td)) {
665 		error = kern_accessat(td, AT_FDCWD, args->path, UIO_USERSPACE, 0,
666 		    args->amode);
667 	} else {
668 		LCONVPATHEXIST(td, args->path, &path);
669 		error = kern_accessat(td, AT_FDCWD, path, UIO_SYSSPACE, 0,
670 		    args->amode);
671 		LFREEPATH(path);
672 	}
673 
674 	return (error);
675 }
676 #endif
677 
678 int
679 linux_faccessat(struct thread *td, struct linux_faccessat_args *args)
680 {
681 	char *path;
682 	int error, dfd;
683 
684 	/* Linux convention. */
685 	if (args->amode & ~(F_OK | X_OK | W_OK | R_OK))
686 		return (EINVAL);
687 
688 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
689 	if (!LUSECONVPATH(td)) {
690 		error = kern_accessat(td, dfd, args->filename, UIO_USERSPACE, 0, args->amode);
691 	} else {
692 		LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
693 		error = kern_accessat(td, dfd, path, UIO_SYSSPACE, 0, args->amode);
694 		LFREEPATH(path);
695 	}
696 
697 	return (error);
698 }
699 
700 #ifdef LINUX_LEGACY_SYSCALLS
701 int
702 linux_unlink(struct thread *td, struct linux_unlink_args *args)
703 {
704 	char *path;
705 	int error;
706 	struct stat st;
707 
708 	if (!LUSECONVPATH(td)) {
709 		error = kern_funlinkat(td, AT_FDCWD, args->path, FD_NONE,
710 		    UIO_USERSPACE, 0, 0);
711 		if (error == EPERM) {
712 			/* Introduce POSIX noncompliant behaviour of Linux */
713 			if (kern_statat(td, 0, AT_FDCWD, args->path,
714 			    UIO_SYSSPACE, &st, NULL) == 0) {
715 				if (S_ISDIR(st.st_mode))
716 					error = EISDIR;
717 			}
718 		}
719 	} else {
720 		LCONVPATHEXIST(td, args->path, &path);
721 		error = kern_funlinkat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0, 0);
722 		if (error == EPERM) {
723 			/* Introduce POSIX noncompliant behaviour of Linux */
724 			if (kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &st,
725 			    NULL) == 0) {
726 				if (S_ISDIR(st.st_mode))
727 					error = EISDIR;
728 			}
729 		}
730 		LFREEPATH(path);
731 	}
732 
733 	return (error);
734 }
735 #endif
736 
737 static int
738 linux_unlinkat_impl(struct thread *td, enum uio_seg pathseg, const char *path,
739     int dfd, struct linux_unlinkat_args *args)
740 {
741 	struct stat st;
742 	int error;
743 
744 	if (args->flag & LINUX_AT_REMOVEDIR)
745 		error = kern_frmdirat(td, dfd, path, FD_NONE, pathseg, 0);
746 	else
747 		error = kern_funlinkat(td, dfd, path, FD_NONE, pathseg, 0, 0);
748 	if (error == EPERM && !(args->flag & LINUX_AT_REMOVEDIR)) {
749 		/* Introduce POSIX noncompliant behaviour of Linux */
750 		if (kern_statat(td, AT_SYMLINK_NOFOLLOW, dfd, path,
751 		    UIO_SYSSPACE, &st, NULL) == 0 && S_ISDIR(st.st_mode))
752 			error = EISDIR;
753 	}
754 	return (error);
755 }
756 
757 int
758 linux_unlinkat(struct thread *td, struct linux_unlinkat_args *args)
759 {
760 	char *path;
761 	int error, dfd;
762 
763 	if (args->flag & ~LINUX_AT_REMOVEDIR)
764 		return (EINVAL);
765 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
766 	if (!LUSECONVPATH(td)) {
767 		return (linux_unlinkat_impl(td, UIO_USERSPACE, args->pathname,
768 		    dfd, args));
769 	}
770 	LCONVPATHEXIST_AT(td, args->pathname, &path, dfd);
771 	error = linux_unlinkat_impl(td, UIO_SYSSPACE, path, dfd, args);
772 	LFREEPATH(path);
773 	return (error);
774 }
775 int
776 linux_chdir(struct thread *td, struct linux_chdir_args *args)
777 {
778 	char *path;
779 	int error;
780 
781 	if (!LUSECONVPATH(td)) {
782 		return (kern_chdir(td, args->path, UIO_USERSPACE));
783 	}
784 	LCONVPATHEXIST(td, args->path, &path);
785 	error = kern_chdir(td, path, UIO_SYSSPACE);
786 	LFREEPATH(path);
787 	return (error);
788 }
789 
790 #ifdef LINUX_LEGACY_SYSCALLS
791 int
792 linux_chmod(struct thread *td, struct linux_chmod_args *args)
793 {
794 	char *path;
795 	int error;
796 
797 	if (!LUSECONVPATH(td)) {
798 		return (kern_fchmodat(td, AT_FDCWD, args->path, UIO_USERSPACE,
799 		    args->mode, 0));
800 	}
801 	LCONVPATHEXIST(td, args->path, &path);
802 	error = kern_fchmodat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode, 0);
803 	LFREEPATH(path);
804 	return (error);
805 }
806 #endif
807 
808 int
809 linux_fchmodat(struct thread *td, struct linux_fchmodat_args *args)
810 {
811 	char *path;
812 	int error, dfd;
813 
814 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
815 	if (!LUSECONVPATH(td)) {
816 		return (kern_fchmodat(td, dfd, args->filename, UIO_USERSPACE,
817 		    args->mode, 0));
818 	}
819 	LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
820 	error = kern_fchmodat(td, dfd, path, UIO_SYSSPACE, args->mode, 0);
821 	LFREEPATH(path);
822 	return (error);
823 }
824 
825 #ifdef LINUX_LEGACY_SYSCALLS
826 int
827 linux_mkdir(struct thread *td, struct linux_mkdir_args *args)
828 {
829 	char *path;
830 	int error;
831 
832 	if (!LUSECONVPATH(td)) {
833 		return (kern_mkdirat(td, AT_FDCWD, args->path, UIO_USERSPACE, args->mode));
834 	}
835 	LCONVPATHCREAT(td, args->path, &path);
836 	error = kern_mkdirat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode);
837 	LFREEPATH(path);
838 	return (error);
839 }
840 #endif
841 
842 int
843 linux_mkdirat(struct thread *td, struct linux_mkdirat_args *args)
844 {
845 	char *path;
846 	int error, dfd;
847 
848 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
849 	if (!LUSECONVPATH(td)) {
850 		return (kern_mkdirat(td, dfd, args->pathname, UIO_USERSPACE, args->mode));
851 	}
852 	LCONVPATHCREAT_AT(td, args->pathname, &path, dfd);
853 	error = kern_mkdirat(td, dfd, path, UIO_SYSSPACE, args->mode);
854 	LFREEPATH(path);
855 	return (error);
856 }
857 
858 #ifdef LINUX_LEGACY_SYSCALLS
859 int
860 linux_rmdir(struct thread *td, struct linux_rmdir_args *args)
861 {
862 	char *path;
863 	int error;
864 
865 	if (!LUSECONVPATH(td)) {
866 		return (kern_frmdirat(td, AT_FDCWD, args->path, FD_NONE,
867 		    UIO_USERSPACE, 0));
868 	}
869 	LCONVPATHEXIST(td, args->path, &path);
870 	error = kern_frmdirat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0);
871 	LFREEPATH(path);
872 	return (error);
873 }
874 
875 int
876 linux_rename(struct thread *td, struct linux_rename_args *args)
877 {
878 	char *from, *to;
879 	int error;
880 
881 	if (!LUSECONVPATH(td)) {
882 		return (kern_renameat(td, AT_FDCWD, args->from, AT_FDCWD,
883 		    args->to, UIO_USERSPACE));
884 	}
885 	LCONVPATHEXIST(td, args->from, &from);
886 	/* Expand LCONVPATHCREATE so that `from' can be freed on errors */
887 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD);
888 	if (to == NULL) {
889 		LFREEPATH(from);
890 		return (error);
891 	}
892 	error = kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, UIO_SYSSPACE);
893 	LFREEPATH(from);
894 	LFREEPATH(to);
895 	return (error);
896 }
897 #endif
898 
899 int
900 linux_renameat(struct thread *td, struct linux_renameat_args *args)
901 {
902 	struct linux_renameat2_args renameat2_args = {
903 	    .olddfd = args->olddfd,
904 	    .oldname = args->oldname,
905 	    .newdfd = args->newdfd,
906 	    .newname = args->newname,
907 	    .flags = 0
908 	};
909 
910 	return (linux_renameat2(td, &renameat2_args));
911 }
912 
913 int
914 linux_renameat2(struct thread *td, struct linux_renameat2_args *args)
915 {
916 	char *from, *to;
917 	int error, olddfd, newdfd;
918 
919 	if (args->flags != 0) {
920 		if (args->flags & ~(LINUX_RENAME_EXCHANGE |
921 		    LINUX_RENAME_NOREPLACE | LINUX_RENAME_WHITEOUT))
922 			return (EINVAL);
923 		if (args->flags & LINUX_RENAME_EXCHANGE &&
924 		    args->flags & (LINUX_RENAME_NOREPLACE |
925 		    LINUX_RENAME_WHITEOUT))
926 			return (EINVAL);
927 #if 0
928 		/*
929 		 * This spams the console on Ubuntu Focal.
930 		 *
931 		 * What's needed here is a general mechanism to let users know
932 		 * about missing features without hogging the system.
933 		 */
934 		linux_msg(td, "renameat2 unsupported flags 0x%x",
935 		    args->flags);
936 #endif
937 		return (EINVAL);
938 	}
939 
940 	olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd;
941 	newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd;
942 	if (!LUSECONVPATH(td)) {
943 		return (kern_renameat(td, olddfd, args->oldname, newdfd,
944 		    args->newname, UIO_USERSPACE));
945 	}
946 	LCONVPATHEXIST_AT(td, args->oldname, &from, olddfd);
947 	/* Expand LCONVPATHCREATE so that `from' can be freed on errors */
948 	error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd);
949 	if (to == NULL) {
950 		LFREEPATH(from);
951 		return (error);
952 	}
953 	error = kern_renameat(td, olddfd, from, newdfd, to, UIO_SYSSPACE);
954 	LFREEPATH(from);
955 	LFREEPATH(to);
956 	return (error);
957 }
958 
959 #ifdef LINUX_LEGACY_SYSCALLS
960 int
961 linux_symlink(struct thread *td, struct linux_symlink_args *args)
962 {
963 	char *path, *to;
964 	int error;
965 
966 	if (!LUSECONVPATH(td)) {
967 		return (kern_symlinkat(td, args->path, AT_FDCWD, args->to,
968 		    UIO_USERSPACE));
969 	}
970 	LCONVPATHEXIST(td, args->path, &path);
971 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
972 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD);
973 	if (to == NULL) {
974 		LFREEPATH(path);
975 		return (error);
976 	}
977 	error = kern_symlinkat(td, path, AT_FDCWD, to, UIO_SYSSPACE);
978 	LFREEPATH(path);
979 	LFREEPATH(to);
980 	return (error);
981 }
982 #endif
983 
984 int
985 linux_symlinkat(struct thread *td, struct linux_symlinkat_args *args)
986 {
987 	char *path, *to;
988 	int error, dfd;
989 
990 	dfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd;
991 	if (!LUSECONVPATH(td)) {
992 		return (kern_symlinkat(td, args->oldname, dfd, args->newname,
993 		    UIO_USERSPACE));
994 	}
995 	LCONVPATHEXIST(td, args->oldname, &path);
996 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
997 	error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, dfd);
998 	if (to == NULL) {
999 		LFREEPATH(path);
1000 		return (error);
1001 	}
1002 	error = kern_symlinkat(td, path, dfd, to, UIO_SYSSPACE);
1003 	LFREEPATH(path);
1004 	LFREEPATH(to);
1005 	return (error);
1006 }
1007 
1008 #ifdef LINUX_LEGACY_SYSCALLS
1009 int
1010 linux_readlink(struct thread *td, struct linux_readlink_args *args)
1011 {
1012 	char *name;
1013 	int error;
1014 
1015 	if (!LUSECONVPATH(td)) {
1016 		return (kern_readlinkat(td, AT_FDCWD, args->name, UIO_USERSPACE,
1017 		    args->buf, UIO_USERSPACE, args->count));
1018 	}
1019 	LCONVPATHEXIST(td, args->name, &name);
1020 	error = kern_readlinkat(td, AT_FDCWD, name, UIO_SYSSPACE,
1021 	    args->buf, UIO_USERSPACE, args->count);
1022 	LFREEPATH(name);
1023 	return (error);
1024 }
1025 #endif
1026 
1027 int
1028 linux_readlinkat(struct thread *td, struct linux_readlinkat_args *args)
1029 {
1030 	char *name;
1031 	int error, dfd;
1032 
1033 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
1034 	if (!LUSECONVPATH(td)) {
1035 		return (kern_readlinkat(td, dfd, args->path, UIO_USERSPACE,
1036 		    args->buf, UIO_USERSPACE, args->bufsiz));
1037 	}
1038 	LCONVPATHEXIST_AT(td, args->path, &name, dfd);
1039 	error = kern_readlinkat(td, dfd, name, UIO_SYSSPACE, args->buf,
1040 	    UIO_USERSPACE, args->bufsiz);
1041 	LFREEPATH(name);
1042 	return (error);
1043 }
1044 
1045 int
1046 linux_truncate(struct thread *td, struct linux_truncate_args *args)
1047 {
1048 	char *path;
1049 	int error;
1050 
1051 	if (!LUSECONVPATH(td)) {
1052 		return (kern_truncate(td, args->path, UIO_USERSPACE, args->length));
1053 	}
1054 	LCONVPATHEXIST(td, args->path, &path);
1055 	error = kern_truncate(td, path, UIO_SYSSPACE, args->length);
1056 	LFREEPATH(path);
1057 	return (error);
1058 }
1059 
1060 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1061 int
1062 linux_truncate64(struct thread *td, struct linux_truncate64_args *args)
1063 {
1064 	char *path;
1065 	off_t length;
1066 	int error;
1067 
1068 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1069 	length = PAIR32TO64(off_t, args->length);
1070 #else
1071 	length = args->length;
1072 #endif
1073 
1074 	if (!LUSECONVPATH(td)) {
1075 		return (kern_truncate(td, args->path, UIO_USERSPACE, length));
1076 	}
1077 	LCONVPATHEXIST(td, args->path, &path);
1078 	error = kern_truncate(td, path, UIO_SYSSPACE, length);
1079 	LFREEPATH(path);
1080 	return (error);
1081 }
1082 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1083 
1084 int
1085 linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args)
1086 {
1087 
1088 	return (kern_ftruncate(td, args->fd, args->length));
1089 }
1090 
1091 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1092 int
1093 linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
1094 {
1095 	off_t length;
1096 
1097 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1098 	length = PAIR32TO64(off_t, args->length);
1099 #else
1100 	length = args->length;
1101 #endif
1102 
1103 	return (kern_ftruncate(td, args->fd, length));
1104 }
1105 #endif
1106 
1107 #ifdef LINUX_LEGACY_SYSCALLS
1108 int
1109 linux_link(struct thread *td, struct linux_link_args *args)
1110 {
1111 	char *path, *to;
1112 	int error;
1113 
1114 	if (!LUSECONVPATH(td)) {
1115 		return (kern_linkat(td, AT_FDCWD, AT_FDCWD, args->path, args->to,
1116 		    UIO_USERSPACE, AT_SYMLINK_FOLLOW));
1117 	}
1118 	LCONVPATHEXIST(td, args->path, &path);
1119 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
1120 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD);
1121 	if (to == NULL) {
1122 		LFREEPATH(path);
1123 		return (error);
1124 	}
1125 	error = kern_linkat(td, AT_FDCWD, AT_FDCWD, path, to, UIO_SYSSPACE,
1126 	    AT_SYMLINK_FOLLOW);
1127 	LFREEPATH(path);
1128 	LFREEPATH(to);
1129 	return (error);
1130 }
1131 #endif
1132 
1133 int
1134 linux_linkat(struct thread *td, struct linux_linkat_args *args)
1135 {
1136 	char *path, *to;
1137 	int error, olddfd, newdfd, flag;
1138 
1139 	if (args->flag & ~(LINUX_AT_SYMLINK_FOLLOW | LINUX_AT_EMPTY_PATH))
1140 		return (EINVAL);
1141 
1142 	flag = (args->flag & LINUX_AT_SYMLINK_FOLLOW) == 0 ? AT_SYMLINK_FOLLOW :
1143 	    0;
1144 	flag |= (args->flag & LINUX_AT_EMPTY_PATH) == 0 ? AT_EMPTY_PATH : 0;
1145 
1146 	olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd;
1147 	newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd;
1148 	if (!LUSECONVPATH(td)) {
1149 		return (kern_linkat(td, olddfd, newdfd, args->oldname,
1150 		    args->newname, UIO_USERSPACE, flag));
1151 	}
1152 	LCONVPATHEXIST_AT(td, args->oldname, &path, olddfd);
1153 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
1154 	error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd);
1155 	if (to == NULL) {
1156 		LFREEPATH(path);
1157 		return (error);
1158 	}
1159 	error = kern_linkat(td, olddfd, newdfd, path, to, UIO_SYSSPACE, flag);
1160 	LFREEPATH(path);
1161 	LFREEPATH(to);
1162 	return (error);
1163 }
1164 
1165 int
1166 linux_fdatasync(struct thread *td, struct linux_fdatasync_args *uap)
1167 {
1168 
1169 	return (kern_fsync(td, uap->fd, false));
1170 }
1171 
1172 int
1173 linux_sync_file_range(struct thread *td, struct linux_sync_file_range_args *uap)
1174 {
1175 	off_t nbytes, offset;
1176 
1177 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1178 	nbytes = PAIR32TO64(off_t, uap->nbytes);
1179 	offset = PAIR32TO64(off_t, uap->offset);
1180 #else
1181 	nbytes = uap->nbytes;
1182 	offset = uap->offset;
1183 #endif
1184 
1185 	if (offset < 0 || nbytes < 0 ||
1186 	    (uap->flags & ~(LINUX_SYNC_FILE_RANGE_WAIT_BEFORE |
1187 	    LINUX_SYNC_FILE_RANGE_WRITE |
1188 	    LINUX_SYNC_FILE_RANGE_WAIT_AFTER)) != 0) {
1189 		return (EINVAL);
1190 	}
1191 
1192 	return (kern_fsync(td, uap->fd, false));
1193 }
1194 
1195 int
1196 linux_pread(struct thread *td, struct linux_pread_args *uap)
1197 {
1198 	struct vnode *vp;
1199 	off_t offset;
1200 	int error;
1201 
1202 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1203 	offset = PAIR32TO64(off_t, uap->offset);
1204 #else
1205 	offset = uap->offset;
1206 #endif
1207 
1208 	error = kern_pread(td, uap->fd, uap->buf, uap->nbyte, offset);
1209 	if (error == 0) {
1210 		/* This seems to violate POSIX but Linux does it. */
1211 		error = fgetvp(td, uap->fd, &cap_pread_rights, &vp);
1212 		if (error != 0)
1213 			return (error);
1214 		if (vp->v_type == VDIR)
1215 			error = EISDIR;
1216 		vrele(vp);
1217 	}
1218 	return (error);
1219 }
1220 
1221 int
1222 linux_pwrite(struct thread *td, struct linux_pwrite_args *uap)
1223 {
1224 	off_t offset;
1225 
1226 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1227 	offset = PAIR32TO64(off_t, uap->offset);
1228 #else
1229 	offset = uap->offset;
1230 #endif
1231 
1232 	return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, offset));
1233 }
1234 
1235 int
1236 linux_preadv(struct thread *td, struct linux_preadv_args *uap)
1237 {
1238 	struct uio *auio;
1239 	int error;
1240 	off_t offset;
1241 
1242 	/*
1243 	 * According http://man7.org/linux/man-pages/man2/preadv.2.html#NOTES
1244 	 * pos_l and pos_h, respectively, contain the
1245 	 * low order and high order 32 bits of offset.
1246 	 */
1247 	offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) <<
1248 	    (sizeof(offset) * 4)) | uap->pos_l;
1249 	if (offset < 0)
1250 		return (EINVAL);
1251 #ifdef COMPAT_LINUX32
1252 	error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio);
1253 #else
1254 	error = copyinuio(uap->vec, uap->vlen, &auio);
1255 #endif
1256 	if (error != 0)
1257 		return (error);
1258 	error = kern_preadv(td, uap->fd, auio, offset);
1259 	free(auio, M_IOV);
1260 	return (error);
1261 }
1262 
1263 int
1264 linux_pwritev(struct thread *td, struct linux_pwritev_args *uap)
1265 {
1266 	struct uio *auio;
1267 	int error;
1268 	off_t offset;
1269 
1270 	/*
1271 	 * According http://man7.org/linux/man-pages/man2/pwritev.2.html#NOTES
1272 	 * pos_l and pos_h, respectively, contain the
1273 	 * low order and high order 32 bits of offset.
1274 	 */
1275 	offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) <<
1276 	    (sizeof(offset) * 4)) | uap->pos_l;
1277 	if (offset < 0)
1278 		return (EINVAL);
1279 #ifdef COMPAT_LINUX32
1280 	error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio);
1281 #else
1282 	error = copyinuio(uap->vec, uap->vlen, &auio);
1283 #endif
1284 	if (error != 0)
1285 		return (error);
1286 	error = kern_pwritev(td, uap->fd, auio, offset);
1287 	free(auio, M_IOV);
1288 	return (error);
1289 }
1290 
1291 int
1292 linux_mount(struct thread *td, struct linux_mount_args *args)
1293 {
1294 	struct mntarg *ma = NULL;
1295 	char *fstypename, *mntonname, *mntfromname, *data;
1296 	int error, fsflags;
1297 
1298 	fstypename = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1299 	mntonname = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1300 	mntfromname = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1301 	data = NULL;
1302 	error = copyinstr(args->filesystemtype, fstypename, MNAMELEN - 1,
1303 	    NULL);
1304 	if (error != 0)
1305 		goto out;
1306 	if (args->specialfile != NULL) {
1307 		error = copyinstr(args->specialfile, mntfromname, MNAMELEN - 1, NULL);
1308 		if (error != 0)
1309 			goto out;
1310 	} else {
1311 		mntfromname[0] = '\0';
1312 	}
1313 	error = copyinstr(args->dir, mntonname, MNAMELEN - 1, NULL);
1314 	if (error != 0)
1315 		goto out;
1316 
1317 	if (strcmp(fstypename, "ext2") == 0) {
1318 		strcpy(fstypename, "ext2fs");
1319 	} else if (strcmp(fstypename, "proc") == 0) {
1320 		strcpy(fstypename, "linprocfs");
1321 	} else if (strcmp(fstypename, "vfat") == 0) {
1322 		strcpy(fstypename, "msdosfs");
1323 	} else if (strcmp(fstypename, "fuse") == 0) {
1324 		char *fuse_options, *fuse_option, *fuse_name;
1325 
1326 		if (strcmp(mntfromname, "fuse") == 0)
1327 			strcpy(mntfromname, "/dev/fuse");
1328 
1329 		strcpy(fstypename, "fusefs");
1330 		data = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1331 		error = copyinstr(args->data, data, MNAMELEN - 1, NULL);
1332 		if (error != 0)
1333 			goto out;
1334 
1335 		fuse_options = data;
1336 		while ((fuse_option = strsep(&fuse_options, ",")) != NULL) {
1337 			fuse_name = strsep(&fuse_option, "=");
1338 			if (fuse_name == NULL || fuse_option == NULL)
1339 				goto out;
1340 			ma = mount_arg(ma, fuse_name, fuse_option, -1);
1341 		}
1342 
1343 		/*
1344 		 * The FUSE server uses Linux errno values instead of FreeBSD
1345 		 * ones; add a flag to tell fuse(4) to do errno translation.
1346 		 */
1347 		ma = mount_arg(ma, "linux_errnos", "1", -1);
1348 	}
1349 
1350 	fsflags = 0;
1351 
1352 	/*
1353 	 * Linux SYNC flag is not included; the closest equivalent
1354 	 * FreeBSD has is !ASYNC, which is our default.
1355 	 */
1356 	if (args->rwflag & LINUX_MS_RDONLY)
1357 		fsflags |= MNT_RDONLY;
1358 	if (args->rwflag & LINUX_MS_NOSUID)
1359 		fsflags |= MNT_NOSUID;
1360 	if (args->rwflag & LINUX_MS_NOEXEC)
1361 		fsflags |= MNT_NOEXEC;
1362 	if (args->rwflag & LINUX_MS_REMOUNT)
1363 		fsflags |= MNT_UPDATE;
1364 
1365 	ma = mount_arg(ma, "fstype", fstypename, -1);
1366 	ma = mount_arg(ma, "fspath", mntonname, -1);
1367 	ma = mount_arg(ma, "from", mntfromname, -1);
1368 	error = kernel_mount(ma, fsflags);
1369 out:
1370 	free(fstypename, M_TEMP);
1371 	free(mntonname, M_TEMP);
1372 	free(mntfromname, M_TEMP);
1373 	free(data, M_TEMP);
1374 	return (error);
1375 }
1376 
1377 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1378 int
1379 linux_oldumount(struct thread *td, struct linux_oldumount_args *args)
1380 {
1381 
1382 	return (kern_unmount(td, args->path, 0));
1383 }
1384 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1385 
1386 #ifdef LINUX_LEGACY_SYSCALLS
1387 int
1388 linux_umount(struct thread *td, struct linux_umount_args *args)
1389 {
1390 	int flags;
1391 
1392 	flags = 0;
1393 	if ((args->flags & LINUX_MNT_FORCE) != 0) {
1394 		args->flags &= ~LINUX_MNT_FORCE;
1395 		flags |= MNT_FORCE;
1396 	}
1397 	if (args->flags != 0) {
1398 		linux_msg(td, "unsupported umount2 flags %#x", args->flags);
1399 		return (EINVAL);
1400 	}
1401 
1402 	return (kern_unmount(td, args->path, flags));
1403 }
1404 #endif
1405 
1406 /*
1407  * fcntl family of syscalls
1408  */
1409 
/*
 * Linux view of a file lock description.  fcntl_common() copies this
 * structure in from and out to user space for the LINUX_F_GETLK,
 * LINUX_F_SETLK and LINUX_F_SETLKW commands and converts it to and from
 * the native struct flock.
 *
 * The structure is declared __packed on amd64 with COMPAT_LINUX32.
 */
struct l_flock {
	l_short		l_type;		/* LINUX_F_RDLCK, _WRLCK or _UNLCK */
	l_short		l_whence;	/* copied verbatim to/from struct flock */
	l_off_t		l_start;	/* start of the locked region */
	l_off_t		l_len;		/* length of the locked region */
	l_pid_t		l_pid;		/* process id associated with the lock */
}
#if defined(__amd64__) && defined(COMPAT_LINUX32)
__packed
#endif
;
1421 
1422 static void
1423 linux_to_bsd_flock(struct l_flock *linux_flock, struct flock *bsd_flock)
1424 {
1425 	switch (linux_flock->l_type) {
1426 	case LINUX_F_RDLCK:
1427 		bsd_flock->l_type = F_RDLCK;
1428 		break;
1429 	case LINUX_F_WRLCK:
1430 		bsd_flock->l_type = F_WRLCK;
1431 		break;
1432 	case LINUX_F_UNLCK:
1433 		bsd_flock->l_type = F_UNLCK;
1434 		break;
1435 	default:
1436 		bsd_flock->l_type = -1;
1437 		break;
1438 	}
1439 	bsd_flock->l_whence = linux_flock->l_whence;
1440 	bsd_flock->l_start = (off_t)linux_flock->l_start;
1441 	bsd_flock->l_len = (off_t)linux_flock->l_len;
1442 	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1443 	bsd_flock->l_sysid = 0;
1444 }
1445 
1446 static void
1447 bsd_to_linux_flock(struct flock *bsd_flock, struct l_flock *linux_flock)
1448 {
1449 	switch (bsd_flock->l_type) {
1450 	case F_RDLCK:
1451 		linux_flock->l_type = LINUX_F_RDLCK;
1452 		break;
1453 	case F_WRLCK:
1454 		linux_flock->l_type = LINUX_F_WRLCK;
1455 		break;
1456 	case F_UNLCK:
1457 		linux_flock->l_type = LINUX_F_UNLCK;
1458 		break;
1459 	}
1460 	linux_flock->l_whence = bsd_flock->l_whence;
1461 	linux_flock->l_start = (l_off_t)bsd_flock->l_start;
1462 	linux_flock->l_len = (l_off_t)bsd_flock->l_len;
1463 	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1464 }
1465 
1466 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Linux view of a file lock description with l_loff_t offsets.
 * linux_fcntl64() copies this structure in from and out to user space for
 * the LINUX_F_GETLK64, LINUX_F_SETLK64 and LINUX_F_SETLKW64 commands and
 * converts it to and from the native struct flock.
 *
 * The structure is declared __packed on amd64 with COMPAT_LINUX32.
 */
struct l_flock64 {
	l_short		l_type;		/* LINUX_F_RDLCK, _WRLCK or _UNLCK */
	l_short		l_whence;	/* copied verbatim to/from struct flock */
	l_loff_t	l_start;	/* start of the locked region */
	l_loff_t	l_len;		/* length of the locked region */
	l_pid_t		l_pid;		/* process id associated with the lock */
}
#if defined(__amd64__) && defined(COMPAT_LINUX32)
__packed
#endif
;
1478 
1479 static void
1480 linux_to_bsd_flock64(struct l_flock64 *linux_flock, struct flock *bsd_flock)
1481 {
1482 	switch (linux_flock->l_type) {
1483 	case LINUX_F_RDLCK:
1484 		bsd_flock->l_type = F_RDLCK;
1485 		break;
1486 	case LINUX_F_WRLCK:
1487 		bsd_flock->l_type = F_WRLCK;
1488 		break;
1489 	case LINUX_F_UNLCK:
1490 		bsd_flock->l_type = F_UNLCK;
1491 		break;
1492 	default:
1493 		bsd_flock->l_type = -1;
1494 		break;
1495 	}
1496 	bsd_flock->l_whence = linux_flock->l_whence;
1497 	bsd_flock->l_start = (off_t)linux_flock->l_start;
1498 	bsd_flock->l_len = (off_t)linux_flock->l_len;
1499 	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1500 	bsd_flock->l_sysid = 0;
1501 }
1502 
1503 static void
1504 bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock)
1505 {
1506 	switch (bsd_flock->l_type) {
1507 	case F_RDLCK:
1508 		linux_flock->l_type = LINUX_F_RDLCK;
1509 		break;
1510 	case F_WRLCK:
1511 		linux_flock->l_type = LINUX_F_WRLCK;
1512 		break;
1513 	case F_UNLCK:
1514 		linux_flock->l_type = LINUX_F_UNLCK;
1515 		break;
1516 	}
1517 	linux_flock->l_whence = bsd_flock->l_whence;
1518 	linux_flock->l_start = (l_loff_t)bsd_flock->l_start;
1519 	linux_flock->l_len = (l_loff_t)bsd_flock->l_len;
1520 	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1521 }
1522 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1523 
1524 static int
1525 fcntl_common(struct thread *td, struct linux_fcntl_args *args)
1526 {
1527 	struct l_flock linux_flock;
1528 	struct flock bsd_flock;
1529 	struct pipe *fpipe;
1530 	struct file *fp;
1531 	long arg;
1532 	int error, result;
1533 
1534 	switch (args->cmd) {
1535 	case LINUX_F_DUPFD:
1536 		return (kern_fcntl(td, args->fd, F_DUPFD, args->arg));
1537 
1538 	case LINUX_F_GETFD:
1539 		return (kern_fcntl(td, args->fd, F_GETFD, 0));
1540 
1541 	case LINUX_F_SETFD:
1542 		return (kern_fcntl(td, args->fd, F_SETFD, args->arg));
1543 
1544 	case LINUX_F_GETFL:
1545 		error = kern_fcntl(td, args->fd, F_GETFL, 0);
1546 		result = td->td_retval[0];
1547 		td->td_retval[0] = 0;
1548 		if (result & O_RDONLY)
1549 			td->td_retval[0] |= LINUX_O_RDONLY;
1550 		if (result & O_WRONLY)
1551 			td->td_retval[0] |= LINUX_O_WRONLY;
1552 		if (result & O_RDWR)
1553 			td->td_retval[0] |= LINUX_O_RDWR;
1554 		if (result & O_NDELAY)
1555 			td->td_retval[0] |= LINUX_O_NONBLOCK;
1556 		if (result & O_APPEND)
1557 			td->td_retval[0] |= LINUX_O_APPEND;
1558 		if (result & O_FSYNC)
1559 			td->td_retval[0] |= LINUX_O_SYNC;
1560 		if (result & O_ASYNC)
1561 			td->td_retval[0] |= LINUX_O_ASYNC;
1562 #ifdef LINUX_O_NOFOLLOW
1563 		if (result & O_NOFOLLOW)
1564 			td->td_retval[0] |= LINUX_O_NOFOLLOW;
1565 #endif
1566 #ifdef LINUX_O_DIRECT
1567 		if (result & O_DIRECT)
1568 			td->td_retval[0] |= LINUX_O_DIRECT;
1569 #endif
1570 		return (error);
1571 
1572 	case LINUX_F_SETFL:
1573 		arg = 0;
1574 		if (args->arg & LINUX_O_NDELAY)
1575 			arg |= O_NONBLOCK;
1576 		if (args->arg & LINUX_O_APPEND)
1577 			arg |= O_APPEND;
1578 		if (args->arg & LINUX_O_SYNC)
1579 			arg |= O_FSYNC;
1580 		if (args->arg & LINUX_O_ASYNC)
1581 			arg |= O_ASYNC;
1582 #ifdef LINUX_O_NOFOLLOW
1583 		if (args->arg & LINUX_O_NOFOLLOW)
1584 			arg |= O_NOFOLLOW;
1585 #endif
1586 #ifdef LINUX_O_DIRECT
1587 		if (args->arg & LINUX_O_DIRECT)
1588 			arg |= O_DIRECT;
1589 #endif
1590 		return (kern_fcntl(td, args->fd, F_SETFL, arg));
1591 
1592 	case LINUX_F_GETLK:
1593 		error = copyin((void *)args->arg, &linux_flock,
1594 		    sizeof(linux_flock));
1595 		if (error)
1596 			return (error);
1597 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1598 		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1599 		if (error)
1600 			return (error);
1601 		bsd_to_linux_flock(&bsd_flock, &linux_flock);
1602 		return (copyout(&linux_flock, (void *)args->arg,
1603 		    sizeof(linux_flock)));
1604 
1605 	case LINUX_F_SETLK:
1606 		error = copyin((void *)args->arg, &linux_flock,
1607 		    sizeof(linux_flock));
1608 		if (error)
1609 			return (error);
1610 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1611 		return (kern_fcntl(td, args->fd, F_SETLK,
1612 		    (intptr_t)&bsd_flock));
1613 
1614 	case LINUX_F_SETLKW:
1615 		error = copyin((void *)args->arg, &linux_flock,
1616 		    sizeof(linux_flock));
1617 		if (error)
1618 			return (error);
1619 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1620 		return (kern_fcntl(td, args->fd, F_SETLKW,
1621 		     (intptr_t)&bsd_flock));
1622 
1623 	case LINUX_F_GETOWN:
1624 		return (kern_fcntl(td, args->fd, F_GETOWN, 0));
1625 
1626 	case LINUX_F_SETOWN:
1627 		/*
1628 		 * XXX some Linux applications depend on F_SETOWN having no
1629 		 * significant effect for pipes (SIGIO is not delivered for
1630 		 * pipes under Linux-2.2.35 at least).
1631 		 */
1632 		error = fget(td, args->fd,
1633 		    &cap_fcntl_rights, &fp);
1634 		if (error)
1635 			return (error);
1636 		if (fp->f_type == DTYPE_PIPE) {
1637 			fdrop(fp, td);
1638 			return (EINVAL);
1639 		}
1640 		fdrop(fp, td);
1641 
1642 		return (kern_fcntl(td, args->fd, F_SETOWN, args->arg));
1643 
1644 	case LINUX_F_DUPFD_CLOEXEC:
1645 		return (kern_fcntl(td, args->fd, F_DUPFD_CLOEXEC, args->arg));
1646 	/*
1647 	 * Our F_SEAL_* values match Linux one for maximum compatibility.  So we
1648 	 * only needed to account for different values for fcntl(2) commands.
1649 	 */
1650 	case LINUX_F_GET_SEALS:
1651 		error = kern_fcntl(td, args->fd, F_GET_SEALS, 0);
1652 		if (error != 0)
1653 			return (error);
1654 		td->td_retval[0] = bsd_to_linux_bits(td->td_retval[0],
1655 		    seal_bitmap, 0);
1656 		return (0);
1657 
1658 	case LINUX_F_ADD_SEALS:
1659 		return (kern_fcntl(td, args->fd, F_ADD_SEALS,
1660 		    linux_to_bsd_bits(args->arg, seal_bitmap, 0)));
1661 
1662 	case LINUX_F_GETPIPE_SZ:
1663 		error = fget(td, args->fd,
1664 		    &cap_fcntl_rights, &fp);
1665 		if (error != 0)
1666 			return (error);
1667 		if (fp->f_type != DTYPE_PIPE) {
1668 			fdrop(fp, td);
1669 			return (EINVAL);
1670 		}
1671 		fpipe = fp->f_data;
1672 		td->td_retval[0] = fpipe->pipe_buffer.size;
1673 		fdrop(fp, td);
1674 		return (0);
1675 
1676 	default:
1677 		linux_msg(td, "unsupported fcntl cmd %d", args->cmd);
1678 		return (EINVAL);
1679 	}
1680 }
1681 
/*
 * Linux fcntl(2) entry point: everything is handled by fcntl_common().
 */
int
linux_fcntl(struct thread *td, struct linux_fcntl_args *args)
{
	int rv;

	rv = fcntl_common(td, args);
	return (rv);
}
1688 
1689 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1690 int
1691 linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args)
1692 {
1693 	struct l_flock64 linux_flock;
1694 	struct flock bsd_flock;
1695 	struct linux_fcntl_args fcntl_args;
1696 	int error;
1697 
1698 	switch (args->cmd) {
1699 	case LINUX_F_GETLK64:
1700 		error = copyin((void *)args->arg, &linux_flock,
1701 		    sizeof(linux_flock));
1702 		if (error)
1703 			return (error);
1704 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1705 		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1706 		if (error)
1707 			return (error);
1708 		bsd_to_linux_flock64(&bsd_flock, &linux_flock);
1709 		return (copyout(&linux_flock, (void *)args->arg,
1710 			    sizeof(linux_flock)));
1711 
1712 	case LINUX_F_SETLK64:
1713 		error = copyin((void *)args->arg, &linux_flock,
1714 		    sizeof(linux_flock));
1715 		if (error)
1716 			return (error);
1717 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1718 		return (kern_fcntl(td, args->fd, F_SETLK,
1719 		    (intptr_t)&bsd_flock));
1720 
1721 	case LINUX_F_SETLKW64:
1722 		error = copyin((void *)args->arg, &linux_flock,
1723 		    sizeof(linux_flock));
1724 		if (error)
1725 			return (error);
1726 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1727 		return (kern_fcntl(td, args->fd, F_SETLKW,
1728 		    (intptr_t)&bsd_flock));
1729 	}
1730 
1731 	fcntl_args.fd = args->fd;
1732 	fcntl_args.cmd = args->cmd;
1733 	fcntl_args.arg = args->arg;
1734 	return (fcntl_common(td, &fcntl_args));
1735 }
1736 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1737 
1738 #ifdef LINUX_LEGACY_SYSCALLS
1739 int
1740 linux_chown(struct thread *td, struct linux_chown_args *args)
1741 {
1742 	char *path;
1743 	int error;
1744 
1745 	if (!LUSECONVPATH(td)) {
1746 		return (kern_fchownat(td, AT_FDCWD, args->path, UIO_USERSPACE,
1747 		    args->uid, args->gid, 0));
1748 	}
1749 	LCONVPATHEXIST(td, args->path, &path);
1750 	error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid,
1751 	    args->gid, 0);
1752 	LFREEPATH(path);
1753 	return (error);
1754 }
1755 #endif
1756 
1757 int
1758 linux_fchownat(struct thread *td, struct linux_fchownat_args *args)
1759 {
1760 	char *path;
1761 	int error, dfd, flag;
1762 
1763 	if (args->flag & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH)) {
1764 		linux_msg(td, "fchownat unsupported flag 0x%x", args->flag);
1765 		return (EINVAL);
1766 	}
1767 
1768 	flag = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) == 0 ? 0 :
1769 	    AT_SYMLINK_NOFOLLOW;
1770 	flag |= (args->flag & LINUX_AT_EMPTY_PATH) == 0 ? 0 :
1771 	    AT_EMPTY_PATH;
1772 
1773 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD :  args->dfd;
1774 	if (!LUSECONVPATH(td)) {
1775 		return (kern_fchownat(td, dfd, args->filename, UIO_USERSPACE,
1776 		    args->uid, args->gid, flag));
1777 	}
1778 	LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
1779 	error = kern_fchownat(td, dfd, path, UIO_SYSSPACE, args->uid, args->gid,
1780 	    flag);
1781 	LFREEPATH(path);
1782 	return (error);
1783 }
1784 
1785 #ifdef LINUX_LEGACY_SYSCALLS
1786 int
1787 linux_lchown(struct thread *td, struct linux_lchown_args *args)
1788 {
1789 	char *path;
1790 	int error;
1791 
1792 	if (!LUSECONVPATH(td)) {
1793 		return (kern_fchownat(td, AT_FDCWD, args->path, UIO_USERSPACE, args->uid,
1794 		    args->gid, AT_SYMLINK_NOFOLLOW));
1795 	}
1796 	LCONVPATHEXIST(td, args->path, &path);
1797 	error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid, args->gid,
1798 	    AT_SYMLINK_NOFOLLOW);
1799 	LFREEPATH(path);
1800 	return (error);
1801 }
1802 #endif
1803 
1804 static int
1805 convert_fadvice(int advice)
1806 {
1807 	switch (advice) {
1808 	case LINUX_POSIX_FADV_NORMAL:
1809 		return (POSIX_FADV_NORMAL);
1810 	case LINUX_POSIX_FADV_RANDOM:
1811 		return (POSIX_FADV_RANDOM);
1812 	case LINUX_POSIX_FADV_SEQUENTIAL:
1813 		return (POSIX_FADV_SEQUENTIAL);
1814 	case LINUX_POSIX_FADV_WILLNEED:
1815 		return (POSIX_FADV_WILLNEED);
1816 	case LINUX_POSIX_FADV_DONTNEED:
1817 		return (POSIX_FADV_DONTNEED);
1818 	case LINUX_POSIX_FADV_NOREUSE:
1819 		return (POSIX_FADV_NOREUSE);
1820 	default:
1821 		return (-1);
1822 	}
1823 }
1824 
1825 int
1826 linux_fadvise64(struct thread *td, struct linux_fadvise64_args *args)
1827 {
1828 	off_t offset;
1829 	int advice;
1830 
1831 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1832 	offset = PAIR32TO64(off_t, args->offset);
1833 #else
1834 	offset = args->offset;
1835 #endif
1836 
1837 	advice = convert_fadvice(args->advice);
1838 	if (advice == -1)
1839 		return (EINVAL);
1840 	return (kern_posix_fadvise(td, args->fd, offset, args->len, advice));
1841 }
1842 
1843 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1844 int
1845 linux_fadvise64_64(struct thread *td, struct linux_fadvise64_64_args *args)
1846 {
1847 	off_t len, offset;
1848 	int advice;
1849 
1850 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1851 	len = PAIR32TO64(off_t, args->len);
1852 	offset = PAIR32TO64(off_t, args->offset);
1853 #else
1854 	len = args->len;
1855 	offset = args->offset;
1856 #endif
1857 
1858 	advice = convert_fadvice(args->advice);
1859 	if (advice == -1)
1860 		return (EINVAL);
1861 	return (kern_posix_fadvise(td, args->fd, offset, len, advice));
1862 }
1863 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1864 
1865 #ifdef LINUX_LEGACY_SYSCALLS
1866 int
1867 linux_pipe(struct thread *td, struct linux_pipe_args *args)
1868 {
1869 	int fildes[2];
1870 	int error;
1871 
1872 	error = kern_pipe(td, fildes, 0, NULL, NULL);
1873 	if (error != 0)
1874 		return (error);
1875 
1876 	error = copyout(fildes, args->pipefds, sizeof(fildes));
1877 	if (error != 0) {
1878 		(void)kern_close(td, fildes[0]);
1879 		(void)kern_close(td, fildes[1]);
1880 	}
1881 
1882 	return (error);
1883 }
1884 #endif
1885 
1886 int
1887 linux_pipe2(struct thread *td, struct linux_pipe2_args *args)
1888 {
1889 	int fildes[2];
1890 	int error, flags;
1891 
1892 	if ((args->flags & ~(LINUX_O_NONBLOCK | LINUX_O_CLOEXEC)) != 0)
1893 		return (EINVAL);
1894 
1895 	flags = 0;
1896 	if ((args->flags & LINUX_O_NONBLOCK) != 0)
1897 		flags |= O_NONBLOCK;
1898 	if ((args->flags & LINUX_O_CLOEXEC) != 0)
1899 		flags |= O_CLOEXEC;
1900 	error = kern_pipe(td, fildes, flags, NULL, NULL);
1901 	if (error != 0)
1902 		return (error);
1903 
1904 	error = copyout(fildes, args->pipefds, sizeof(fildes));
1905 	if (error != 0) {
1906 		(void)kern_close(td, fildes[0]);
1907 		(void)kern_close(td, fildes[1]);
1908 	}
1909 
1910 	return (error);
1911 }
1912 
1913 int
1914 linux_dup3(struct thread *td, struct linux_dup3_args *args)
1915 {
1916 	int cmd;
1917 	intptr_t newfd;
1918 
1919 	if (args->oldfd == args->newfd)
1920 		return (EINVAL);
1921 	if ((args->flags & ~LINUX_O_CLOEXEC) != 0)
1922 		return (EINVAL);
1923 	if (args->flags & LINUX_O_CLOEXEC)
1924 		cmd = F_DUP2FD_CLOEXEC;
1925 	else
1926 		cmd = F_DUP2FD;
1927 
1928 	newfd = args->newfd;
1929 	return (kern_fcntl(td, args->oldfd, cmd, newfd));
1930 }
1931 
1932 int
1933 linux_fallocate(struct thread *td, struct linux_fallocate_args *args)
1934 {
1935 	off_t len, offset;
1936 
1937 	/*
1938 	 * We emulate only posix_fallocate system call for which
1939 	 * mode should be 0.
1940 	 */
1941 	if (args->mode != 0)
1942 		return (EOPNOTSUPP);
1943 
1944 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1945 	len = PAIR32TO64(off_t, args->len);
1946 	offset = PAIR32TO64(off_t, args->offset);
1947 #else
1948 	len = args->len;
1949 	offset = args->offset;
1950 #endif
1951 
1952 	return (kern_posix_fallocate(td, args->fd, offset, len));
1953 }
1954 
1955 int
1956 linux_copy_file_range(struct thread *td, struct linux_copy_file_range_args
1957     *args)
1958 {
1959 	l_loff_t inoff, outoff, *inoffp, *outoffp;
1960 	int error, flags;
1961 
1962 	/*
1963 	 * copy_file_range(2) on Linux doesn't define any flags (yet), so is
1964 	 * the native implementation.  Enforce it.
1965 	 */
1966 	if (args->flags != 0) {
1967 		linux_msg(td, "copy_file_range unsupported flags 0x%x",
1968 		    args->flags);
1969 		return (EINVAL);
1970 	}
1971 	flags = 0;
1972 	inoffp = outoffp = NULL;
1973 	if (args->off_in != NULL) {
1974 		error = copyin(args->off_in, &inoff, sizeof(l_loff_t));
1975 		if (error != 0)
1976 			return (error);
1977 		inoffp = &inoff;
1978 	}
1979 	if (args->off_out != NULL) {
1980 		error = copyin(args->off_out, &outoff, sizeof(l_loff_t));
1981 		if (error != 0)
1982 			return (error);
1983 		outoffp = &outoff;
1984 	}
1985 
1986 	error = kern_copy_file_range(td, args->fd_in, inoffp, args->fd_out,
1987 	    outoffp, args->len, flags);
1988 	if (error == 0 && args->off_in != NULL)
1989 		error = copyout(inoffp, args->off_in, sizeof(l_loff_t));
1990 	if (error == 0 && args->off_out != NULL)
1991 		error = copyout(outoffp, args->off_out, sizeof(l_loff_t));
1992 	return (error);
1993 }
1994 
1995 #define	LINUX_MEMFD_PREFIX	"memfd:"
1996 
1997 int
1998 linux_memfd_create(struct thread *td, struct linux_memfd_create_args *args)
1999 {
2000 	char memfd_name[LINUX_NAME_MAX + 1];
2001 	int error, flags, shmflags, oflags;
2002 
2003 	/*
2004 	 * This is our clever trick to avoid the heap allocation to copy in the
2005 	 * uname.  We don't really need to go this far out of our way, but it
2006 	 * does keep the rest of this function fairly clean as they don't have
2007 	 * to worry about cleanup on the way out.
2008 	 */
2009 	error = copyinstr(args->uname_ptr,
2010 	    memfd_name + sizeof(LINUX_MEMFD_PREFIX) - 1,
2011 	    LINUX_NAME_MAX - sizeof(LINUX_MEMFD_PREFIX) - 1, NULL);
2012 	if (error != 0) {
2013 		if (error == ENAMETOOLONG)
2014 			error = EINVAL;
2015 		return (error);
2016 	}
2017 
2018 	memcpy(memfd_name, LINUX_MEMFD_PREFIX, sizeof(LINUX_MEMFD_PREFIX) - 1);
2019 	flags = linux_to_bsd_bits(args->flags, mfd_bitmap, 0);
2020 	if ((flags & ~(MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB |
2021 	    MFD_HUGE_MASK)) != 0)
2022 		return (EINVAL);
2023 	/* Size specified but no HUGETLB. */
2024 	if ((flags & MFD_HUGE_MASK) != 0 && (flags & MFD_HUGETLB) == 0)
2025 		return (EINVAL);
2026 	/* We don't actually support HUGETLB. */
2027 	if ((flags & MFD_HUGETLB) != 0)
2028 		return (ENOSYS);
2029 	oflags = O_RDWR;
2030 	shmflags = SHM_GROW_ON_WRITE;
2031 	if ((flags & MFD_CLOEXEC) != 0)
2032 		oflags |= O_CLOEXEC;
2033 	if ((flags & MFD_ALLOW_SEALING) != 0)
2034 		shmflags |= SHM_ALLOW_SEALING;
2035 	return (kern_shm_open2(td, SHM_ANON, oflags, 0, shmflags, NULL,
2036 	    memfd_name));
2037 }
2038 
2039 int
2040 linux_splice(struct thread *td, struct linux_splice_args *args)
2041 {
2042 
2043 	linux_msg(td, "syscall splice not really implemented");
2044 
2045 	/*
2046 	 * splice(2) is documented to return EINVAL in various circumstances;
2047 	 * returning it instead of ENOSYS should hint the caller to use fallback
2048 	 * instead.
2049 	 */
2050 	return (EINVAL);
2051 }
2052