xref: /freebsd/sys/compat/linux/linux_file.c (revision 41ce62251c1ed0003fc13b8735de5f9eff4c5c03)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1994-1995 Søren Schmidt
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_compat.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/capsicum.h>
37 #include <sys/conf.h>
38 #include <sys/dirent.h>
39 #include <sys/fcntl.h>
40 #include <sys/file.h>
41 #include <sys/filedesc.h>
42 #include <sys/lock.h>
43 #include <sys/malloc.h>
44 #include <sys/mman.h>
45 #include <sys/mount.h>
46 #include <sys/mutex.h>
47 #include <sys/namei.h>
48 #include <sys/proc.h>
49 #include <sys/stat.h>
50 #include <sys/sx.h>
51 #include <sys/syscallsubr.h>
52 #include <sys/sysproto.h>
53 #include <sys/tty.h>
54 #include <sys/unistd.h>
55 #include <sys/vnode.h>
56 
57 #ifdef COMPAT_LINUX32
58 #include <compat/freebsd32/freebsd32_misc.h>
59 #include <machine/../linux32/linux.h>
60 #include <machine/../linux32/linux32_proto.h>
61 #else
62 #include <machine/../linux/linux.h>
63 #include <machine/../linux/linux_proto.h>
64 #endif
65 #include <compat/linux/linux_misc.h>
66 #include <compat/linux/linux_util.h>
67 #include <compat/linux/linux_file.h>
68 
/* Forward declarations of the file-local helpers defined further down. */
static int	linux_getdents_error(struct thread *, int, int);
static int	linux_common_open(struct thread *, int, const char *, int, int,
		    enum uio_seg);
72 
73 static struct bsd_to_linux_bitmap seal_bitmap[] = {
74 	BITMAP_1t1_LINUX(F_SEAL_SEAL),
75 	BITMAP_1t1_LINUX(F_SEAL_SHRINK),
76 	BITMAP_1t1_LINUX(F_SEAL_GROW),
77 	BITMAP_1t1_LINUX(F_SEAL_WRITE),
78 };
79 
80 #define	MFD_HUGETLB_ENTRY(_size)					\
81 	{								\
82 		.bsd_value = MFD_HUGE_##_size,				\
83 		.linux_value = LINUX_HUGETLB_FLAG_ENCODE_##_size	\
84 	}
85 static struct bsd_to_linux_bitmap mfd_bitmap[] = {
86 	BITMAP_1t1_LINUX(MFD_CLOEXEC),
87 	BITMAP_1t1_LINUX(MFD_ALLOW_SEALING),
88 	BITMAP_1t1_LINUX(MFD_HUGETLB),
89 	MFD_HUGETLB_ENTRY(64KB),
90 	MFD_HUGETLB_ENTRY(512KB),
91 	MFD_HUGETLB_ENTRY(1MB),
92 	MFD_HUGETLB_ENTRY(2MB),
93 	MFD_HUGETLB_ENTRY(8MB),
94 	MFD_HUGETLB_ENTRY(16MB),
95 	MFD_HUGETLB_ENTRY(32MB),
96 	MFD_HUGETLB_ENTRY(256MB),
97 	MFD_HUGETLB_ENTRY(512MB),
98 	MFD_HUGETLB_ENTRY(1GB),
99 	MFD_HUGETLB_ENTRY(2GB),
100 	MFD_HUGETLB_ENTRY(16GB),
101 };
102 #undef MFD_HUGETLB_ENTRY
103 
104 #ifdef LINUX_LEGACY_SYSCALLS
105 int
106 linux_creat(struct thread *td, struct linux_creat_args *args)
107 {
108 	char *path;
109 	int error;
110 
111 	if (!LUSECONVPATH(td)) {
112 		return (kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE,
113 		    O_WRONLY | O_CREAT | O_TRUNC, args->mode));
114 	}
115 	LCONVPATHEXIST(td, args->path, &path);
116 	error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE,
117 	    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
118 	LFREEPATH(path);
119 	return (error);
120 }
121 #endif
122 
123 static int
124 linux_common_open(struct thread *td, int dirfd, const char *path, int l_flags,
125     int mode, enum uio_seg seg)
126 {
127 	struct proc *p = td->td_proc;
128 	struct file *fp;
129 	int fd;
130 	int bsd_flags, error;
131 
132 	bsd_flags = 0;
133 	switch (l_flags & LINUX_O_ACCMODE) {
134 	case LINUX_O_WRONLY:
135 		bsd_flags |= O_WRONLY;
136 		break;
137 	case LINUX_O_RDWR:
138 		bsd_flags |= O_RDWR;
139 		break;
140 	default:
141 		bsd_flags |= O_RDONLY;
142 	}
143 	if (l_flags & LINUX_O_NDELAY)
144 		bsd_flags |= O_NONBLOCK;
145 	if (l_flags & LINUX_O_APPEND)
146 		bsd_flags |= O_APPEND;
147 	if (l_flags & LINUX_O_SYNC)
148 		bsd_flags |= O_FSYNC;
149 	if (l_flags & LINUX_O_CLOEXEC)
150 		bsd_flags |= O_CLOEXEC;
151 	if (l_flags & LINUX_O_NONBLOCK)
152 		bsd_flags |= O_NONBLOCK;
153 	if (l_flags & LINUX_O_ASYNC)
154 		bsd_flags |= O_ASYNC;
155 	if (l_flags & LINUX_O_CREAT)
156 		bsd_flags |= O_CREAT;
157 	if (l_flags & LINUX_O_TRUNC)
158 		bsd_flags |= O_TRUNC;
159 	if (l_flags & LINUX_O_EXCL)
160 		bsd_flags |= O_EXCL;
161 	if (l_flags & LINUX_O_NOCTTY)
162 		bsd_flags |= O_NOCTTY;
163 	if (l_flags & LINUX_O_DIRECT)
164 		bsd_flags |= O_DIRECT;
165 	if (l_flags & LINUX_O_NOFOLLOW)
166 		bsd_flags |= O_NOFOLLOW;
167 	if (l_flags & LINUX_O_DIRECTORY)
168 		bsd_flags |= O_DIRECTORY;
169 	/* XXX LINUX_O_NOATIME: unable to be easily implemented. */
170 
171 	error = kern_openat(td, dirfd, path, seg, bsd_flags, mode);
172 	if (error != 0) {
173 		if (error == EMLINK)
174 			error = ELOOP;
175 		goto done;
176 	}
177 	if (p->p_flag & P_CONTROLT)
178 		goto done;
179 	if (bsd_flags & O_NOCTTY)
180 		goto done;
181 
182 	/*
183 	 * XXX In between kern_openat() and fget(), another process
184 	 * having the same filedesc could use that fd without
185 	 * checking below.
186 	*/
187 	fd = td->td_retval[0];
188 	if (fget(td, fd, &cap_ioctl_rights, &fp) == 0) {
189 		if (fp->f_type != DTYPE_VNODE) {
190 			fdrop(fp, td);
191 			goto done;
192 		}
193 		sx_slock(&proctree_lock);
194 		PROC_LOCK(p);
195 		if (SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) {
196 			PROC_UNLOCK(p);
197 			sx_sunlock(&proctree_lock);
198 			/* XXXPJD: Verify if TIOCSCTTY is allowed. */
199 			(void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0,
200 			    td->td_ucred, td);
201 		} else {
202 			PROC_UNLOCK(p);
203 			sx_sunlock(&proctree_lock);
204 		}
205 		fdrop(fp, td);
206 	}
207 
208 done:
209 	return (error);
210 }
211 
212 int
213 linux_openat(struct thread *td, struct linux_openat_args *args)
214 {
215 	char *path;
216 	int dfd, error;
217 
218 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
219 	if (!LUSECONVPATH(td)) {
220 		return (linux_common_open(td, dfd, args->filename, args->flags,
221 		    args->mode, UIO_USERSPACE));
222 	}
223 	if (args->flags & LINUX_O_CREAT)
224 		LCONVPATH_AT(td, args->filename, &path, 1, dfd);
225 	else
226 		LCONVPATH_AT(td, args->filename, &path, 0, dfd);
227 
228 	error = linux_common_open(td, dfd, path, args->flags, args->mode,
229 	    UIO_SYSSPACE);
230 	LFREEPATH(path);
231 	return (error);
232 }
233 
234 #ifdef LINUX_LEGACY_SYSCALLS
235 int
236 linux_open(struct thread *td, struct linux_open_args *args)
237 {
238 	char *path;
239 	int error;
240 
241 	if (!LUSECONVPATH(td)) {
242 		return (linux_common_open(td, AT_FDCWD, args->path, args->flags,
243 		    args->mode, UIO_USERSPACE));
244 	}
245 	if (args->flags & LINUX_O_CREAT)
246 		LCONVPATHCREAT(td, args->path, &path);
247 	else
248 		LCONVPATHEXIST(td, args->path, &path);
249 
250 	error = linux_common_open(td, AT_FDCWD, path, args->flags, args->mode,
251 	    UIO_SYSSPACE);
252 	LFREEPATH(path);
253 	return (error);
254 }
255 #endif
256 
257 int
258 linux_lseek(struct thread *td, struct linux_lseek_args *args)
259 {
260 
261 	return (kern_lseek(td, args->fdes, args->off, args->whence));
262 }
263 
264 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
265 int
266 linux_llseek(struct thread *td, struct linux_llseek_args *args)
267 {
268 	int error;
269 	off_t off;
270 
271 	off = (args->olow) | (((off_t) args->ohigh) << 32);
272 
273 	error = kern_lseek(td, args->fd, off, args->whence);
274 	if (error != 0)
275 		return (error);
276 
277 	error = copyout(td->td_retval, args->res, sizeof(off_t));
278 	if (error != 0)
279 		return (error);
280 
281 	td->td_retval[0] = 0;
282 	return (0);
283 }
284 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
285 
/*
 * Note that linux_getdents(2) and linux_getdents64(2) have the same
 * arguments. They only differ in the definition of struct dirent they
 * operate on.
 * Note that linux_readdir(2) is a special case of linux_getdents(2)
 * where count is always equal to 1, meaning that the buffer is one
 * dirent structure in size and that the code can't handle more anyway.
 * Note that linux_readdir(2) can't be implemented by means of
 * linux_getdents(2): when the *dent buffer size is equal to 1,
 * linux_getdents(2) will trash the user stack.
 */
297 
298 static int
299 linux_getdents_error(struct thread *td, int fd, int err)
300 {
301 	struct vnode *vp;
302 	struct file *fp;
303 	int error;
304 
305 	/* Linux return ENOTDIR in case when fd is not a directory. */
306 	error = getvnode(td, fd, &cap_read_rights, &fp);
307 	if (error != 0)
308 		return (error);
309 	vp = fp->f_vnode;
310 	if (vp->v_type != VDIR) {
311 		fdrop(fp, td);
312 		return (ENOTDIR);
313 	}
314 	fdrop(fp, td);
315 	return (err);
316 }
317 
318 struct l_dirent {
319 	l_ulong		d_ino;
320 	l_off_t		d_off;
321 	l_ushort	d_reclen;
322 	char		d_name[LINUX_NAME_MAX + 1];
323 };
324 
325 struct l_dirent64 {
326 	uint64_t	d_ino;
327 	int64_t		d_off;
328 	l_ushort	d_reclen;
329 	u_char		d_type;
330 	char		d_name[LINUX_NAME_MAX + 1];
331 };
332 
/*
 * Size of a struct l_dirent record holding a name of `namlen' bytes,
 * rounded up to a multiple of sizeof(l_ulong).
 *
 * Linux uses the last byte in the dirent buffer to store d_type,
 * at least glibc-2.7 requires it.  That is why 2 bytes are added to the
 * name length: one for the terminating NUL and one for d_type.
 */
#define	LINUX_RECLEN(namlen)						\
    roundup(offsetof(struct l_dirent, d_name) + (namlen) + 2, sizeof(l_ulong))

/*
 * Size of a struct l_dirent64 record holding a name of `namlen' bytes
 * plus one extra byte, rounded up to a multiple of sizeof(uint64_t).
 */
#define	LINUX_RECLEN64(namlen)						\
    roundup(offsetof(struct l_dirent64, d_name) + (namlen) + 1,		\
    sizeof(uint64_t))
343 
344 #ifdef LINUX_LEGACY_SYSCALLS
345 int
346 linux_getdents(struct thread *td, struct linux_getdents_args *args)
347 {
348 	struct dirent *bdp;
349 	caddr_t inp, buf;		/* BSD-format */
350 	int len, reclen;		/* BSD-format */
351 	caddr_t outp;			/* Linux-format */
352 	int resid, linuxreclen;		/* Linux-format */
353 	caddr_t lbuf;			/* Linux-format */
354 	off_t base;
355 	struct l_dirent *linux_dirent;
356 	int buflen, error;
357 	size_t retval;
358 
359 	buflen = min(args->count, MAXBSIZE);
360 	buf = malloc(buflen, M_TEMP, M_WAITOK);
361 
362 	error = kern_getdirentries(td, args->fd, buf, buflen,
363 	    &base, NULL, UIO_SYSSPACE);
364 	if (error != 0) {
365 		error = linux_getdents_error(td, args->fd, error);
366 		goto out1;
367 	}
368 
369 	lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
370 
371 	len = td->td_retval[0];
372 	inp = buf;
373 	outp = (caddr_t)args->dent;
374 	resid = args->count;
375 	retval = 0;
376 
377 	while (len > 0) {
378 		bdp = (struct dirent *) inp;
379 		reclen = bdp->d_reclen;
380 		linuxreclen = LINUX_RECLEN(bdp->d_namlen);
381 		/*
382 		 * No more space in the user supplied dirent buffer.
383 		 * Return EINVAL.
384 		 */
385 		if (resid < linuxreclen) {
386 			error = EINVAL;
387 			goto out;
388 		}
389 
390 		linux_dirent = (struct l_dirent*)lbuf;
391 		linux_dirent->d_ino = bdp->d_fileno;
392 		linux_dirent->d_off = base + reclen;
393 		linux_dirent->d_reclen = linuxreclen;
394 		/*
395 		 * Copy d_type to last byte of l_dirent buffer
396 		 */
397 		lbuf[linuxreclen - 1] = bdp->d_type;
398 		strlcpy(linux_dirent->d_name, bdp->d_name,
399 		    linuxreclen - offsetof(struct l_dirent, d_name)-1);
400 		error = copyout(linux_dirent, outp, linuxreclen);
401 		if (error != 0)
402 			goto out;
403 
404 		inp += reclen;
405 		base += reclen;
406 		len -= reclen;
407 
408 		retval += linuxreclen;
409 		outp += linuxreclen;
410 		resid -= linuxreclen;
411 	}
412 	td->td_retval[0] = retval;
413 
414 out:
415 	free(lbuf, M_TEMP);
416 out1:
417 	free(buf, M_TEMP);
418 	return (error);
419 }
420 #endif
421 
422 int
423 linux_getdents64(struct thread *td, struct linux_getdents64_args *args)
424 {
425 	struct dirent *bdp;
426 	caddr_t inp, buf;		/* BSD-format */
427 	int len, reclen;		/* BSD-format */
428 	caddr_t outp;			/* Linux-format */
429 	int resid, linuxreclen;		/* Linux-format */
430 	caddr_t lbuf;			/* Linux-format */
431 	off_t base;
432 	struct l_dirent64 *linux_dirent64;
433 	int buflen, error;
434 	size_t retval;
435 
436 	buflen = min(args->count, MAXBSIZE);
437 	buf = malloc(buflen, M_TEMP, M_WAITOK);
438 
439 	error = kern_getdirentries(td, args->fd, buf, buflen,
440 	    &base, NULL, UIO_SYSSPACE);
441 	if (error != 0) {
442 		error = linux_getdents_error(td, args->fd, error);
443 		goto out1;
444 	}
445 
446 	lbuf = malloc(LINUX_RECLEN64(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
447 
448 	len = td->td_retval[0];
449 	inp = buf;
450 	outp = (caddr_t)args->dirent;
451 	resid = args->count;
452 	retval = 0;
453 
454 	while (len > 0) {
455 		bdp = (struct dirent *) inp;
456 		reclen = bdp->d_reclen;
457 		linuxreclen = LINUX_RECLEN64(bdp->d_namlen);
458 		/*
459 		 * No more space in the user supplied dirent buffer.
460 		 * Return EINVAL.
461 		 */
462 		if (resid < linuxreclen) {
463 			error = EINVAL;
464 			goto out;
465 		}
466 
467 		linux_dirent64 = (struct l_dirent64*)lbuf;
468 		linux_dirent64->d_ino = bdp->d_fileno;
469 		linux_dirent64->d_off = base + reclen;
470 		linux_dirent64->d_reclen = linuxreclen;
471 		linux_dirent64->d_type = bdp->d_type;
472 		strlcpy(linux_dirent64->d_name, bdp->d_name,
473 		    linuxreclen - offsetof(struct l_dirent64, d_name));
474 		error = copyout(linux_dirent64, outp, linuxreclen);
475 		if (error != 0)
476 			goto out;
477 
478 		inp += reclen;
479 		base += reclen;
480 		len -= reclen;
481 
482 		retval += linuxreclen;
483 		outp += linuxreclen;
484 		resid -= linuxreclen;
485 	}
486 	td->td_retval[0] = retval;
487 
488 out:
489 	free(lbuf, M_TEMP);
490 out1:
491 	free(buf, M_TEMP);
492 	return (error);
493 }
494 
495 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
496 int
497 linux_readdir(struct thread *td, struct linux_readdir_args *args)
498 {
499 	struct dirent *bdp;
500 	caddr_t buf;			/* BSD-format */
501 	int linuxreclen;		/* Linux-format */
502 	caddr_t lbuf;			/* Linux-format */
503 	off_t base;
504 	struct l_dirent *linux_dirent;
505 	int buflen, error;
506 
507 	buflen = LINUX_RECLEN(LINUX_NAME_MAX);
508 	buf = malloc(buflen, M_TEMP, M_WAITOK);
509 
510 	error = kern_getdirentries(td, args->fd, buf, buflen,
511 	    &base, NULL, UIO_SYSSPACE);
512 	if (error != 0) {
513 		error = linux_getdents_error(td, args->fd, error);
514 		goto out;
515 	}
516 	if (td->td_retval[0] == 0)
517 		goto out;
518 
519 	lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
520 
521 	bdp = (struct dirent *) buf;
522 	linuxreclen = LINUX_RECLEN(bdp->d_namlen);
523 
524 	linux_dirent = (struct l_dirent*)lbuf;
525 	linux_dirent->d_ino = bdp->d_fileno;
526 	linux_dirent->d_off = linuxreclen;
527 	linux_dirent->d_reclen = bdp->d_namlen;
528 	strlcpy(linux_dirent->d_name, bdp->d_name,
529 	    linuxreclen - offsetof(struct l_dirent, d_name));
530 	error = copyout(linux_dirent, args->dent, linuxreclen);
531 	if (error == 0)
532 		td->td_retval[0] = linuxreclen;
533 
534 	free(lbuf, M_TEMP);
535 out:
536 	free(buf, M_TEMP);
537 	return (error);
538 }
539 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
540 
541 /*
542  * These exist mainly for hooks for doing /compat/linux translation.
543  */
544 
545 #ifdef LINUX_LEGACY_SYSCALLS
546 int
547 linux_access(struct thread *td, struct linux_access_args *args)
548 {
549 	char *path;
550 	int error;
551 
552 	/* Linux convention. */
553 	if (args->amode & ~(F_OK | X_OK | W_OK | R_OK))
554 		return (EINVAL);
555 
556 	if (!LUSECONVPATH(td)) {
557 		error = kern_accessat(td, AT_FDCWD, args->path, UIO_USERSPACE, 0,
558 		    args->amode);
559 	} else {
560 		LCONVPATHEXIST(td, args->path, &path);
561 		error = kern_accessat(td, AT_FDCWD, path, UIO_SYSSPACE, 0,
562 		    args->amode);
563 		LFREEPATH(path);
564 	}
565 
566 	return (error);
567 }
568 #endif
569 
570 int
571 linux_faccessat(struct thread *td, struct linux_faccessat_args *args)
572 {
573 	char *path;
574 	int error, dfd;
575 
576 	/* Linux convention. */
577 	if (args->amode & ~(F_OK | X_OK | W_OK | R_OK))
578 		return (EINVAL);
579 
580 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
581 	if (!LUSECONVPATH(td)) {
582 		error = kern_accessat(td, dfd, args->filename, UIO_USERSPACE, 0, args->amode);
583 	} else {
584 		LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
585 		error = kern_accessat(td, dfd, path, UIO_SYSSPACE, 0, args->amode);
586 		LFREEPATH(path);
587 	}
588 
589 	return (error);
590 }
591 
592 #ifdef LINUX_LEGACY_SYSCALLS
593 int
594 linux_unlink(struct thread *td, struct linux_unlink_args *args)
595 {
596 	char *path;
597 	int error;
598 	struct stat st;
599 
600 	if (!LUSECONVPATH(td)) {
601 		error = kern_funlinkat(td, AT_FDCWD, args->path, FD_NONE,
602 		    UIO_USERSPACE, 0, 0);
603 		if (error == EPERM) {
604 			/* Introduce POSIX noncompliant behaviour of Linux */
605 			if (kern_statat(td, 0, AT_FDCWD, args->path,
606 			    UIO_SYSSPACE, &st, NULL) == 0) {
607 				if (S_ISDIR(st.st_mode))
608 					error = EISDIR;
609 			}
610 		}
611 	} else {
612 		LCONVPATHEXIST(td, args->path, &path);
613 		error = kern_funlinkat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0, 0);
614 		if (error == EPERM) {
615 			/* Introduce POSIX noncompliant behaviour of Linux */
616 			if (kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &st,
617 			    NULL) == 0) {
618 				if (S_ISDIR(st.st_mode))
619 					error = EISDIR;
620 			}
621 		}
622 		LFREEPATH(path);
623 	}
624 
625 	return (error);
626 }
627 #endif
628 
629 static int
630 linux_unlinkat_impl(struct thread *td, enum uio_seg pathseg, const char *path,
631     int dfd, struct linux_unlinkat_args *args)
632 {
633 	struct stat st;
634 	int error;
635 
636 	if (args->flag & LINUX_AT_REMOVEDIR)
637 		error = kern_frmdirat(td, dfd, path, FD_NONE, pathseg, 0);
638 	else
639 		error = kern_funlinkat(td, dfd, path, FD_NONE, pathseg, 0, 0);
640 	if (error == EPERM && !(args->flag & LINUX_AT_REMOVEDIR)) {
641 		/* Introduce POSIX noncompliant behaviour of Linux */
642 		if (kern_statat(td, AT_SYMLINK_NOFOLLOW, dfd, path,
643 		    UIO_SYSSPACE, &st, NULL) == 0 && S_ISDIR(st.st_mode))
644 			error = EISDIR;
645 	}
646 	return (error);
647 }
648 
649 int
650 linux_unlinkat(struct thread *td, struct linux_unlinkat_args *args)
651 {
652 	char *path;
653 	int error, dfd;
654 
655 	if (args->flag & ~LINUX_AT_REMOVEDIR)
656 		return (EINVAL);
657 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
658 	if (!LUSECONVPATH(td)) {
659 		return (linux_unlinkat_impl(td, UIO_USERSPACE, args->pathname,
660 		    dfd, args));
661 	}
662 	LCONVPATHEXIST_AT(td, args->pathname, &path, dfd);
663 	error = linux_unlinkat_impl(td, UIO_SYSSPACE, path, dfd, args);
664 	LFREEPATH(path);
665 	return (error);
666 }
667 int
668 linux_chdir(struct thread *td, struct linux_chdir_args *args)
669 {
670 	char *path;
671 	int error;
672 
673 	if (!LUSECONVPATH(td)) {
674 		return (kern_chdir(td, args->path, UIO_USERSPACE));
675 	}
676 	LCONVPATHEXIST(td, args->path, &path);
677 	error = kern_chdir(td, path, UIO_SYSSPACE);
678 	LFREEPATH(path);
679 	return (error);
680 }
681 
682 #ifdef LINUX_LEGACY_SYSCALLS
683 int
684 linux_chmod(struct thread *td, struct linux_chmod_args *args)
685 {
686 	char *path;
687 	int error;
688 
689 	if (!LUSECONVPATH(td)) {
690 		return (kern_fchmodat(td, AT_FDCWD, args->path, UIO_USERSPACE,
691 		    args->mode, 0));
692 	}
693 	LCONVPATHEXIST(td, args->path, &path);
694 	error = kern_fchmodat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode, 0);
695 	LFREEPATH(path);
696 	return (error);
697 }
698 #endif
699 
700 int
701 linux_fchmodat(struct thread *td, struct linux_fchmodat_args *args)
702 {
703 	char *path;
704 	int error, dfd;
705 
706 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
707 	if (!LUSECONVPATH(td)) {
708 		return (kern_fchmodat(td, dfd, args->filename, UIO_USERSPACE,
709 		    args->mode, 0));
710 	}
711 	LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
712 	error = kern_fchmodat(td, dfd, path, UIO_SYSSPACE, args->mode, 0);
713 	LFREEPATH(path);
714 	return (error);
715 }
716 
717 #ifdef LINUX_LEGACY_SYSCALLS
718 int
719 linux_mkdir(struct thread *td, struct linux_mkdir_args *args)
720 {
721 	char *path;
722 	int error;
723 
724 	if (!LUSECONVPATH(td)) {
725 		return (kern_mkdirat(td, AT_FDCWD, args->path, UIO_USERSPACE, args->mode));
726 	}
727 	LCONVPATHCREAT(td, args->path, &path);
728 	error = kern_mkdirat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode);
729 	LFREEPATH(path);
730 	return (error);
731 }
732 #endif
733 
734 int
735 linux_mkdirat(struct thread *td, struct linux_mkdirat_args *args)
736 {
737 	char *path;
738 	int error, dfd;
739 
740 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
741 	if (!LUSECONVPATH(td)) {
742 		return (kern_mkdirat(td, dfd, args->pathname, UIO_USERSPACE, args->mode));
743 	}
744 	LCONVPATHCREAT_AT(td, args->pathname, &path, dfd);
745 	error = kern_mkdirat(td, dfd, path, UIO_SYSSPACE, args->mode);
746 	LFREEPATH(path);
747 	return (error);
748 }
749 
750 #ifdef LINUX_LEGACY_SYSCALLS
751 int
752 linux_rmdir(struct thread *td, struct linux_rmdir_args *args)
753 {
754 	char *path;
755 	int error;
756 
757 	if (!LUSECONVPATH(td)) {
758 		return (kern_frmdirat(td, AT_FDCWD, args->path, FD_NONE,
759 		    UIO_USERSPACE, 0));
760 	}
761 	LCONVPATHEXIST(td, args->path, &path);
762 	error = kern_frmdirat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0);
763 	LFREEPATH(path);
764 	return (error);
765 }
766 
767 int
768 linux_rename(struct thread *td, struct linux_rename_args *args)
769 {
770 	char *from, *to;
771 	int error;
772 
773 	if (!LUSECONVPATH(td)) {
774 		return (kern_renameat(td, AT_FDCWD, args->from, AT_FDCWD,
775 		    args->to, UIO_USERSPACE));
776 	}
777 	LCONVPATHEXIST(td, args->from, &from);
778 	/* Expand LCONVPATHCREATE so that `from' can be freed on errors */
779 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD);
780 	if (to == NULL) {
781 		LFREEPATH(from);
782 		return (error);
783 	}
784 	error = kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, UIO_SYSSPACE);
785 	LFREEPATH(from);
786 	LFREEPATH(to);
787 	return (error);
788 }
789 #endif
790 
791 int
792 linux_renameat(struct thread *td, struct linux_renameat_args *args)
793 {
794 	struct linux_renameat2_args renameat2_args = {
795 	    .olddfd = args->olddfd,
796 	    .oldname = args->oldname,
797 	    .newdfd = args->newdfd,
798 	    .newname = args->newname,
799 	    .flags = 0
800 	};
801 
802 	return (linux_renameat2(td, &renameat2_args));
803 }
804 
805 int
806 linux_renameat2(struct thread *td, struct linux_renameat2_args *args)
807 {
808 	char *from, *to;
809 	int error, olddfd, newdfd;
810 
811 	if (args->flags != 0) {
812 		if (args->flags & ~(LINUX_RENAME_EXCHANGE |
813 		    LINUX_RENAME_NOREPLACE | LINUX_RENAME_WHITEOUT))
814 			return (EINVAL);
815 		if (args->flags & LINUX_RENAME_EXCHANGE &&
816 		    args->flags & (LINUX_RENAME_NOREPLACE |
817 		    LINUX_RENAME_WHITEOUT))
818 			return (EINVAL);
819 #if 0
820 		/*
821 		 * This spams the console on Ubuntu Focal.
822 		 *
823 		 * What's needed here is a general mechanism to let users know
824 		 * about missing features without hogging the system.
825 		 */
826 		linux_msg(td, "renameat2 unsupported flags 0x%x",
827 		    args->flags);
828 #endif
829 		return (EINVAL);
830 	}
831 
832 	olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd;
833 	newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd;
834 	if (!LUSECONVPATH(td)) {
835 		return (kern_renameat(td, olddfd, args->oldname, newdfd,
836 		    args->newname, UIO_USERSPACE));
837 	}
838 	LCONVPATHEXIST_AT(td, args->oldname, &from, olddfd);
839 	/* Expand LCONVPATHCREATE so that `from' can be freed on errors */
840 	error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd);
841 	if (to == NULL) {
842 		LFREEPATH(from);
843 		return (error);
844 	}
845 	error = kern_renameat(td, olddfd, from, newdfd, to, UIO_SYSSPACE);
846 	LFREEPATH(from);
847 	LFREEPATH(to);
848 	return (error);
849 }
850 
851 #ifdef LINUX_LEGACY_SYSCALLS
852 int
853 linux_symlink(struct thread *td, struct linux_symlink_args *args)
854 {
855 	char *path, *to;
856 	int error;
857 
858 	if (!LUSECONVPATH(td)) {
859 		return (kern_symlinkat(td, args->path, AT_FDCWD, args->to,
860 		    UIO_USERSPACE));
861 	}
862 	LCONVPATHEXIST(td, args->path, &path);
863 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
864 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD);
865 	if (to == NULL) {
866 		LFREEPATH(path);
867 		return (error);
868 	}
869 	error = kern_symlinkat(td, path, AT_FDCWD, to, UIO_SYSSPACE);
870 	LFREEPATH(path);
871 	LFREEPATH(to);
872 	return (error);
873 }
874 #endif
875 
876 int
877 linux_symlinkat(struct thread *td, struct linux_symlinkat_args *args)
878 {
879 	char *path, *to;
880 	int error, dfd;
881 
882 	dfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd;
883 	if (!LUSECONVPATH(td)) {
884 		return (kern_symlinkat(td, args->oldname, dfd, args->newname,
885 		    UIO_USERSPACE));
886 	}
887 	LCONVPATHEXIST(td, args->oldname, &path);
888 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
889 	error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, dfd);
890 	if (to == NULL) {
891 		LFREEPATH(path);
892 		return (error);
893 	}
894 	error = kern_symlinkat(td, path, dfd, to, UIO_SYSSPACE);
895 	LFREEPATH(path);
896 	LFREEPATH(to);
897 	return (error);
898 }
899 
900 #ifdef LINUX_LEGACY_SYSCALLS
901 int
902 linux_readlink(struct thread *td, struct linux_readlink_args *args)
903 {
904 	char *name;
905 	int error;
906 
907 	if (!LUSECONVPATH(td)) {
908 		return (kern_readlinkat(td, AT_FDCWD, args->name, UIO_USERSPACE,
909 		    args->buf, UIO_USERSPACE, args->count));
910 	}
911 	LCONVPATHEXIST(td, args->name, &name);
912 	error = kern_readlinkat(td, AT_FDCWD, name, UIO_SYSSPACE,
913 	    args->buf, UIO_USERSPACE, args->count);
914 	LFREEPATH(name);
915 	return (error);
916 }
917 #endif
918 
919 int
920 linux_readlinkat(struct thread *td, struct linux_readlinkat_args *args)
921 {
922 	char *name;
923 	int error, dfd;
924 
925 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
926 	if (!LUSECONVPATH(td)) {
927 		return (kern_readlinkat(td, dfd, args->path, UIO_USERSPACE,
928 		    args->buf, UIO_USERSPACE, args->bufsiz));
929 	}
930 	LCONVPATHEXIST_AT(td, args->path, &name, dfd);
931 	error = kern_readlinkat(td, dfd, name, UIO_SYSSPACE, args->buf,
932 	    UIO_USERSPACE, args->bufsiz);
933 	LFREEPATH(name);
934 	return (error);
935 }
936 
937 int
938 linux_truncate(struct thread *td, struct linux_truncate_args *args)
939 {
940 	char *path;
941 	int error;
942 
943 	if (!LUSECONVPATH(td)) {
944 		return (kern_truncate(td, args->path, UIO_USERSPACE, args->length));
945 	}
946 	LCONVPATHEXIST(td, args->path, &path);
947 	error = kern_truncate(td, path, UIO_SYSSPACE, args->length);
948 	LFREEPATH(path);
949 	return (error);
950 }
951 
952 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
953 int
954 linux_truncate64(struct thread *td, struct linux_truncate64_args *args)
955 {
956 	char *path;
957 	off_t length;
958 	int error;
959 
960 #if defined(__amd64__) && defined(COMPAT_LINUX32)
961 	length = PAIR32TO64(off_t, args->length);
962 #else
963 	length = args->length;
964 #endif
965 
966 	if (!LUSECONVPATH(td)) {
967 		return (kern_truncate(td, args->path, UIO_USERSPACE, length));
968 	}
969 	LCONVPATHEXIST(td, args->path, &path);
970 	error = kern_truncate(td, path, UIO_SYSSPACE, length);
971 	LFREEPATH(path);
972 	return (error);
973 }
974 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
975 
976 int
977 linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args)
978 {
979 
980 	return (kern_ftruncate(td, args->fd, args->length));
981 }
982 
983 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
984 int
985 linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
986 {
987 	off_t length;
988 
989 #if defined(__amd64__) && defined(COMPAT_LINUX32)
990 	length = PAIR32TO64(off_t, args->length);
991 #else
992 	length = args->length;
993 #endif
994 
995 	return (kern_ftruncate(td, args->fd, length));
996 }
997 #endif
998 
999 #ifdef LINUX_LEGACY_SYSCALLS
1000 int
1001 linux_link(struct thread *td, struct linux_link_args *args)
1002 {
1003 	char *path, *to;
1004 	int error;
1005 
1006 	if (!LUSECONVPATH(td)) {
1007 		return (kern_linkat(td, AT_FDCWD, AT_FDCWD, args->path, args->to,
1008 		    UIO_USERSPACE, FOLLOW));
1009 	}
1010 	LCONVPATHEXIST(td, args->path, &path);
1011 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
1012 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD);
1013 	if (to == NULL) {
1014 		LFREEPATH(path);
1015 		return (error);
1016 	}
1017 	error = kern_linkat(td, AT_FDCWD, AT_FDCWD, path, to, UIO_SYSSPACE,
1018 	    FOLLOW);
1019 	LFREEPATH(path);
1020 	LFREEPATH(to);
1021 	return (error);
1022 }
1023 #endif
1024 
1025 int
1026 linux_linkat(struct thread *td, struct linux_linkat_args *args)
1027 {
1028 	char *path, *to;
1029 	int error, olddfd, newdfd, follow;
1030 
1031 	if (args->flag & ~LINUX_AT_SYMLINK_FOLLOW)
1032 		return (EINVAL);
1033 
1034 	olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd;
1035 	newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd;
1036 	follow = (args->flag & LINUX_AT_SYMLINK_FOLLOW) == 0 ? NOFOLLOW :
1037 	    FOLLOW;
1038 	if (!LUSECONVPATH(td)) {
1039 		return (kern_linkat(td, olddfd, newdfd, args->oldname,
1040 		    args->newname, UIO_USERSPACE, follow));
1041 	}
1042 	LCONVPATHEXIST_AT(td, args->oldname, &path, olddfd);
1043 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
1044 	error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd);
1045 	if (to == NULL) {
1046 		LFREEPATH(path);
1047 		return (error);
1048 	}
1049 	error = kern_linkat(td, olddfd, newdfd, path, to, UIO_SYSSPACE, follow);
1050 	LFREEPATH(path);
1051 	LFREEPATH(to);
1052 	return (error);
1053 }
1054 
1055 int
1056 linux_fdatasync(struct thread *td, struct linux_fdatasync_args *uap)
1057 {
1058 
1059 	return (kern_fsync(td, uap->fd, false));
1060 }
1061 
1062 int
1063 linux_sync_file_range(struct thread *td, struct linux_sync_file_range_args *uap)
1064 {
1065 	off_t nbytes, offset;
1066 
1067 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1068 	nbytes = PAIR32TO64(off_t, uap->nbytes);
1069 	offset = PAIR32TO64(off_t, uap->offset);
1070 #else
1071 	nbytes = uap->nbytes;
1072 	offset = uap->offset;
1073 #endif
1074 
1075 	if (offset < 0 || nbytes < 0 ||
1076 	    (uap->flags & ~(LINUX_SYNC_FILE_RANGE_WAIT_BEFORE |
1077 	    LINUX_SYNC_FILE_RANGE_WRITE |
1078 	    LINUX_SYNC_FILE_RANGE_WAIT_AFTER)) != 0) {
1079 		return (EINVAL);
1080 	}
1081 
1082 	return (kern_fsync(td, uap->fd, false));
1083 }
1084 
1085 int
1086 linux_pread(struct thread *td, struct linux_pread_args *uap)
1087 {
1088 	struct vnode *vp;
1089 	off_t offset;
1090 	int error;
1091 
1092 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1093 	offset = PAIR32TO64(off_t, uap->offset);
1094 #else
1095 	offset = uap->offset;
1096 #endif
1097 
1098 	error = kern_pread(td, uap->fd, uap->buf, uap->nbyte, offset);
1099 	if (error == 0) {
1100 		/* This seems to violate POSIX but Linux does it. */
1101 		error = fgetvp(td, uap->fd, &cap_pread_rights, &vp);
1102 		if (error != 0)
1103 			return (error);
1104 		if (vp->v_type == VDIR)
1105 			error = EISDIR;
1106 		vrele(vp);
1107 	}
1108 	return (error);
1109 }
1110 
1111 int
1112 linux_pwrite(struct thread *td, struct linux_pwrite_args *uap)
1113 {
1114 	off_t offset;
1115 
1116 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1117 	offset = PAIR32TO64(off_t, uap->offset);
1118 #else
1119 	offset = uap->offset;
1120 #endif
1121 
1122 	return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, offset));
1123 }
1124 
1125 int
1126 linux_preadv(struct thread *td, struct linux_preadv_args *uap)
1127 {
1128 	struct uio *auio;
1129 	int error;
1130 	off_t offset;
1131 
1132 	/*
1133 	 * According http://man7.org/linux/man-pages/man2/preadv.2.html#NOTES
1134 	 * pos_l and pos_h, respectively, contain the
1135 	 * low order and high order 32 bits of offset.
1136 	 */
1137 	offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) <<
1138 	    (sizeof(offset) * 4)) | uap->pos_l;
1139 	if (offset < 0)
1140 		return (EINVAL);
1141 #ifdef COMPAT_LINUX32
1142 	error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio);
1143 #else
1144 	error = copyinuio(uap->vec, uap->vlen, &auio);
1145 #endif
1146 	if (error != 0)
1147 		return (error);
1148 	error = kern_preadv(td, uap->fd, auio, offset);
1149 	free(auio, M_IOV);
1150 	return (error);
1151 }
1152 
1153 int
1154 linux_pwritev(struct thread *td, struct linux_pwritev_args *uap)
1155 {
1156 	struct uio *auio;
1157 	int error;
1158 	off_t offset;
1159 
1160 	/*
1161 	 * According http://man7.org/linux/man-pages/man2/pwritev.2.html#NOTES
1162 	 * pos_l and pos_h, respectively, contain the
1163 	 * low order and high order 32 bits of offset.
1164 	 */
1165 	offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) <<
1166 	    (sizeof(offset) * 4)) | uap->pos_l;
1167 	if (offset < 0)
1168 		return (EINVAL);
1169 #ifdef COMPAT_LINUX32
1170 	error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio);
1171 #else
1172 	error = copyinuio(uap->vec, uap->vlen, &auio);
1173 #endif
1174 	if (error != 0)
1175 		return (error);
1176 	error = kern_pwritev(td, uap->fd, auio, offset);
1177 	free(auio, M_IOV);
1178 	return (error);
1179 }
1180 
1181 int
1182 linux_mount(struct thread *td, struct linux_mount_args *args)
1183 {
1184 	struct mntarg *ma = NULL;
1185 	char *fstypename, *mntonname, *mntfromname, *data;
1186 	int error, fsflags;
1187 
1188 	fstypename = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1189 	mntonname = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1190 	mntfromname = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1191 	data = NULL;
1192 	error = copyinstr(args->filesystemtype, fstypename, MNAMELEN - 1,
1193 	    NULL);
1194 	if (error != 0)
1195 		goto out;
1196 	if (args->specialfile != NULL) {
1197 		error = copyinstr(args->specialfile, mntfromname, MNAMELEN - 1, NULL);
1198 		if (error != 0)
1199 			goto out;
1200 	} else {
1201 		mntfromname[0] = '\0';
1202 	}
1203 	error = copyinstr(args->dir, mntonname, MNAMELEN - 1, NULL);
1204 	if (error != 0)
1205 		goto out;
1206 
1207 	if (strcmp(fstypename, "ext2") == 0) {
1208 		strcpy(fstypename, "ext2fs");
1209 	} else if (strcmp(fstypename, "proc") == 0) {
1210 		strcpy(fstypename, "linprocfs");
1211 	} else if (strcmp(fstypename, "vfat") == 0) {
1212 		strcpy(fstypename, "msdosfs");
1213 	} else if (strcmp(fstypename, "fuse") == 0) {
1214 		char *fuse_options, *fuse_option, *fuse_name;
1215 
1216 		if (strcmp(mntfromname, "fuse") == 0)
1217 			strcpy(mntfromname, "/dev/fuse");
1218 
1219 		strcpy(fstypename, "fusefs");
1220 		data = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1221 		error = copyinstr(args->data, data, MNAMELEN - 1, NULL);
1222 		if (error != 0)
1223 			goto out;
1224 
1225 		fuse_options = data;
1226 		while ((fuse_option = strsep(&fuse_options, ",")) != NULL) {
1227 			fuse_name = strsep(&fuse_option, "=");
1228 			if (fuse_name == NULL || fuse_option == NULL)
1229 				goto out;
1230 			ma = mount_arg(ma, fuse_name, fuse_option, -1);
1231 		}
1232 
1233 		/*
1234 		 * The FUSE server uses Linux errno values instead of FreeBSD
1235 		 * ones; add a flag to tell fuse(4) to do errno translation.
1236 		 */
1237 		ma = mount_arg(ma, "linux_errnos", "1", -1);
1238 	}
1239 
1240 	fsflags = 0;
1241 
1242 	/*
1243 	 * Linux SYNC flag is not included; the closest equivalent
1244 	 * FreeBSD has is !ASYNC, which is our default.
1245 	 */
1246 	if (args->rwflag & LINUX_MS_RDONLY)
1247 		fsflags |= MNT_RDONLY;
1248 	if (args->rwflag & LINUX_MS_NOSUID)
1249 		fsflags |= MNT_NOSUID;
1250 	if (args->rwflag & LINUX_MS_NOEXEC)
1251 		fsflags |= MNT_NOEXEC;
1252 	if (args->rwflag & LINUX_MS_REMOUNT)
1253 		fsflags |= MNT_UPDATE;
1254 
1255 	ma = mount_arg(ma, "fstype", fstypename, -1);
1256 	ma = mount_arg(ma, "fspath", mntonname, -1);
1257 	ma = mount_arg(ma, "from", mntfromname, -1);
1258 	error = kernel_mount(ma, fsflags);
1259 out:
1260 	free(fstypename, M_TEMP);
1261 	free(mntonname, M_TEMP);
1262 	free(mntfromname, M_TEMP);
1263 	free(data, M_TEMP);
1264 	return (error);
1265 }
1266 
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Old umount(2) entry point, which carries no flags argument: unmount
 * `path' with an empty flag set.
 */
int
linux_oldumount(struct thread *td, struct linux_oldumount_args *args)
{
	int error;

	error = kern_unmount(td, args->path, 0);
	return (error);
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1275 
#ifdef LINUX_LEGACY_SYSCALLS
/*
 * umount2(2): unmount `path'.  LINUX_MNT_FORCE is the only flag that is
 * understood; any other bit is logged and rejected with EINVAL.
 */
int
linux_umount(struct thread *td, struct linux_umount_args *args)
{
	int flags = 0;

	/* Consume the flag we understand; whatever is left is unsupported. */
	if (args->flags & LINUX_MNT_FORCE) {
		flags = MNT_FORCE;
		args->flags &= ~LINUX_MNT_FORCE;
	}
	if (args->flags == 0)
		return (kern_unmount(td, args->path, flags));

	linux_msg(td, "unsupported umount2 flags %#x", args->flags);
	return (EINVAL);
}
#endif
1295 
1296 /*
1297  * fcntl family of syscalls
1298  */
1299 
/*
 * Record-lock description as laid out by the Linux ABI.  fcntl_common()
 * copies it in from and out to userspace for F_GETLK, F_SETLK and F_SETLKW,
 * converting to and from the native struct flock.
 */
struct l_flock {
	l_short		l_type;		/* LINUX_F_RDLCK, _WRLCK or _UNLCK */
	l_short		l_whence;	/* copied unchanged into struct flock */
	l_off_t		l_start;	/* start of the locked region */
	l_off_t		l_len;		/* length of the locked region */
	l_pid_t		l_pid;		/* process id, as in struct flock */
}
/*
 * Packed only for 32-bit Linux binaries on amd64.
 * NOTE(review): presumably to reproduce the i386 layout -- confirm.
 */
#if defined(__amd64__) && defined(COMPAT_LINUX32)
__packed
#endif
;
1311 
1312 static void
1313 linux_to_bsd_flock(struct l_flock *linux_flock, struct flock *bsd_flock)
1314 {
1315 	switch (linux_flock->l_type) {
1316 	case LINUX_F_RDLCK:
1317 		bsd_flock->l_type = F_RDLCK;
1318 		break;
1319 	case LINUX_F_WRLCK:
1320 		bsd_flock->l_type = F_WRLCK;
1321 		break;
1322 	case LINUX_F_UNLCK:
1323 		bsd_flock->l_type = F_UNLCK;
1324 		break;
1325 	default:
1326 		bsd_flock->l_type = -1;
1327 		break;
1328 	}
1329 	bsd_flock->l_whence = linux_flock->l_whence;
1330 	bsd_flock->l_start = (off_t)linux_flock->l_start;
1331 	bsd_flock->l_len = (off_t)linux_flock->l_len;
1332 	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1333 	bsd_flock->l_sysid = 0;
1334 }
1335 
1336 static void
1337 bsd_to_linux_flock(struct flock *bsd_flock, struct l_flock *linux_flock)
1338 {
1339 	switch (bsd_flock->l_type) {
1340 	case F_RDLCK:
1341 		linux_flock->l_type = LINUX_F_RDLCK;
1342 		break;
1343 	case F_WRLCK:
1344 		linux_flock->l_type = LINUX_F_WRLCK;
1345 		break;
1346 	case F_UNLCK:
1347 		linux_flock->l_type = LINUX_F_UNLCK;
1348 		break;
1349 	}
1350 	linux_flock->l_whence = bsd_flock->l_whence;
1351 	linux_flock->l_start = (l_off_t)bsd_flock->l_start;
1352 	linux_flock->l_len = (l_off_t)bsd_flock->l_len;
1353 	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1354 }
1355 
1356 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Record-lock description with l_loff_t offsets, as exchanged with
 * userspace by linux_fcntl64() for F_GETLK64, F_SETLK64 and F_SETLKW64.
 */
struct l_flock64 {
	l_short		l_type;		/* LINUX_F_RDLCK, _WRLCK or _UNLCK */
	l_short		l_whence;	/* copied unchanged into struct flock */
	l_loff_t	l_start;	/* start of the locked region */
	l_loff_t	l_len;		/* length of the locked region */
	l_pid_t		l_pid;		/* process id, as in struct flock */
}
/*
 * Packed only for 32-bit Linux binaries on amd64.
 * NOTE(review): presumably to reproduce the i386 layout -- confirm.
 */
#if defined(__amd64__) && defined(COMPAT_LINUX32)
__packed
#endif
;
1368 
1369 static void
1370 linux_to_bsd_flock64(struct l_flock64 *linux_flock, struct flock *bsd_flock)
1371 {
1372 	switch (linux_flock->l_type) {
1373 	case LINUX_F_RDLCK:
1374 		bsd_flock->l_type = F_RDLCK;
1375 		break;
1376 	case LINUX_F_WRLCK:
1377 		bsd_flock->l_type = F_WRLCK;
1378 		break;
1379 	case LINUX_F_UNLCK:
1380 		bsd_flock->l_type = F_UNLCK;
1381 		break;
1382 	default:
1383 		bsd_flock->l_type = -1;
1384 		break;
1385 	}
1386 	bsd_flock->l_whence = linux_flock->l_whence;
1387 	bsd_flock->l_start = (off_t)linux_flock->l_start;
1388 	bsd_flock->l_len = (off_t)linux_flock->l_len;
1389 	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1390 	bsd_flock->l_sysid = 0;
1391 }
1392 
1393 static void
1394 bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock)
1395 {
1396 	switch (bsd_flock->l_type) {
1397 	case F_RDLCK:
1398 		linux_flock->l_type = LINUX_F_RDLCK;
1399 		break;
1400 	case F_WRLCK:
1401 		linux_flock->l_type = LINUX_F_WRLCK;
1402 		break;
1403 	case F_UNLCK:
1404 		linux_flock->l_type = LINUX_F_UNLCK;
1405 		break;
1406 	}
1407 	linux_flock->l_whence = bsd_flock->l_whence;
1408 	linux_flock->l_start = (l_loff_t)bsd_flock->l_start;
1409 	linux_flock->l_len = (l_loff_t)bsd_flock->l_len;
1410 	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1411 }
1412 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1413 
1414 static int
1415 fcntl_common(struct thread *td, struct linux_fcntl_args *args)
1416 {
1417 	struct l_flock linux_flock;
1418 	struct flock bsd_flock;
1419 	struct file *fp;
1420 	long arg;
1421 	int error, result;
1422 
1423 	switch (args->cmd) {
1424 	case LINUX_F_DUPFD:
1425 		return (kern_fcntl(td, args->fd, F_DUPFD, args->arg));
1426 
1427 	case LINUX_F_GETFD:
1428 		return (kern_fcntl(td, args->fd, F_GETFD, 0));
1429 
1430 	case LINUX_F_SETFD:
1431 		return (kern_fcntl(td, args->fd, F_SETFD, args->arg));
1432 
1433 	case LINUX_F_GETFL:
1434 		error = kern_fcntl(td, args->fd, F_GETFL, 0);
1435 		result = td->td_retval[0];
1436 		td->td_retval[0] = 0;
1437 		if (result & O_RDONLY)
1438 			td->td_retval[0] |= LINUX_O_RDONLY;
1439 		if (result & O_WRONLY)
1440 			td->td_retval[0] |= LINUX_O_WRONLY;
1441 		if (result & O_RDWR)
1442 			td->td_retval[0] |= LINUX_O_RDWR;
1443 		if (result & O_NDELAY)
1444 			td->td_retval[0] |= LINUX_O_NONBLOCK;
1445 		if (result & O_APPEND)
1446 			td->td_retval[0] |= LINUX_O_APPEND;
1447 		if (result & O_FSYNC)
1448 			td->td_retval[0] |= LINUX_O_SYNC;
1449 		if (result & O_ASYNC)
1450 			td->td_retval[0] |= LINUX_O_ASYNC;
1451 #ifdef LINUX_O_NOFOLLOW
1452 		if (result & O_NOFOLLOW)
1453 			td->td_retval[0] |= LINUX_O_NOFOLLOW;
1454 #endif
1455 #ifdef LINUX_O_DIRECT
1456 		if (result & O_DIRECT)
1457 			td->td_retval[0] |= LINUX_O_DIRECT;
1458 #endif
1459 		return (error);
1460 
1461 	case LINUX_F_SETFL:
1462 		arg = 0;
1463 		if (args->arg & LINUX_O_NDELAY)
1464 			arg |= O_NONBLOCK;
1465 		if (args->arg & LINUX_O_APPEND)
1466 			arg |= O_APPEND;
1467 		if (args->arg & LINUX_O_SYNC)
1468 			arg |= O_FSYNC;
1469 		if (args->arg & LINUX_O_ASYNC)
1470 			arg |= O_ASYNC;
1471 #ifdef LINUX_O_NOFOLLOW
1472 		if (args->arg & LINUX_O_NOFOLLOW)
1473 			arg |= O_NOFOLLOW;
1474 #endif
1475 #ifdef LINUX_O_DIRECT
1476 		if (args->arg & LINUX_O_DIRECT)
1477 			arg |= O_DIRECT;
1478 #endif
1479 		return (kern_fcntl(td, args->fd, F_SETFL, arg));
1480 
1481 	case LINUX_F_GETLK:
1482 		error = copyin((void *)args->arg, &linux_flock,
1483 		    sizeof(linux_flock));
1484 		if (error)
1485 			return (error);
1486 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1487 		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1488 		if (error)
1489 			return (error);
1490 		bsd_to_linux_flock(&bsd_flock, &linux_flock);
1491 		return (copyout(&linux_flock, (void *)args->arg,
1492 		    sizeof(linux_flock)));
1493 
1494 	case LINUX_F_SETLK:
1495 		error = copyin((void *)args->arg, &linux_flock,
1496 		    sizeof(linux_flock));
1497 		if (error)
1498 			return (error);
1499 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1500 		return (kern_fcntl(td, args->fd, F_SETLK,
1501 		    (intptr_t)&bsd_flock));
1502 
1503 	case LINUX_F_SETLKW:
1504 		error = copyin((void *)args->arg, &linux_flock,
1505 		    sizeof(linux_flock));
1506 		if (error)
1507 			return (error);
1508 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1509 		return (kern_fcntl(td, args->fd, F_SETLKW,
1510 		     (intptr_t)&bsd_flock));
1511 
1512 	case LINUX_F_GETOWN:
1513 		return (kern_fcntl(td, args->fd, F_GETOWN, 0));
1514 
1515 	case LINUX_F_SETOWN:
1516 		/*
1517 		 * XXX some Linux applications depend on F_SETOWN having no
1518 		 * significant effect for pipes (SIGIO is not delivered for
1519 		 * pipes under Linux-2.2.35 at least).
1520 		 */
1521 		error = fget(td, args->fd,
1522 		    &cap_fcntl_rights, &fp);
1523 		if (error)
1524 			return (error);
1525 		if (fp->f_type == DTYPE_PIPE) {
1526 			fdrop(fp, td);
1527 			return (EINVAL);
1528 		}
1529 		fdrop(fp, td);
1530 
1531 		return (kern_fcntl(td, args->fd, F_SETOWN, args->arg));
1532 
1533 	case LINUX_F_DUPFD_CLOEXEC:
1534 		return (kern_fcntl(td, args->fd, F_DUPFD_CLOEXEC, args->arg));
1535 	/*
1536 	 * Our F_SEAL_* values match Linux one for maximum compatibility.  So we
1537 	 * only needed to account for different values for fcntl(2) commands.
1538 	 */
1539 	case LINUX_F_GET_SEALS:
1540 		error = kern_fcntl(td, args->fd, F_GET_SEALS, 0);
1541 		if (error != 0)
1542 			return (error);
1543 		td->td_retval[0] = bsd_to_linux_bits(td->td_retval[0],
1544 		    seal_bitmap, 0);
1545 		return (0);
1546 
1547 	case LINUX_F_ADD_SEALS:
1548 		return (kern_fcntl(td, args->fd, F_ADD_SEALS,
1549 		    linux_to_bsd_bits(args->arg, seal_bitmap, 0)));
1550 	default:
1551 		linux_msg(td, "unsupported fcntl cmd %d\n", args->cmd);
1552 		return (EINVAL);
1553 	}
1554 }
1555 
/*
 * fcntl(2) entry point: everything is handled by fcntl_common().
 */
int
linux_fcntl(struct thread *td, struct linux_fcntl_args *args)
{
	int error;

	error = fcntl_common(td, args);
	return (error);
}
1562 
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * fcntl64(2): the three record-locking commands that use struct l_flock64
 * are handled here; every other command is repackaged and passed on to
 * fcntl_common().
 */
int
linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args)
{
	struct l_flock64 linux_flock;
	struct flock bsd_flock;
	struct linux_fcntl_args fcntl_args;
	int error;

	switch (args->cmd) {
	case LINUX_F_GETLK64:
		error = copyin((void *)args->arg, &linux_flock,
		    sizeof(linux_flock));
		if (error)
			return (error);
		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
		if (error)
			return (error);
		/* Hand the lock description found back to the caller. */
		bsd_to_linux_flock64(&bsd_flock, &linux_flock);
		error = copyout(&linux_flock, (void *)args->arg,
		    sizeof(linux_flock));
		return (error);

	case LINUX_F_SETLK64:
	case LINUX_F_SETLKW64:
		/* The two commands differ only in the native command used. */
		error = copyin((void *)args->arg, &linux_flock,
		    sizeof(linux_flock));
		if (error)
			return (error);
		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
		return (kern_fcntl(td, args->fd,
		    args->cmd == LINUX_F_SETLK64 ? F_SETLK : F_SETLKW,
		    (intptr_t)&bsd_flock));

	default:
		break;
	}

	/* Not a 64-bit locking command: use the common implementation. */
	fcntl_args.arg = args->arg;
	fcntl_args.cmd = args->cmd;
	fcntl_args.fd = args->fd;
	return (fcntl_common(td, &fcntl_args));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1611 
#ifdef LINUX_LEGACY_SYSCALLS
/*
 * chown(2): change owner and group of `path', resolved relative to the
 * current directory, with no lookup flags.
 */
int
linux_chown(struct thread *td, struct linux_chown_args *args)
{
	char *path;
	int error;

	if (LUSECONVPATH(td)) {
		/* Operate on the converted, kernel-space copy of the path. */
		LCONVPATHEXIST(td, args->path, &path);
		error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE,
		    args->uid, args->gid, 0);
		LFREEPATH(path);
	} else {
		error = kern_fchownat(td, AT_FDCWD, args->path, UIO_USERSPACE,
		    args->uid, args->gid, 0);
	}
	return (error);
}
#endif
1630 
1631 int
1632 linux_fchownat(struct thread *td, struct linux_fchownat_args *args)
1633 {
1634 	char *path;
1635 	int error, dfd, flag;
1636 
1637 	if (args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW)
1638 		return (EINVAL);
1639 
1640 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD :  args->dfd;
1641 	flag = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) == 0 ? 0 :
1642 	    AT_SYMLINK_NOFOLLOW;
1643 	if (!LUSECONVPATH(td)) {
1644 		return (kern_fchownat(td, dfd, args->filename, UIO_USERSPACE,
1645 		    args->uid, args->gid, flag));
1646 	}
1647 	LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
1648 	error = kern_fchownat(td, dfd, path, UIO_SYSSPACE, args->uid, args->gid,
1649 	    flag);
1650 	LFREEPATH(path);
1651 	return (error);
1652 }
1653 
#ifdef LINUX_LEGACY_SYSCALLS
/*
 * lchown(2): like chown(2), but the lookup is done with
 * AT_SYMLINK_NOFOLLOW.
 */
int
linux_lchown(struct thread *td, struct linux_lchown_args *args)
{
	char *path;
	int error;

	if (LUSECONVPATH(td)) {
		/* Operate on the converted, kernel-space copy of the path. */
		LCONVPATHEXIST(td, args->path, &path);
		error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE,
		    args->uid, args->gid, AT_SYMLINK_NOFOLLOW);
		LFREEPATH(path);
	} else {
		error = kern_fchownat(td, AT_FDCWD, args->path, UIO_USERSPACE,
		    args->uid, args->gid, AT_SYMLINK_NOFOLLOW);
	}
	return (error);
}
#endif
1672 
1673 static int
1674 convert_fadvice(int advice)
1675 {
1676 	switch (advice) {
1677 	case LINUX_POSIX_FADV_NORMAL:
1678 		return (POSIX_FADV_NORMAL);
1679 	case LINUX_POSIX_FADV_RANDOM:
1680 		return (POSIX_FADV_RANDOM);
1681 	case LINUX_POSIX_FADV_SEQUENTIAL:
1682 		return (POSIX_FADV_SEQUENTIAL);
1683 	case LINUX_POSIX_FADV_WILLNEED:
1684 		return (POSIX_FADV_WILLNEED);
1685 	case LINUX_POSIX_FADV_DONTNEED:
1686 		return (POSIX_FADV_DONTNEED);
1687 	case LINUX_POSIX_FADV_NOREUSE:
1688 		return (POSIX_FADV_NOREUSE);
1689 	default:
1690 		return (-1);
1691 	}
1692 }
1693 
1694 int
1695 linux_fadvise64(struct thread *td, struct linux_fadvise64_args *args)
1696 {
1697 	off_t offset;
1698 	int advice;
1699 
1700 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1701 	offset = PAIR32TO64(off_t, args->offset);
1702 #else
1703 	offset = args->offset;
1704 #endif
1705 
1706 	advice = convert_fadvice(args->advice);
1707 	if (advice == -1)
1708 		return (EINVAL);
1709 	return (kern_posix_fadvise(td, args->fd, offset, args->len, advice));
1710 }
1711 
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * fadvise64_64(2): as fadvise64(2), but the length is a 64-bit quantity as
 * well.  Returns EINVAL for an advice value that convert_fadvice() rejects.
 */
int
linux_fadvise64_64(struct thread *td, struct linux_fadvise64_64_args *args)
{
	off_t len, offset;
	int advice;

	advice = convert_fadvice(args->advice);
	if (advice == -1)
		return (EINVAL);

	/* 32-bit processes on amd64 pass each 64-bit value as a pair. */
#if defined(__amd64__) && defined(COMPAT_LINUX32)
	offset = PAIR32TO64(off_t, args->offset);
	len = PAIR32TO64(off_t, args->len);
#else
	offset = args->offset;
	len = args->len;
#endif

	return (kern_posix_fadvise(td, args->fd, offset, len, advice));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1733 
#ifdef LINUX_LEGACY_SYSCALLS
/*
 * pipe(2): create a pipe and copy the two descriptors out to
 * args->pipefds.  If the copy-out fails both descriptors are closed again.
 */
int
linux_pipe(struct thread *td, struct linux_pipe_args *args)
{
	int fildes[2];
	int error;

	error = kern_pipe(td, fildes, 0, NULL, NULL);
	if (error == 0) {
		error = copyout(fildes, args->pipefds, sizeof(fildes));
		if (error != 0) {
			/* The caller never learned the fds; undo the pipe. */
			(void)kern_close(td, fildes[0]);
			(void)kern_close(td, fildes[1]);
		}
	}
	return (error);
}
#endif
1754 
1755 int
1756 linux_pipe2(struct thread *td, struct linux_pipe2_args *args)
1757 {
1758 	int fildes[2];
1759 	int error, flags;
1760 
1761 	if ((args->flags & ~(LINUX_O_NONBLOCK | LINUX_O_CLOEXEC)) != 0)
1762 		return (EINVAL);
1763 
1764 	flags = 0;
1765 	if ((args->flags & LINUX_O_NONBLOCK) != 0)
1766 		flags |= O_NONBLOCK;
1767 	if ((args->flags & LINUX_O_CLOEXEC) != 0)
1768 		flags |= O_CLOEXEC;
1769 	error = kern_pipe(td, fildes, flags, NULL, NULL);
1770 	if (error != 0)
1771 		return (error);
1772 
1773 	error = copyout(fildes, args->pipefds, sizeof(fildes));
1774 	if (error != 0) {
1775 		(void)kern_close(td, fildes[0]);
1776 		(void)kern_close(td, fildes[1]);
1777 	}
1778 
1779 	return (error);
1780 }
1781 
1782 int
1783 linux_dup3(struct thread *td, struct linux_dup3_args *args)
1784 {
1785 	int cmd;
1786 	intptr_t newfd;
1787 
1788 	if (args->oldfd == args->newfd)
1789 		return (EINVAL);
1790 	if ((args->flags & ~LINUX_O_CLOEXEC) != 0)
1791 		return (EINVAL);
1792 	if (args->flags & LINUX_O_CLOEXEC)
1793 		cmd = F_DUP2FD_CLOEXEC;
1794 	else
1795 		cmd = F_DUP2FD;
1796 
1797 	newfd = args->newfd;
1798 	return (kern_fcntl(td, args->oldfd, cmd, newfd));
1799 }
1800 
1801 int
1802 linux_fallocate(struct thread *td, struct linux_fallocate_args *args)
1803 {
1804 	off_t len, offset;
1805 
1806 	/*
1807 	 * We emulate only posix_fallocate system call for which
1808 	 * mode should be 0.
1809 	 */
1810 	if (args->mode != 0)
1811 		return (EOPNOTSUPP);
1812 
1813 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1814 	len = PAIR32TO64(off_t, args->len);
1815 	offset = PAIR32TO64(off_t, args->offset);
1816 #else
1817 	len = args->len;
1818 	offset = args->offset;
1819 #endif
1820 
1821 	return (kern_posix_fallocate(td, args->fd, offset, len));
1822 }
1823 
1824 int
1825 linux_copy_file_range(struct thread *td, struct linux_copy_file_range_args
1826     *args)
1827 {
1828 	l_loff_t inoff, outoff, *inoffp, *outoffp;
1829 	int error, flags;
1830 
1831 	/*
1832 	 * copy_file_range(2) on Linux doesn't define any flags (yet), so is
1833 	 * the native implementation.  Enforce it.
1834 	 */
1835 	if (args->flags != 0) {
1836 		linux_msg(td, "copy_file_range unsupported flags 0x%x",
1837 		    args->flags);
1838 		return (EINVAL);
1839 	}
1840 	flags = 0;
1841 	inoffp = outoffp = NULL;
1842 	if (args->off_in != NULL) {
1843 		error = copyin(args->off_in, &inoff, sizeof(l_loff_t));
1844 		if (error != 0)
1845 			return (error);
1846 		inoffp = &inoff;
1847 	}
1848 	if (args->off_out != NULL) {
1849 		error = copyin(args->off_out, &outoff, sizeof(l_loff_t));
1850 		if (error != 0)
1851 			return (error);
1852 		outoffp = &outoff;
1853 	}
1854 
1855 	error = kern_copy_file_range(td, args->fd_in, inoffp, args->fd_out,
1856 	    outoffp, args->len, flags);
1857 	if (error == 0 && args->off_in != NULL)
1858 		error = copyout(inoffp, args->off_in, sizeof(l_loff_t));
1859 	if (error == 0 && args->off_out != NULL)
1860 		error = copyout(outoffp, args->off_out, sizeof(l_loff_t));
1861 	return (error);
1862 }
1863 
1864 #define	LINUX_MEMFD_PREFIX	"memfd:"
1865 
1866 int
1867 linux_memfd_create(struct thread *td, struct linux_memfd_create_args *args)
1868 {
1869 	char memfd_name[LINUX_NAME_MAX + 1];
1870 	int error, flags, shmflags, oflags;
1871 
1872 	/*
1873 	 * This is our clever trick to avoid the heap allocation to copy in the
1874 	 * uname.  We don't really need to go this far out of our way, but it
1875 	 * does keep the rest of this function fairly clean as they don't have
1876 	 * to worry about cleanup on the way out.
1877 	 */
1878 	error = copyinstr(args->uname_ptr,
1879 	    memfd_name + sizeof(LINUX_MEMFD_PREFIX) - 1,
1880 	    LINUX_NAME_MAX - sizeof(LINUX_MEMFD_PREFIX) - 1, NULL);
1881 	if (error != 0) {
1882 		if (error == ENAMETOOLONG)
1883 			error = EINVAL;
1884 		return (error);
1885 	}
1886 
1887 	memcpy(memfd_name, LINUX_MEMFD_PREFIX, sizeof(LINUX_MEMFD_PREFIX) - 1);
1888 	flags = linux_to_bsd_bits(args->flags, mfd_bitmap, 0);
1889 	if ((flags & ~(MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB |
1890 	    MFD_HUGE_MASK)) != 0)
1891 		return (EINVAL);
1892 	/* Size specified but no HUGETLB. */
1893 	if ((flags & MFD_HUGE_MASK) != 0 && (flags & MFD_HUGETLB) == 0)
1894 		return (EINVAL);
1895 	/* We don't actually support HUGETLB. */
1896 	if ((flags & MFD_HUGETLB) != 0)
1897 		return (ENOSYS);
1898 	oflags = O_RDWR;
1899 	shmflags = SHM_GROW_ON_WRITE;
1900 	if ((flags & MFD_CLOEXEC) != 0)
1901 		oflags |= O_CLOEXEC;
1902 	if ((flags & MFD_ALLOW_SEALING) != 0)
1903 		shmflags |= SHM_ALLOW_SEALING;
1904 	return (kern_shm_open2(td, SHM_ANON, oflags, 0, shmflags, NULL,
1905 	    memfd_name));
1906 }
1907 
/*
 * splice(2) placeholder: logs that the call is not implemented and fails.
 */
int
linux_splice(struct thread *td, struct linux_splice_args *args)
{
	int error;

	/*
	 * EINVAL is among the errors splice(2) is documented to return, so
	 * it is used instead of ENOSYS in the hope that the caller takes
	 * its fallback path.
	 */
	error = EINVAL;

	linux_msg(td, "syscall splice not really implemented");
	return (error);
}
1921