xref: /freebsd/sys/compat/linux/linux_file.c (revision bce7ee9d412b6410e6d799c4a417617cbb148e09)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1994-1995 Søren Schmidt
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_compat.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/capsicum.h>
37 #include <sys/conf.h>
38 #include <sys/dirent.h>
39 #include <sys/fcntl.h>
40 #include <sys/file.h>
41 #include <sys/filedesc.h>
42 #include <sys/lock.h>
43 #include <sys/malloc.h>
44 #include <sys/mman.h>
45 #include <sys/mount.h>
46 #include <sys/mutex.h>
47 #include <sys/namei.h>
48 #include <sys/proc.h>
49 #include <sys/stat.h>
50 #include <sys/sx.h>
51 #include <sys/syscallsubr.h>
52 #include <sys/sysproto.h>
53 #include <sys/tty.h>
54 #include <sys/unistd.h>
55 #include <sys/vnode.h>
56 
57 #ifdef COMPAT_LINUX32
58 #include <compat/freebsd32/freebsd32_misc.h>
59 #include <machine/../linux32/linux.h>
60 #include <machine/../linux32/linux32_proto.h>
61 #else
62 #include <machine/../linux/linux.h>
63 #include <machine/../linux/linux_proto.h>
64 #endif
65 #include <compat/linux/linux_misc.h>
66 #include <compat/linux/linux_util.h>
67 #include <compat/linux/linux_file.h>
68 
/*
 * Forward declarations of the file-local helpers; both are defined
 * later in this file.
 */
static int	linux_common_open(struct thread *, int, const char *, int, int,
		    enum uio_seg);
static int	linux_getdents_error(struct thread *, int, int);
72 
/*
 * Translation table for the file-seal flag bits (F_SEAL_*).  Every entry
 * is produced by BITMAP_1t1_LINUX().
 * NOTE(review): the macro name suggests it pairs each native constant
 * with the LINUX_-prefixed constant of the same name -- confirm against
 * the macro definition.
 */
static struct bsd_to_linux_bitmap seal_bitmap[] = {
	BITMAP_1t1_LINUX(F_SEAL_SEAL),
	BITMAP_1t1_LINUX(F_SEAL_SHRINK),
	BITMAP_1t1_LINUX(F_SEAL_GROW),
	BITMAP_1t1_LINUX(F_SEAL_WRITE),
};
79 
/*
 * Build one bsd_to_linux_bitmap entry for a huge-page size: the native
 * side is MFD_HUGE_<size>, the Linux side is
 * LINUX_HUGETLB_FLAG_ENCODE_<size>.  The macro is only needed while the
 * table below is being defined and is #undef'ed right after it.
 */
#define	MFD_HUGETLB_ENTRY(_size)					\
	{								\
		.bsd_value = MFD_HUGE_##_size,				\
		.linux_value = LINUX_HUGETLB_FLAG_ENCODE_##_size	\
	}
/*
 * Translation table for the MFD_* flags: the generic flags first,
 * followed by one entry per supported huge-page size.
 */
static struct bsd_to_linux_bitmap mfd_bitmap[] = {
	BITMAP_1t1_LINUX(MFD_CLOEXEC),
	BITMAP_1t1_LINUX(MFD_ALLOW_SEALING),
	BITMAP_1t1_LINUX(MFD_HUGETLB),
	MFD_HUGETLB_ENTRY(64KB),
	MFD_HUGETLB_ENTRY(512KB),
	MFD_HUGETLB_ENTRY(1MB),
	MFD_HUGETLB_ENTRY(2MB),
	MFD_HUGETLB_ENTRY(8MB),
	MFD_HUGETLB_ENTRY(16MB),
	MFD_HUGETLB_ENTRY(32MB),
	MFD_HUGETLB_ENTRY(256MB),
	MFD_HUGETLB_ENTRY(512MB),
	MFD_HUGETLB_ENTRY(1GB),
	MFD_HUGETLB_ENTRY(2GB),
	MFD_HUGETLB_ENTRY(16GB),
};
#undef MFD_HUGETLB_ENTRY
103 
104 #ifdef LINUX_LEGACY_SYSCALLS
105 int
106 linux_creat(struct thread *td, struct linux_creat_args *args)
107 {
108 	char *path;
109 	int error;
110 
111 	if (!LUSECONVPATH(td)) {
112 		return (kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE,
113 		    O_WRONLY | O_CREAT | O_TRUNC, args->mode));
114 	}
115 	LCONVPATHEXIST(td, args->path, &path);
116 	error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE,
117 	    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
118 	LFREEPATH(path);
119 	return (error);
120 }
121 #endif
122 
123 static int
124 linux_common_open(struct thread *td, int dirfd, const char *path, int l_flags,
125     int mode, enum uio_seg seg)
126 {
127 	struct proc *p = td->td_proc;
128 	struct file *fp;
129 	int fd;
130 	int bsd_flags, error;
131 
132 	bsd_flags = 0;
133 	switch (l_flags & LINUX_O_ACCMODE) {
134 	case LINUX_O_WRONLY:
135 		bsd_flags |= O_WRONLY;
136 		break;
137 	case LINUX_O_RDWR:
138 		bsd_flags |= O_RDWR;
139 		break;
140 	default:
141 		bsd_flags |= O_RDONLY;
142 	}
143 	if (l_flags & LINUX_O_NDELAY)
144 		bsd_flags |= O_NONBLOCK;
145 	if (l_flags & LINUX_O_APPEND)
146 		bsd_flags |= O_APPEND;
147 	if (l_flags & LINUX_O_SYNC)
148 		bsd_flags |= O_FSYNC;
149 	if (l_flags & LINUX_O_CLOEXEC)
150 		bsd_flags |= O_CLOEXEC;
151 	if (l_flags & LINUX_O_NONBLOCK)
152 		bsd_flags |= O_NONBLOCK;
153 	if (l_flags & LINUX_O_ASYNC)
154 		bsd_flags |= O_ASYNC;
155 	if (l_flags & LINUX_O_CREAT)
156 		bsd_flags |= O_CREAT;
157 	if (l_flags & LINUX_O_TRUNC)
158 		bsd_flags |= O_TRUNC;
159 	if (l_flags & LINUX_O_EXCL)
160 		bsd_flags |= O_EXCL;
161 	if (l_flags & LINUX_O_NOCTTY)
162 		bsd_flags |= O_NOCTTY;
163 	if (l_flags & LINUX_O_DIRECT)
164 		bsd_flags |= O_DIRECT;
165 	if (l_flags & LINUX_O_NOFOLLOW)
166 		bsd_flags |= O_NOFOLLOW;
167 	if (l_flags & LINUX_O_DIRECTORY)
168 		bsd_flags |= O_DIRECTORY;
169 	/* XXX LINUX_O_NOATIME: unable to be easily implemented. */
170 
171 	error = kern_openat(td, dirfd, path, seg, bsd_flags, mode);
172 	if (error != 0) {
173 		if (error == EMLINK)
174 			error = ELOOP;
175 		goto done;
176 	}
177 	if (p->p_flag & P_CONTROLT)
178 		goto done;
179 	if (bsd_flags & O_NOCTTY)
180 		goto done;
181 
182 	/*
183 	 * XXX In between kern_openat() and fget(), another process
184 	 * having the same filedesc could use that fd without
185 	 * checking below.
186 	*/
187 	fd = td->td_retval[0];
188 	if (fget(td, fd, &cap_ioctl_rights, &fp) == 0) {
189 		if (fp->f_type != DTYPE_VNODE) {
190 			fdrop(fp, td);
191 			goto done;
192 		}
193 		sx_slock(&proctree_lock);
194 		PROC_LOCK(p);
195 		if (SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) {
196 			PROC_UNLOCK(p);
197 			sx_sunlock(&proctree_lock);
198 			/* XXXPJD: Verify if TIOCSCTTY is allowed. */
199 			(void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0,
200 			    td->td_ucred, td);
201 		} else {
202 			PROC_UNLOCK(p);
203 			sx_sunlock(&proctree_lock);
204 		}
205 		fdrop(fp, td);
206 	}
207 
208 done:
209 	return (error);
210 }
211 
212 int
213 linux_openat(struct thread *td, struct linux_openat_args *args)
214 {
215 	char *path;
216 	int dfd, error;
217 
218 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
219 	if (!LUSECONVPATH(td)) {
220 		return (linux_common_open(td, dfd, args->filename, args->flags,
221 		    args->mode, UIO_USERSPACE));
222 	}
223 	if (args->flags & LINUX_O_CREAT)
224 		LCONVPATH_AT(td, args->filename, &path, 1, dfd);
225 	else
226 		LCONVPATH_AT(td, args->filename, &path, 0, dfd);
227 
228 	error = linux_common_open(td, dfd, path, args->flags, args->mode,
229 	    UIO_SYSSPACE);
230 	LFREEPATH(path);
231 	return (error);
232 }
233 
234 #ifdef LINUX_LEGACY_SYSCALLS
235 int
236 linux_open(struct thread *td, struct linux_open_args *args)
237 {
238 	char *path;
239 	int error;
240 
241 	if (!LUSECONVPATH(td)) {
242 		return (linux_common_open(td, AT_FDCWD, args->path, args->flags,
243 		    args->mode, UIO_USERSPACE));
244 	}
245 	if (args->flags & LINUX_O_CREAT)
246 		LCONVPATHCREAT(td, args->path, &path);
247 	else
248 		LCONVPATHEXIST(td, args->path, &path);
249 
250 	error = linux_common_open(td, AT_FDCWD, path, args->flags, args->mode,
251 	    UIO_SYSSPACE);
252 	LFREEPATH(path);
253 	return (error);
254 }
255 #endif
256 
257 int
258 linux_lseek(struct thread *td, struct linux_lseek_args *args)
259 {
260 
261 	return (kern_lseek(td, args->fdes, args->off, args->whence));
262 }
263 
264 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
265 int
266 linux_llseek(struct thread *td, struct linux_llseek_args *args)
267 {
268 	int error;
269 	off_t off;
270 
271 	off = (args->olow) | (((off_t) args->ohigh) << 32);
272 
273 	error = kern_lseek(td, args->fd, off, args->whence);
274 	if (error != 0)
275 		return (error);
276 
277 	error = copyout(td->td_retval, args->res, sizeof(off_t));
278 	if (error != 0)
279 		return (error);
280 
281 	td->td_retval[0] = 0;
282 	return (0);
283 }
284 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
285 
/*
 * Note that linux_getdents(2) and linux_getdents64(2) have the same
 * arguments. They only differ in the definition of struct dirent they
 * operate on.
 * Note that linux_readdir(2) is a special case of linux_getdents(2)
 * where count is always equal to 1, meaning that the buffer is one
 * dirent-structure in size and that the code can't handle more anyway.
 * Note that linux_readdir(2) can't be implemented by means of
 * linux_getdents(2), because when the *dent buffer size is equal to 1,
 * linux_getdents(2) will trash the user stack.
 */
297 
298 static int
299 linux_getdents_error(struct thread *td, int fd, int err)
300 {
301 	struct vnode *vp;
302 	struct file *fp;
303 	int error;
304 
305 	/* Linux return ENOTDIR in case when fd is not a directory. */
306 	error = getvnode(td, fd, &cap_read_rights, &fp);
307 	if (error != 0)
308 		return (error);
309 	vp = fp->f_vnode;
310 	if (vp->v_type != VDIR) {
311 		fdrop(fp, td);
312 		return (ENOTDIR);
313 	}
314 	fdrop(fp, td);
315 	return (err);
316 }
317 
/*
 * Directory entry record in the layout that linux_getdents() and
 * linux_readdir() copy out to the caller.
 */
struct l_dirent {
	l_ulong		d_ino;		/* filled from the native d_fileno */
	l_off_t		d_off;		/* offset value reported to the caller */
	l_ushort	d_reclen;	/* record length (readdir stores d_namlen) */
	char		d_name[LINUX_NAME_MAX + 1];	/* NUL-terminated name */
};
324 
/*
 * Directory entry record in the layout that linux_getdents64() copies
 * out to the caller.  Unlike struct l_dirent it carries d_type as a
 * regular member.
 */
struct l_dirent64 {
	uint64_t	d_ino;		/* filled from the native d_fileno */
	int64_t		d_off;		/* offset value reported to the caller */
	l_ushort	d_reclen;	/* length of this record */
	u_char		d_type;		/* copied from the native d_type */
	char		d_name[LINUX_NAME_MAX + 1];	/* NUL-terminated name */
};
332 
/*
 * Linux uses the last byte in the dirent buffer to store d_type,
 * at least glibc-2.7 requires it. That is why l_dirent is padded with 2 bytes.
 *
 * LINUX_RECLEN(namlen) is the size of a struct l_dirent record holding
 * a name of `namlen' characters: header, name, the 2 extra bytes, all
 * rounded up to a multiple of sizeof(l_ulong).
 */
#define LINUX_RECLEN(namlen)						\
    roundup(offsetof(struct l_dirent, d_name) + (namlen) + 2, sizeof(l_ulong))

/*
 * LINUX_RECLEN64(namlen) is the struct l_dirent64 counterpart: header,
 * name plus 1 extra byte, rounded up to a multiple of sizeof(uint64_t).
 */
#define LINUX_RECLEN64(namlen)						\
    roundup(offsetof(struct l_dirent64, d_name) + (namlen) + 1,		\
    sizeof(uint64_t))
343 
344 #ifdef LINUX_LEGACY_SYSCALLS
345 int
346 linux_getdents(struct thread *td, struct linux_getdents_args *args)
347 {
348 	struct dirent *bdp;
349 	caddr_t inp, buf;		/* BSD-format */
350 	int len, reclen;		/* BSD-format */
351 	caddr_t outp;			/* Linux-format */
352 	int resid, linuxreclen;		/* Linux-format */
353 	caddr_t lbuf;			/* Linux-format */
354 	off_t base;
355 	struct l_dirent *linux_dirent;
356 	int buflen, error;
357 	size_t retval;
358 
359 	buflen = min(args->count, MAXBSIZE);
360 	buf = malloc(buflen, M_TEMP, M_WAITOK);
361 
362 	error = kern_getdirentries(td, args->fd, buf, buflen,
363 	    &base, NULL, UIO_SYSSPACE);
364 	if (error != 0) {
365 		error = linux_getdents_error(td, args->fd, error);
366 		goto out1;
367 	}
368 
369 	lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
370 
371 	len = td->td_retval[0];
372 	inp = buf;
373 	outp = (caddr_t)args->dent;
374 	resid = args->count;
375 	retval = 0;
376 
377 	while (len > 0) {
378 		bdp = (struct dirent *) inp;
379 		reclen = bdp->d_reclen;
380 		linuxreclen = LINUX_RECLEN(bdp->d_namlen);
381 		/*
382 		 * No more space in the user supplied dirent buffer.
383 		 * Return EINVAL.
384 		 */
385 		if (resid < linuxreclen) {
386 			error = EINVAL;
387 			goto out;
388 		}
389 
390 		linux_dirent = (struct l_dirent*)lbuf;
391 		linux_dirent->d_ino = bdp->d_fileno;
392 		linux_dirent->d_off = base + reclen;
393 		linux_dirent->d_reclen = linuxreclen;
394 		/*
395 		 * Copy d_type to last byte of l_dirent buffer
396 		 */
397 		lbuf[linuxreclen - 1] = bdp->d_type;
398 		strlcpy(linux_dirent->d_name, bdp->d_name,
399 		    linuxreclen - offsetof(struct l_dirent, d_name)-1);
400 		error = copyout(linux_dirent, outp, linuxreclen);
401 		if (error != 0)
402 			goto out;
403 
404 		inp += reclen;
405 		base += reclen;
406 		len -= reclen;
407 
408 		retval += linuxreclen;
409 		outp += linuxreclen;
410 		resid -= linuxreclen;
411 	}
412 	td->td_retval[0] = retval;
413 
414 out:
415 	free(lbuf, M_TEMP);
416 out1:
417 	free(buf, M_TEMP);
418 	return (error);
419 }
420 #endif
421 
422 int
423 linux_getdents64(struct thread *td, struct linux_getdents64_args *args)
424 {
425 	struct dirent *bdp;
426 	caddr_t inp, buf;		/* BSD-format */
427 	int len, reclen;		/* BSD-format */
428 	caddr_t outp;			/* Linux-format */
429 	int resid, linuxreclen;		/* Linux-format */
430 	caddr_t lbuf;			/* Linux-format */
431 	off_t base;
432 	struct l_dirent64 *linux_dirent64;
433 	int buflen, error;
434 	size_t retval;
435 
436 	buflen = min(args->count, MAXBSIZE);
437 	buf = malloc(buflen, M_TEMP, M_WAITOK);
438 
439 	error = kern_getdirentries(td, args->fd, buf, buflen,
440 	    &base, NULL, UIO_SYSSPACE);
441 	if (error != 0) {
442 		error = linux_getdents_error(td, args->fd, error);
443 		goto out1;
444 	}
445 
446 	lbuf = malloc(LINUX_RECLEN64(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
447 
448 	len = td->td_retval[0];
449 	inp = buf;
450 	outp = (caddr_t)args->dirent;
451 	resid = args->count;
452 	retval = 0;
453 
454 	while (len > 0) {
455 		bdp = (struct dirent *) inp;
456 		reclen = bdp->d_reclen;
457 		linuxreclen = LINUX_RECLEN64(bdp->d_namlen);
458 		/*
459 		 * No more space in the user supplied dirent buffer.
460 		 * Return EINVAL.
461 		 */
462 		if (resid < linuxreclen) {
463 			error = EINVAL;
464 			goto out;
465 		}
466 
467 		linux_dirent64 = (struct l_dirent64*)lbuf;
468 		linux_dirent64->d_ino = bdp->d_fileno;
469 		linux_dirent64->d_off = base + reclen;
470 		linux_dirent64->d_reclen = linuxreclen;
471 		linux_dirent64->d_type = bdp->d_type;
472 		strlcpy(linux_dirent64->d_name, bdp->d_name,
473 		    linuxreclen - offsetof(struct l_dirent64, d_name));
474 		error = copyout(linux_dirent64, outp, linuxreclen);
475 		if (error != 0)
476 			goto out;
477 
478 		inp += reclen;
479 		base += reclen;
480 		len -= reclen;
481 
482 		retval += linuxreclen;
483 		outp += linuxreclen;
484 		resid -= linuxreclen;
485 	}
486 	td->td_retval[0] = retval;
487 
488 out:
489 	free(lbuf, M_TEMP);
490 out1:
491 	free(buf, M_TEMP);
492 	return (error);
493 }
494 
495 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
496 int
497 linux_readdir(struct thread *td, struct linux_readdir_args *args)
498 {
499 	struct dirent *bdp;
500 	caddr_t buf;			/* BSD-format */
501 	int linuxreclen;		/* Linux-format */
502 	caddr_t lbuf;			/* Linux-format */
503 	off_t base;
504 	struct l_dirent *linux_dirent;
505 	int buflen, error;
506 
507 	buflen = LINUX_RECLEN(LINUX_NAME_MAX);
508 	buf = malloc(buflen, M_TEMP, M_WAITOK);
509 
510 	error = kern_getdirentries(td, args->fd, buf, buflen,
511 	    &base, NULL, UIO_SYSSPACE);
512 	if (error != 0) {
513 		error = linux_getdents_error(td, args->fd, error);
514 		goto out;
515 	}
516 	if (td->td_retval[0] == 0)
517 		goto out;
518 
519 	lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
520 
521 	bdp = (struct dirent *) buf;
522 	linuxreclen = LINUX_RECLEN(bdp->d_namlen);
523 
524 	linux_dirent = (struct l_dirent*)lbuf;
525 	linux_dirent->d_ino = bdp->d_fileno;
526 	linux_dirent->d_off = linuxreclen;
527 	linux_dirent->d_reclen = bdp->d_namlen;
528 	strlcpy(linux_dirent->d_name, bdp->d_name,
529 	    linuxreclen - offsetof(struct l_dirent, d_name));
530 	error = copyout(linux_dirent, args->dent, linuxreclen);
531 	if (error == 0)
532 		td->td_retval[0] = linuxreclen;
533 
534 	free(lbuf, M_TEMP);
535 out:
536 	free(buf, M_TEMP);
537 	return (error);
538 }
539 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
540 
541 /*
542  * These exist mainly for hooks for doing /compat/linux translation.
543  */
544 
545 #ifdef LINUX_LEGACY_SYSCALLS
546 int
547 linux_access(struct thread *td, struct linux_access_args *args)
548 {
549 	char *path;
550 	int error;
551 
552 	/* Linux convention. */
553 	if (args->amode & ~(F_OK | X_OK | W_OK | R_OK))
554 		return (EINVAL);
555 
556 	if (!LUSECONVPATH(td)) {
557 		error = kern_accessat(td, AT_FDCWD, args->path, UIO_USERSPACE, 0,
558 		    args->amode);
559 	} else {
560 		LCONVPATHEXIST(td, args->path, &path);
561 		error = kern_accessat(td, AT_FDCWD, path, UIO_SYSSPACE, 0,
562 		    args->amode);
563 		LFREEPATH(path);
564 	}
565 
566 	return (error);
567 }
568 #endif
569 
570 int
571 linux_faccessat(struct thread *td, struct linux_faccessat_args *args)
572 {
573 	char *path;
574 	int error, dfd;
575 
576 	/* Linux convention. */
577 	if (args->amode & ~(F_OK | X_OK | W_OK | R_OK))
578 		return (EINVAL);
579 
580 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
581 	if (!LUSECONVPATH(td)) {
582 		error = kern_accessat(td, dfd, args->filename, UIO_USERSPACE, 0, args->amode);
583 	} else {
584 		LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
585 		error = kern_accessat(td, dfd, path, UIO_SYSSPACE, 0, args->amode);
586 		LFREEPATH(path);
587 	}
588 
589 	return (error);
590 }
591 
592 #ifdef LINUX_LEGACY_SYSCALLS
593 int
594 linux_unlink(struct thread *td, struct linux_unlink_args *args)
595 {
596 	char *path;
597 	int error;
598 	struct stat st;
599 
600 	if (!LUSECONVPATH(td)) {
601 		error = kern_funlinkat(td, AT_FDCWD, args->path, FD_NONE,
602 		    UIO_USERSPACE, 0, 0);
603 		if (error == EPERM) {
604 			/* Introduce POSIX noncompliant behaviour of Linux */
605 			if (kern_statat(td, 0, AT_FDCWD, args->path,
606 			    UIO_SYSSPACE, &st, NULL) == 0) {
607 				if (S_ISDIR(st.st_mode))
608 					error = EISDIR;
609 			}
610 		}
611 	} else {
612 		LCONVPATHEXIST(td, args->path, &path);
613 		error = kern_funlinkat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0, 0);
614 		if (error == EPERM) {
615 			/* Introduce POSIX noncompliant behaviour of Linux */
616 			if (kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &st,
617 			    NULL) == 0) {
618 				if (S_ISDIR(st.st_mode))
619 					error = EISDIR;
620 			}
621 		}
622 		LFREEPATH(path);
623 	}
624 
625 	return (error);
626 }
627 #endif
628 
629 static int
630 linux_unlinkat_impl(struct thread *td, enum uio_seg pathseg, const char *path,
631     int dfd, struct linux_unlinkat_args *args)
632 {
633 	struct stat st;
634 	int error;
635 
636 	if (args->flag & LINUX_AT_REMOVEDIR)
637 		error = kern_frmdirat(td, dfd, path, FD_NONE, pathseg, 0);
638 	else
639 		error = kern_funlinkat(td, dfd, path, FD_NONE, pathseg, 0, 0);
640 	if (error == EPERM && !(args->flag & LINUX_AT_REMOVEDIR)) {
641 		/* Introduce POSIX noncompliant behaviour of Linux */
642 		if (kern_statat(td, AT_SYMLINK_NOFOLLOW, dfd, path,
643 		    UIO_SYSSPACE, &st, NULL) == 0 && S_ISDIR(st.st_mode))
644 			error = EISDIR;
645 	}
646 	return (error);
647 }
648 
649 int
650 linux_unlinkat(struct thread *td, struct linux_unlinkat_args *args)
651 {
652 	char *path;
653 	int error, dfd;
654 
655 	if (args->flag & ~LINUX_AT_REMOVEDIR)
656 		return (EINVAL);
657 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
658 	if (!LUSECONVPATH(td)) {
659 		return (linux_unlinkat_impl(td, UIO_USERSPACE, args->pathname,
660 		    dfd, args));
661 	}
662 	LCONVPATHEXIST_AT(td, args->pathname, &path, dfd);
663 	error = linux_unlinkat_impl(td, UIO_SYSSPACE, path, dfd, args);
664 	LFREEPATH(path);
665 	return (error);
666 }
667 int
668 linux_chdir(struct thread *td, struct linux_chdir_args *args)
669 {
670 	char *path;
671 	int error;
672 
673 	if (!LUSECONVPATH(td)) {
674 		return (kern_chdir(td, args->path, UIO_USERSPACE));
675 	}
676 	LCONVPATHEXIST(td, args->path, &path);
677 	error = kern_chdir(td, path, UIO_SYSSPACE);
678 	LFREEPATH(path);
679 	return (error);
680 }
681 
682 #ifdef LINUX_LEGACY_SYSCALLS
683 int
684 linux_chmod(struct thread *td, struct linux_chmod_args *args)
685 {
686 	char *path;
687 	int error;
688 
689 	if (!LUSECONVPATH(td)) {
690 		return (kern_fchmodat(td, AT_FDCWD, args->path, UIO_USERSPACE,
691 		    args->mode, 0));
692 	}
693 	LCONVPATHEXIST(td, args->path, &path);
694 	error = kern_fchmodat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode, 0);
695 	LFREEPATH(path);
696 	return (error);
697 }
698 #endif
699 
700 int
701 linux_fchmodat(struct thread *td, struct linux_fchmodat_args *args)
702 {
703 	char *path;
704 	int error, dfd;
705 
706 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
707 	if (!LUSECONVPATH(td)) {
708 		return (kern_fchmodat(td, dfd, args->filename, UIO_USERSPACE,
709 		    args->mode, 0));
710 	}
711 	LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
712 	error = kern_fchmodat(td, dfd, path, UIO_SYSSPACE, args->mode, 0);
713 	LFREEPATH(path);
714 	return (error);
715 }
716 
717 #ifdef LINUX_LEGACY_SYSCALLS
718 int
719 linux_mkdir(struct thread *td, struct linux_mkdir_args *args)
720 {
721 	char *path;
722 	int error;
723 
724 	if (!LUSECONVPATH(td)) {
725 		return (kern_mkdirat(td, AT_FDCWD, args->path, UIO_USERSPACE, args->mode));
726 	}
727 	LCONVPATHCREAT(td, args->path, &path);
728 	error = kern_mkdirat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode);
729 	LFREEPATH(path);
730 	return (error);
731 }
732 #endif
733 
734 int
735 linux_mkdirat(struct thread *td, struct linux_mkdirat_args *args)
736 {
737 	char *path;
738 	int error, dfd;
739 
740 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
741 	if (!LUSECONVPATH(td)) {
742 		return (kern_mkdirat(td, dfd, args->pathname, UIO_USERSPACE, args->mode));
743 	}
744 	LCONVPATHCREAT_AT(td, args->pathname, &path, dfd);
745 	error = kern_mkdirat(td, dfd, path, UIO_SYSSPACE, args->mode);
746 	LFREEPATH(path);
747 	return (error);
748 }
749 
750 #ifdef LINUX_LEGACY_SYSCALLS
751 int
752 linux_rmdir(struct thread *td, struct linux_rmdir_args *args)
753 {
754 	char *path;
755 	int error;
756 
757 	if (!LUSECONVPATH(td)) {
758 		return (kern_frmdirat(td, AT_FDCWD, args->path, FD_NONE,
759 		    UIO_USERSPACE, 0));
760 	}
761 	LCONVPATHEXIST(td, args->path, &path);
762 	error = kern_frmdirat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0);
763 	LFREEPATH(path);
764 	return (error);
765 }
766 
767 int
768 linux_rename(struct thread *td, struct linux_rename_args *args)
769 {
770 	char *from, *to;
771 	int error;
772 
773 	if (!LUSECONVPATH(td)) {
774 		return (kern_renameat(td, AT_FDCWD, args->from, AT_FDCWD,
775 		    args->to, UIO_USERSPACE));
776 	}
777 	LCONVPATHEXIST(td, args->from, &from);
778 	/* Expand LCONVPATHCREATE so that `from' can be freed on errors */
779 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD);
780 	if (to == NULL) {
781 		LFREEPATH(from);
782 		return (error);
783 	}
784 	error = kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, UIO_SYSSPACE);
785 	LFREEPATH(from);
786 	LFREEPATH(to);
787 	return (error);
788 }
789 #endif
790 
791 int
792 linux_renameat(struct thread *td, struct linux_renameat_args *args)
793 {
794 	struct linux_renameat2_args renameat2_args = {
795 	    .olddfd = args->olddfd,
796 	    .oldname = args->oldname,
797 	    .newdfd = args->newdfd,
798 	    .newname = args->newname,
799 	    .flags = 0
800 	};
801 
802 	return (linux_renameat2(td, &renameat2_args));
803 }
804 
805 int
806 linux_renameat2(struct thread *td, struct linux_renameat2_args *args)
807 {
808 	char *from, *to;
809 	int error, olddfd, newdfd;
810 
811 	if (args->flags != 0) {
812 		if (args->flags & ~(LINUX_RENAME_EXCHANGE |
813 		    LINUX_RENAME_NOREPLACE | LINUX_RENAME_WHITEOUT))
814 			return (EINVAL);
815 		if (args->flags & LINUX_RENAME_EXCHANGE &&
816 		    args->flags & (LINUX_RENAME_NOREPLACE |
817 		    LINUX_RENAME_WHITEOUT))
818 			return (EINVAL);
819 #if 0
820 		/*
821 		 * This spams the console on Ubuntu Focal.
822 		 *
823 		 * What's needed here is a general mechanism to let users know
824 		 * about missing features without hogging the system.
825 		 */
826 		linux_msg(td, "renameat2 unsupported flags 0x%x",
827 		    args->flags);
828 #endif
829 		return (EINVAL);
830 	}
831 
832 	olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd;
833 	newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd;
834 	if (!LUSECONVPATH(td)) {
835 		return (kern_renameat(td, olddfd, args->oldname, newdfd,
836 		    args->newname, UIO_USERSPACE));
837 	}
838 	LCONVPATHEXIST_AT(td, args->oldname, &from, olddfd);
839 	/* Expand LCONVPATHCREATE so that `from' can be freed on errors */
840 	error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd);
841 	if (to == NULL) {
842 		LFREEPATH(from);
843 		return (error);
844 	}
845 	error = kern_renameat(td, olddfd, from, newdfd, to, UIO_SYSSPACE);
846 	LFREEPATH(from);
847 	LFREEPATH(to);
848 	return (error);
849 }
850 
851 #ifdef LINUX_LEGACY_SYSCALLS
852 int
853 linux_symlink(struct thread *td, struct linux_symlink_args *args)
854 {
855 	char *path, *to;
856 	int error;
857 
858 	if (!LUSECONVPATH(td)) {
859 		return (kern_symlinkat(td, args->path, AT_FDCWD, args->to,
860 		    UIO_USERSPACE));
861 	}
862 	LCONVPATHEXIST(td, args->path, &path);
863 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
864 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD);
865 	if (to == NULL) {
866 		LFREEPATH(path);
867 		return (error);
868 	}
869 	error = kern_symlinkat(td, path, AT_FDCWD, to, UIO_SYSSPACE);
870 	LFREEPATH(path);
871 	LFREEPATH(to);
872 	return (error);
873 }
874 #endif
875 
876 int
877 linux_symlinkat(struct thread *td, struct linux_symlinkat_args *args)
878 {
879 	char *path, *to;
880 	int error, dfd;
881 
882 	dfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd;
883 	if (!LUSECONVPATH(td)) {
884 		return (kern_symlinkat(td, args->oldname, dfd, args->newname,
885 		    UIO_USERSPACE));
886 	}
887 	LCONVPATHEXIST(td, args->oldname, &path);
888 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
889 	error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, dfd);
890 	if (to == NULL) {
891 		LFREEPATH(path);
892 		return (error);
893 	}
894 	error = kern_symlinkat(td, path, dfd, to, UIO_SYSSPACE);
895 	LFREEPATH(path);
896 	LFREEPATH(to);
897 	return (error);
898 }
899 
900 #ifdef LINUX_LEGACY_SYSCALLS
901 int
902 linux_readlink(struct thread *td, struct linux_readlink_args *args)
903 {
904 	char *name;
905 	int error;
906 
907 	if (!LUSECONVPATH(td)) {
908 		return (kern_readlinkat(td, AT_FDCWD, args->name, UIO_USERSPACE,
909 		    args->buf, UIO_USERSPACE, args->count));
910 	}
911 	LCONVPATHEXIST(td, args->name, &name);
912 	error = kern_readlinkat(td, AT_FDCWD, name, UIO_SYSSPACE,
913 	    args->buf, UIO_USERSPACE, args->count);
914 	LFREEPATH(name);
915 	return (error);
916 }
917 #endif
918 
919 int
920 linux_readlinkat(struct thread *td, struct linux_readlinkat_args *args)
921 {
922 	char *name;
923 	int error, dfd;
924 
925 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
926 	if (!LUSECONVPATH(td)) {
927 		return (kern_readlinkat(td, dfd, args->path, UIO_USERSPACE,
928 		    args->buf, UIO_USERSPACE, args->bufsiz));
929 	}
930 	LCONVPATHEXIST_AT(td, args->path, &name, dfd);
931 	error = kern_readlinkat(td, dfd, name, UIO_SYSSPACE, args->buf,
932 	    UIO_USERSPACE, args->bufsiz);
933 	LFREEPATH(name);
934 	return (error);
935 }
936 
937 int
938 linux_truncate(struct thread *td, struct linux_truncate_args *args)
939 {
940 	char *path;
941 	int error;
942 
943 	if (!LUSECONVPATH(td)) {
944 		return (kern_truncate(td, args->path, UIO_USERSPACE, args->length));
945 	}
946 	LCONVPATHEXIST(td, args->path, &path);
947 	error = kern_truncate(td, path, UIO_SYSSPACE, args->length);
948 	LFREEPATH(path);
949 	return (error);
950 }
951 
952 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
953 int
954 linux_truncate64(struct thread *td, struct linux_truncate64_args *args)
955 {
956 	char *path;
957 	off_t length;
958 	int error;
959 
960 #if defined(__amd64__) && defined(COMPAT_LINUX32)
961 	length = PAIR32TO64(off_t, args->length);
962 #else
963 	length = args->length;
964 #endif
965 
966 	if (!LUSECONVPATH(td)) {
967 		return (kern_truncate(td, args->path, UIO_USERSPACE, length));
968 	}
969 	LCONVPATHEXIST(td, args->path, &path);
970 	error = kern_truncate(td, path, UIO_SYSSPACE, length);
971 	LFREEPATH(path);
972 	return (error);
973 }
974 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
975 
976 int
977 linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args)
978 {
979 
980 	return (kern_ftruncate(td, args->fd, args->length));
981 }
982 
983 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
984 int
985 linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
986 {
987 	off_t length;
988 
989 #if defined(__amd64__) && defined(COMPAT_LINUX32)
990 	length = PAIR32TO64(off_t, args->length);
991 #else
992 	length = args->length;
993 #endif
994 
995 	return (kern_ftruncate(td, args->fd, length));
996 }
997 #endif
998 
999 #ifdef LINUX_LEGACY_SYSCALLS
1000 int
1001 linux_link(struct thread *td, struct linux_link_args *args)
1002 {
1003 	char *path, *to;
1004 	int error;
1005 
1006 	if (!LUSECONVPATH(td)) {
1007 		return (kern_linkat(td, AT_FDCWD, AT_FDCWD, args->path, args->to,
1008 		    UIO_USERSPACE, FOLLOW));
1009 	}
1010 	LCONVPATHEXIST(td, args->path, &path);
1011 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
1012 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD);
1013 	if (to == NULL) {
1014 		LFREEPATH(path);
1015 		return (error);
1016 	}
1017 	error = kern_linkat(td, AT_FDCWD, AT_FDCWD, path, to, UIO_SYSSPACE,
1018 	    FOLLOW);
1019 	LFREEPATH(path);
1020 	LFREEPATH(to);
1021 	return (error);
1022 }
1023 #endif
1024 
1025 int
1026 linux_linkat(struct thread *td, struct linux_linkat_args *args)
1027 {
1028 	char *path, *to;
1029 	int error, olddfd, newdfd, follow;
1030 
1031 	if (args->flag & ~LINUX_AT_SYMLINK_FOLLOW)
1032 		return (EINVAL);
1033 
1034 	olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd;
1035 	newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd;
1036 	follow = (args->flag & LINUX_AT_SYMLINK_FOLLOW) == 0 ? NOFOLLOW :
1037 	    FOLLOW;
1038 	if (!LUSECONVPATH(td)) {
1039 		return (kern_linkat(td, olddfd, newdfd, args->oldname,
1040 		    args->newname, UIO_USERSPACE, follow));
1041 	}
1042 	LCONVPATHEXIST_AT(td, args->oldname, &path, olddfd);
1043 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
1044 	error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd);
1045 	if (to == NULL) {
1046 		LFREEPATH(path);
1047 		return (error);
1048 	}
1049 	error = kern_linkat(td, olddfd, newdfd, path, to, UIO_SYSSPACE, follow);
1050 	LFREEPATH(path);
1051 	LFREEPATH(to);
1052 	return (error);
1053 }
1054 
1055 int
1056 linux_fdatasync(struct thread *td, struct linux_fdatasync_args *uap)
1057 {
1058 
1059 	return (kern_fsync(td, uap->fd, false));
1060 }
1061 
1062 int
1063 linux_sync_file_range(struct thread *td, struct linux_sync_file_range_args *uap)
1064 {
1065 	off_t nbytes, offset;
1066 
1067 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1068 	nbytes = PAIR32TO64(off_t, uap->nbytes);
1069 	offset = PAIR32TO64(off_t, uap->offset);
1070 #else
1071 	nbytes = uap->nbytes;
1072 	offset = uap->offset;
1073 #endif
1074 
1075 	if (offset < 0 || nbytes < 0 ||
1076 	    (uap->flags & ~(LINUX_SYNC_FILE_RANGE_WAIT_BEFORE |
1077 	    LINUX_SYNC_FILE_RANGE_WRITE |
1078 	    LINUX_SYNC_FILE_RANGE_WAIT_AFTER)) != 0) {
1079 		return (EINVAL);
1080 	}
1081 
1082 	return (kern_fsync(td, uap->fd, false));
1083 }
1084 
1085 int
1086 linux_pread(struct thread *td, struct linux_pread_args *uap)
1087 {
1088 	struct vnode *vp;
1089 	off_t offset;
1090 	int error;
1091 
1092 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1093 	offset = PAIR32TO64(off_t, uap->offset);
1094 #else
1095 	offset = uap->offset;
1096 #endif
1097 
1098 	error = kern_pread(td, uap->fd, uap->buf, uap->nbyte, offset);
1099 	if (error == 0) {
1100 		/* This seems to violate POSIX but Linux does it. */
1101 		error = fgetvp(td, uap->fd, &cap_pread_rights, &vp);
1102 		if (error != 0)
1103 			return (error);
1104 		if (vp->v_type == VDIR)
1105 			error = EISDIR;
1106 		vrele(vp);
1107 	}
1108 	return (error);
1109 }
1110 
1111 int
1112 linux_pwrite(struct thread *td, struct linux_pwrite_args *uap)
1113 {
1114 	off_t offset;
1115 
1116 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1117 	offset = PAIR32TO64(off_t, uap->offset);
1118 #else
1119 	offset = uap->offset;
1120 #endif
1121 
1122 	return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, offset));
1123 }
1124 
1125 int
1126 linux_preadv(struct thread *td, struct linux_preadv_args *uap)
1127 {
1128 	struct uio *auio;
1129 	int error;
1130 	off_t offset;
1131 
1132 	/*
1133 	 * According http://man7.org/linux/man-pages/man2/preadv.2.html#NOTES
1134 	 * pos_l and pos_h, respectively, contain the
1135 	 * low order and high order 32 bits of offset.
1136 	 */
1137 	offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) <<
1138 	    (sizeof(offset) * 4)) | uap->pos_l;
1139 	if (offset < 0)
1140 		return (EINVAL);
1141 #ifdef COMPAT_LINUX32
1142 	error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio);
1143 #else
1144 	error = copyinuio(uap->vec, uap->vlen, &auio);
1145 #endif
1146 	if (error != 0)
1147 		return (error);
1148 	error = kern_preadv(td, uap->fd, auio, offset);
1149 	free(auio, M_IOV);
1150 	return (error);
1151 }
1152 
1153 int
1154 linux_pwritev(struct thread *td, struct linux_pwritev_args *uap)
1155 {
1156 	struct uio *auio;
1157 	int error;
1158 	off_t offset;
1159 
1160 	/*
1161 	 * According http://man7.org/linux/man-pages/man2/pwritev.2.html#NOTES
1162 	 * pos_l and pos_h, respectively, contain the
1163 	 * low order and high order 32 bits of offset.
1164 	 */
1165 	offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) <<
1166 	    (sizeof(offset) * 4)) | uap->pos_l;
1167 	if (offset < 0)
1168 		return (EINVAL);
1169 #ifdef COMPAT_LINUX32
1170 	error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio);
1171 #else
1172 	error = copyinuio(uap->vec, uap->vlen, &auio);
1173 #endif
1174 	if (error != 0)
1175 		return (error);
1176 	error = kern_pwritev(td, uap->fd, auio, offset);
1177 	free(auio, M_IOV);
1178 	return (error);
1179 }
1180 
1181 int
1182 linux_mount(struct thread *td, struct linux_mount_args *args)
1183 {
1184 	char fstypename[MFSNAMELEN];
1185 	char *mntonname, *mntfromname;
1186 	int error, fsflags;
1187 
1188 	mntonname = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1189 	mntfromname = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1190 	error = copyinstr(args->filesystemtype, fstypename, MFSNAMELEN - 1,
1191 	    NULL);
1192 	if (error != 0)
1193 		goto out;
1194 	if (args->specialfile != NULL) {
1195 		error = copyinstr(args->specialfile, mntfromname, MNAMELEN - 1, NULL);
1196 		if (error != 0)
1197 			goto out;
1198 	} else {
1199 		mntfromname[0] = '\0';
1200 	}
1201 	error = copyinstr(args->dir, mntonname, MNAMELEN - 1, NULL);
1202 	if (error != 0)
1203 		goto out;
1204 
1205 	if (strcmp(fstypename, "ext2") == 0) {
1206 		strcpy(fstypename, "ext2fs");
1207 	} else if (strcmp(fstypename, "proc") == 0) {
1208 		strcpy(fstypename, "linprocfs");
1209 	} else if (strcmp(fstypename, "vfat") == 0) {
1210 		strcpy(fstypename, "msdosfs");
1211 	}
1212 
1213 	fsflags = 0;
1214 
1215 	/*
1216 	 * Linux SYNC flag is not included; the closest equivalent
1217 	 * FreeBSD has is !ASYNC, which is our default.
1218 	 */
1219 	if (args->rwflag & LINUX_MS_RDONLY)
1220 		fsflags |= MNT_RDONLY;
1221 	if (args->rwflag & LINUX_MS_NOSUID)
1222 		fsflags |= MNT_NOSUID;
1223 	if (args->rwflag & LINUX_MS_NOEXEC)
1224 		fsflags |= MNT_NOEXEC;
1225 	if (args->rwflag & LINUX_MS_REMOUNT)
1226 		fsflags |= MNT_UPDATE;
1227 
1228 	error = kernel_vmount(fsflags,
1229 	    "fstype", fstypename,
1230 	    "fspath", mntonname,
1231 	    "from", mntfromname,
1232 	    NULL);
1233 out:
1234 	free(mntonname, M_TEMP);
1235 	free(mntfromname, M_TEMP);
1236 	return (error);
1237 }
1238 
1239 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1240 int
1241 linux_oldumount(struct thread *td, struct linux_oldumount_args *args)
1242 {
1243 
1244 	return (kern_unmount(td, args->path, 0));
1245 }
1246 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1247 
1248 #ifdef LINUX_LEGACY_SYSCALLS
1249 int
1250 linux_umount(struct thread *td, struct linux_umount_args *args)
1251 {
1252 	int flags;
1253 
1254 	flags = 0;
1255 	if ((args->flags & LINUX_MNT_FORCE) != 0) {
1256 		args->flags &= ~LINUX_MNT_FORCE;
1257 		flags |= MNT_FORCE;
1258 	}
1259 	if (args->flags != 0) {
1260 		linux_msg(td, "unsupported umount2 flags %#x", args->flags);
1261 		return (EINVAL);
1262 	}
1263 
1264 	return (kern_unmount(td, args->path, flags));
1265 }
1266 #endif
1267 
1268 /*
1269  * fcntl family of syscalls
1270  */
1271 
1272 struct l_flock {
1273 	l_short		l_type;
1274 	l_short		l_whence;
1275 	l_off_t		l_start;
1276 	l_off_t		l_len;
1277 	l_pid_t		l_pid;
1278 }
1279 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1280 __packed
1281 #endif
1282 ;
1283 
1284 static void
1285 linux_to_bsd_flock(struct l_flock *linux_flock, struct flock *bsd_flock)
1286 {
1287 	switch (linux_flock->l_type) {
1288 	case LINUX_F_RDLCK:
1289 		bsd_flock->l_type = F_RDLCK;
1290 		break;
1291 	case LINUX_F_WRLCK:
1292 		bsd_flock->l_type = F_WRLCK;
1293 		break;
1294 	case LINUX_F_UNLCK:
1295 		bsd_flock->l_type = F_UNLCK;
1296 		break;
1297 	default:
1298 		bsd_flock->l_type = -1;
1299 		break;
1300 	}
1301 	bsd_flock->l_whence = linux_flock->l_whence;
1302 	bsd_flock->l_start = (off_t)linux_flock->l_start;
1303 	bsd_flock->l_len = (off_t)linux_flock->l_len;
1304 	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1305 	bsd_flock->l_sysid = 0;
1306 }
1307 
1308 static void
1309 bsd_to_linux_flock(struct flock *bsd_flock, struct l_flock *linux_flock)
1310 {
1311 	switch (bsd_flock->l_type) {
1312 	case F_RDLCK:
1313 		linux_flock->l_type = LINUX_F_RDLCK;
1314 		break;
1315 	case F_WRLCK:
1316 		linux_flock->l_type = LINUX_F_WRLCK;
1317 		break;
1318 	case F_UNLCK:
1319 		linux_flock->l_type = LINUX_F_UNLCK;
1320 		break;
1321 	}
1322 	linux_flock->l_whence = bsd_flock->l_whence;
1323 	linux_flock->l_start = (l_off_t)bsd_flock->l_start;
1324 	linux_flock->l_len = (l_off_t)bsd_flock->l_len;
1325 	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1326 }
1327 
1328 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1329 struct l_flock64 {
1330 	l_short		l_type;
1331 	l_short		l_whence;
1332 	l_loff_t	l_start;
1333 	l_loff_t	l_len;
1334 	l_pid_t		l_pid;
1335 }
1336 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1337 __packed
1338 #endif
1339 ;
1340 
1341 static void
1342 linux_to_bsd_flock64(struct l_flock64 *linux_flock, struct flock *bsd_flock)
1343 {
1344 	switch (linux_flock->l_type) {
1345 	case LINUX_F_RDLCK:
1346 		bsd_flock->l_type = F_RDLCK;
1347 		break;
1348 	case LINUX_F_WRLCK:
1349 		bsd_flock->l_type = F_WRLCK;
1350 		break;
1351 	case LINUX_F_UNLCK:
1352 		bsd_flock->l_type = F_UNLCK;
1353 		break;
1354 	default:
1355 		bsd_flock->l_type = -1;
1356 		break;
1357 	}
1358 	bsd_flock->l_whence = linux_flock->l_whence;
1359 	bsd_flock->l_start = (off_t)linux_flock->l_start;
1360 	bsd_flock->l_len = (off_t)linux_flock->l_len;
1361 	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1362 	bsd_flock->l_sysid = 0;
1363 }
1364 
1365 static void
1366 bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock)
1367 {
1368 	switch (bsd_flock->l_type) {
1369 	case F_RDLCK:
1370 		linux_flock->l_type = LINUX_F_RDLCK;
1371 		break;
1372 	case F_WRLCK:
1373 		linux_flock->l_type = LINUX_F_WRLCK;
1374 		break;
1375 	case F_UNLCK:
1376 		linux_flock->l_type = LINUX_F_UNLCK;
1377 		break;
1378 	}
1379 	linux_flock->l_whence = bsd_flock->l_whence;
1380 	linux_flock->l_start = (l_loff_t)bsd_flock->l_start;
1381 	linux_flock->l_len = (l_loff_t)bsd_flock->l_len;
1382 	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1383 }
1384 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1385 
1386 static int
1387 fcntl_common(struct thread *td, struct linux_fcntl_args *args)
1388 {
1389 	struct l_flock linux_flock;
1390 	struct flock bsd_flock;
1391 	struct file *fp;
1392 	long arg;
1393 	int error, result;
1394 
1395 	switch (args->cmd) {
1396 	case LINUX_F_DUPFD:
1397 		return (kern_fcntl(td, args->fd, F_DUPFD, args->arg));
1398 
1399 	case LINUX_F_GETFD:
1400 		return (kern_fcntl(td, args->fd, F_GETFD, 0));
1401 
1402 	case LINUX_F_SETFD:
1403 		return (kern_fcntl(td, args->fd, F_SETFD, args->arg));
1404 
1405 	case LINUX_F_GETFL:
1406 		error = kern_fcntl(td, args->fd, F_GETFL, 0);
1407 		result = td->td_retval[0];
1408 		td->td_retval[0] = 0;
1409 		if (result & O_RDONLY)
1410 			td->td_retval[0] |= LINUX_O_RDONLY;
1411 		if (result & O_WRONLY)
1412 			td->td_retval[0] |= LINUX_O_WRONLY;
1413 		if (result & O_RDWR)
1414 			td->td_retval[0] |= LINUX_O_RDWR;
1415 		if (result & O_NDELAY)
1416 			td->td_retval[0] |= LINUX_O_NONBLOCK;
1417 		if (result & O_APPEND)
1418 			td->td_retval[0] |= LINUX_O_APPEND;
1419 		if (result & O_FSYNC)
1420 			td->td_retval[0] |= LINUX_O_SYNC;
1421 		if (result & O_ASYNC)
1422 			td->td_retval[0] |= LINUX_O_ASYNC;
1423 #ifdef LINUX_O_NOFOLLOW
1424 		if (result & O_NOFOLLOW)
1425 			td->td_retval[0] |= LINUX_O_NOFOLLOW;
1426 #endif
1427 #ifdef LINUX_O_DIRECT
1428 		if (result & O_DIRECT)
1429 			td->td_retval[0] |= LINUX_O_DIRECT;
1430 #endif
1431 		return (error);
1432 
1433 	case LINUX_F_SETFL:
1434 		arg = 0;
1435 		if (args->arg & LINUX_O_NDELAY)
1436 			arg |= O_NONBLOCK;
1437 		if (args->arg & LINUX_O_APPEND)
1438 			arg |= O_APPEND;
1439 		if (args->arg & LINUX_O_SYNC)
1440 			arg |= O_FSYNC;
1441 		if (args->arg & LINUX_O_ASYNC)
1442 			arg |= O_ASYNC;
1443 #ifdef LINUX_O_NOFOLLOW
1444 		if (args->arg & LINUX_O_NOFOLLOW)
1445 			arg |= O_NOFOLLOW;
1446 #endif
1447 #ifdef LINUX_O_DIRECT
1448 		if (args->arg & LINUX_O_DIRECT)
1449 			arg |= O_DIRECT;
1450 #endif
1451 		return (kern_fcntl(td, args->fd, F_SETFL, arg));
1452 
1453 	case LINUX_F_GETLK:
1454 		error = copyin((void *)args->arg, &linux_flock,
1455 		    sizeof(linux_flock));
1456 		if (error)
1457 			return (error);
1458 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1459 		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1460 		if (error)
1461 			return (error);
1462 		bsd_to_linux_flock(&bsd_flock, &linux_flock);
1463 		return (copyout(&linux_flock, (void *)args->arg,
1464 		    sizeof(linux_flock)));
1465 
1466 	case LINUX_F_SETLK:
1467 		error = copyin((void *)args->arg, &linux_flock,
1468 		    sizeof(linux_flock));
1469 		if (error)
1470 			return (error);
1471 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1472 		return (kern_fcntl(td, args->fd, F_SETLK,
1473 		    (intptr_t)&bsd_flock));
1474 
1475 	case LINUX_F_SETLKW:
1476 		error = copyin((void *)args->arg, &linux_flock,
1477 		    sizeof(linux_flock));
1478 		if (error)
1479 			return (error);
1480 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1481 		return (kern_fcntl(td, args->fd, F_SETLKW,
1482 		     (intptr_t)&bsd_flock));
1483 
1484 	case LINUX_F_GETOWN:
1485 		return (kern_fcntl(td, args->fd, F_GETOWN, 0));
1486 
1487 	case LINUX_F_SETOWN:
1488 		/*
1489 		 * XXX some Linux applications depend on F_SETOWN having no
1490 		 * significant effect for pipes (SIGIO is not delivered for
1491 		 * pipes under Linux-2.2.35 at least).
1492 		 */
1493 		error = fget(td, args->fd,
1494 		    &cap_fcntl_rights, &fp);
1495 		if (error)
1496 			return (error);
1497 		if (fp->f_type == DTYPE_PIPE) {
1498 			fdrop(fp, td);
1499 			return (EINVAL);
1500 		}
1501 		fdrop(fp, td);
1502 
1503 		return (kern_fcntl(td, args->fd, F_SETOWN, args->arg));
1504 
1505 	case LINUX_F_DUPFD_CLOEXEC:
1506 		return (kern_fcntl(td, args->fd, F_DUPFD_CLOEXEC, args->arg));
1507 	/*
1508 	 * Our F_SEAL_* values match Linux one for maximum compatibility.  So we
1509 	 * only needed to account for different values for fcntl(2) commands.
1510 	 */
1511 	case LINUX_F_GET_SEALS:
1512 		error = kern_fcntl(td, args->fd, F_GET_SEALS, 0);
1513 		if (error != 0)
1514 			return (error);
1515 		td->td_retval[0] = bsd_to_linux_bits(td->td_retval[0],
1516 		    seal_bitmap, 0);
1517 		return (0);
1518 
1519 	case LINUX_F_ADD_SEALS:
1520 		return (kern_fcntl(td, args->fd, F_ADD_SEALS,
1521 		    linux_to_bsd_bits(args->arg, seal_bitmap, 0)));
1522 	default:
1523 		linux_msg(td, "unsupported fcntl cmd %d\n", args->cmd);
1524 		return (EINVAL);
1525 	}
1526 }
1527 
/*
 * Linux fcntl(2) entry point: every command is handled by fcntl_common().
 */
int
linux_fcntl(struct thread *td, struct linux_fcntl_args *args)
{
	int rv;

	rv = fcntl_common(td, args);
	return (rv);
}
1534 
1535 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1536 int
1537 linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args)
1538 {
1539 	struct l_flock64 linux_flock;
1540 	struct flock bsd_flock;
1541 	struct linux_fcntl_args fcntl_args;
1542 	int error;
1543 
1544 	switch (args->cmd) {
1545 	case LINUX_F_GETLK64:
1546 		error = copyin((void *)args->arg, &linux_flock,
1547 		    sizeof(linux_flock));
1548 		if (error)
1549 			return (error);
1550 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1551 		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1552 		if (error)
1553 			return (error);
1554 		bsd_to_linux_flock64(&bsd_flock, &linux_flock);
1555 		return (copyout(&linux_flock, (void *)args->arg,
1556 			    sizeof(linux_flock)));
1557 
1558 	case LINUX_F_SETLK64:
1559 		error = copyin((void *)args->arg, &linux_flock,
1560 		    sizeof(linux_flock));
1561 		if (error)
1562 			return (error);
1563 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1564 		return (kern_fcntl(td, args->fd, F_SETLK,
1565 		    (intptr_t)&bsd_flock));
1566 
1567 	case LINUX_F_SETLKW64:
1568 		error = copyin((void *)args->arg, &linux_flock,
1569 		    sizeof(linux_flock));
1570 		if (error)
1571 			return (error);
1572 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1573 		return (kern_fcntl(td, args->fd, F_SETLKW,
1574 		    (intptr_t)&bsd_flock));
1575 	}
1576 
1577 	fcntl_args.fd = args->fd;
1578 	fcntl_args.cmd = args->cmd;
1579 	fcntl_args.arg = args->arg;
1580 	return (fcntl_common(td, &fcntl_args));
1581 }
1582 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1583 
1584 #ifdef LINUX_LEGACY_SYSCALLS
1585 int
1586 linux_chown(struct thread *td, struct linux_chown_args *args)
1587 {
1588 	char *path;
1589 	int error;
1590 
1591 	if (!LUSECONVPATH(td)) {
1592 		return (kern_fchownat(td, AT_FDCWD, args->path, UIO_USERSPACE,
1593 		    args->uid, args->gid, 0));
1594 	}
1595 	LCONVPATHEXIST(td, args->path, &path);
1596 	error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid,
1597 	    args->gid, 0);
1598 	LFREEPATH(path);
1599 	return (error);
1600 }
1601 #endif
1602 
1603 int
1604 linux_fchownat(struct thread *td, struct linux_fchownat_args *args)
1605 {
1606 	char *path;
1607 	int error, dfd, flag;
1608 
1609 	if (args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW)
1610 		return (EINVAL);
1611 
1612 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD :  args->dfd;
1613 	flag = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) == 0 ? 0 :
1614 	    AT_SYMLINK_NOFOLLOW;
1615 	if (!LUSECONVPATH(td)) {
1616 		return (kern_fchownat(td, dfd, args->filename, UIO_USERSPACE,
1617 		    args->uid, args->gid, flag));
1618 	}
1619 	LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
1620 	error = kern_fchownat(td, dfd, path, UIO_SYSSPACE, args->uid, args->gid,
1621 	    flag);
1622 	LFREEPATH(path);
1623 	return (error);
1624 }
1625 
1626 #ifdef LINUX_LEGACY_SYSCALLS
1627 int
1628 linux_lchown(struct thread *td, struct linux_lchown_args *args)
1629 {
1630 	char *path;
1631 	int error;
1632 
1633 	if (!LUSECONVPATH(td)) {
1634 		return (kern_fchownat(td, AT_FDCWD, args->path, UIO_USERSPACE, args->uid,
1635 		    args->gid, AT_SYMLINK_NOFOLLOW));
1636 	}
1637 	LCONVPATHEXIST(td, args->path, &path);
1638 	error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid, args->gid,
1639 	    AT_SYMLINK_NOFOLLOW);
1640 	LFREEPATH(path);
1641 	return (error);
1642 }
1643 #endif
1644 
1645 static int
1646 convert_fadvice(int advice)
1647 {
1648 	switch (advice) {
1649 	case LINUX_POSIX_FADV_NORMAL:
1650 		return (POSIX_FADV_NORMAL);
1651 	case LINUX_POSIX_FADV_RANDOM:
1652 		return (POSIX_FADV_RANDOM);
1653 	case LINUX_POSIX_FADV_SEQUENTIAL:
1654 		return (POSIX_FADV_SEQUENTIAL);
1655 	case LINUX_POSIX_FADV_WILLNEED:
1656 		return (POSIX_FADV_WILLNEED);
1657 	case LINUX_POSIX_FADV_DONTNEED:
1658 		return (POSIX_FADV_DONTNEED);
1659 	case LINUX_POSIX_FADV_NOREUSE:
1660 		return (POSIX_FADV_NOREUSE);
1661 	default:
1662 		return (-1);
1663 	}
1664 }
1665 
1666 int
1667 linux_fadvise64(struct thread *td, struct linux_fadvise64_args *args)
1668 {
1669 	off_t offset;
1670 	int advice;
1671 
1672 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1673 	offset = PAIR32TO64(off_t, args->offset);
1674 #else
1675 	offset = args->offset;
1676 #endif
1677 
1678 	advice = convert_fadvice(args->advice);
1679 	if (advice == -1)
1680 		return (EINVAL);
1681 	return (kern_posix_fadvise(td, args->fd, offset, args->len, advice));
1682 }
1683 
1684 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1685 int
1686 linux_fadvise64_64(struct thread *td, struct linux_fadvise64_64_args *args)
1687 {
1688 	off_t len, offset;
1689 	int advice;
1690 
1691 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1692 	len = PAIR32TO64(off_t, args->len);
1693 	offset = PAIR32TO64(off_t, args->offset);
1694 #else
1695 	len = args->len;
1696 	offset = args->offset;
1697 #endif
1698 
1699 	advice = convert_fadvice(args->advice);
1700 	if (advice == -1)
1701 		return (EINVAL);
1702 	return (kern_posix_fadvise(td, args->fd, offset, len, advice));
1703 }
1704 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1705 
1706 #ifdef LINUX_LEGACY_SYSCALLS
1707 int
1708 linux_pipe(struct thread *td, struct linux_pipe_args *args)
1709 {
1710 	int fildes[2];
1711 	int error;
1712 
1713 	error = kern_pipe(td, fildes, 0, NULL, NULL);
1714 	if (error != 0)
1715 		return (error);
1716 
1717 	error = copyout(fildes, args->pipefds, sizeof(fildes));
1718 	if (error != 0) {
1719 		(void)kern_close(td, fildes[0]);
1720 		(void)kern_close(td, fildes[1]);
1721 	}
1722 
1723 	return (error);
1724 }
1725 #endif
1726 
1727 int
1728 linux_pipe2(struct thread *td, struct linux_pipe2_args *args)
1729 {
1730 	int fildes[2];
1731 	int error, flags;
1732 
1733 	if ((args->flags & ~(LINUX_O_NONBLOCK | LINUX_O_CLOEXEC)) != 0)
1734 		return (EINVAL);
1735 
1736 	flags = 0;
1737 	if ((args->flags & LINUX_O_NONBLOCK) != 0)
1738 		flags |= O_NONBLOCK;
1739 	if ((args->flags & LINUX_O_CLOEXEC) != 0)
1740 		flags |= O_CLOEXEC;
1741 	error = kern_pipe(td, fildes, flags, NULL, NULL);
1742 	if (error != 0)
1743 		return (error);
1744 
1745 	error = copyout(fildes, args->pipefds, sizeof(fildes));
1746 	if (error != 0) {
1747 		(void)kern_close(td, fildes[0]);
1748 		(void)kern_close(td, fildes[1]);
1749 	}
1750 
1751 	return (error);
1752 }
1753 
1754 int
1755 linux_dup3(struct thread *td, struct linux_dup3_args *args)
1756 {
1757 	int cmd;
1758 	intptr_t newfd;
1759 
1760 	if (args->oldfd == args->newfd)
1761 		return (EINVAL);
1762 	if ((args->flags & ~LINUX_O_CLOEXEC) != 0)
1763 		return (EINVAL);
1764 	if (args->flags & LINUX_O_CLOEXEC)
1765 		cmd = F_DUP2FD_CLOEXEC;
1766 	else
1767 		cmd = F_DUP2FD;
1768 
1769 	newfd = args->newfd;
1770 	return (kern_fcntl(td, args->oldfd, cmd, newfd));
1771 }
1772 
1773 int
1774 linux_fallocate(struct thread *td, struct linux_fallocate_args *args)
1775 {
1776 	off_t len, offset;
1777 
1778 	/*
1779 	 * We emulate only posix_fallocate system call for which
1780 	 * mode should be 0.
1781 	 */
1782 	if (args->mode != 0)
1783 		return (EOPNOTSUPP);
1784 
1785 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1786 	len = PAIR32TO64(off_t, args->len);
1787 	offset = PAIR32TO64(off_t, args->offset);
1788 #else
1789 	len = args->len;
1790 	offset = args->offset;
1791 #endif
1792 
1793 	return (kern_posix_fallocate(td, args->fd, offset, len));
1794 }
1795 
1796 int
1797 linux_copy_file_range(struct thread *td, struct linux_copy_file_range_args
1798     *args)
1799 {
1800 	l_loff_t inoff, outoff, *inoffp, *outoffp;
1801 	int error, flags;
1802 
1803 	/*
1804 	 * copy_file_range(2) on Linux doesn't define any flags (yet), so is
1805 	 * the native implementation.  Enforce it.
1806 	 */
1807 	if (args->flags != 0) {
1808 		linux_msg(td, "copy_file_range unsupported flags 0x%x",
1809 		    args->flags);
1810 		return (EINVAL);
1811 	}
1812 	flags = 0;
1813 	inoffp = outoffp = NULL;
1814 	if (args->off_in != NULL) {
1815 		error = copyin(args->off_in, &inoff, sizeof(l_loff_t));
1816 		if (error != 0)
1817 			return (error);
1818 		inoffp = &inoff;
1819 	}
1820 	if (args->off_out != NULL) {
1821 		error = copyin(args->off_out, &outoff, sizeof(l_loff_t));
1822 		if (error != 0)
1823 			return (error);
1824 		outoffp = &outoff;
1825 	}
1826 
1827 	error = kern_copy_file_range(td, args->fd_in, inoffp, args->fd_out,
1828 	    outoffp, args->len, flags);
1829 	if (error == 0 && args->off_in != NULL)
1830 		error = copyout(inoffp, args->off_in, sizeof(l_loff_t));
1831 	if (error == 0 && args->off_out != NULL)
1832 		error = copyout(outoffp, args->off_out, sizeof(l_loff_t));
1833 	return (error);
1834 }
1835 
1836 #define	LINUX_MEMFD_PREFIX	"memfd:"
1837 
1838 int
1839 linux_memfd_create(struct thread *td, struct linux_memfd_create_args *args)
1840 {
1841 	char memfd_name[LINUX_NAME_MAX + 1];
1842 	int error, flags, shmflags, oflags;
1843 
1844 	/*
1845 	 * This is our clever trick to avoid the heap allocation to copy in the
1846 	 * uname.  We don't really need to go this far out of our way, but it
1847 	 * does keep the rest of this function fairly clean as they don't have
1848 	 * to worry about cleanup on the way out.
1849 	 */
1850 	error = copyinstr(args->uname_ptr,
1851 	    memfd_name + sizeof(LINUX_MEMFD_PREFIX) - 1,
1852 	    LINUX_NAME_MAX - sizeof(LINUX_MEMFD_PREFIX) - 1, NULL);
1853 	if (error != 0) {
1854 		if (error == ENAMETOOLONG)
1855 			error = EINVAL;
1856 		return (error);
1857 	}
1858 
1859 	memcpy(memfd_name, LINUX_MEMFD_PREFIX, sizeof(LINUX_MEMFD_PREFIX) - 1);
1860 	flags = linux_to_bsd_bits(args->flags, mfd_bitmap, 0);
1861 	if ((flags & ~(MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB |
1862 	    MFD_HUGE_MASK)) != 0)
1863 		return (EINVAL);
1864 	/* Size specified but no HUGETLB. */
1865 	if ((flags & MFD_HUGE_MASK) != 0 && (flags & MFD_HUGETLB) == 0)
1866 		return (EINVAL);
1867 	/* We don't actually support HUGETLB. */
1868 	if ((flags & MFD_HUGETLB) != 0)
1869 		return (ENOSYS);
1870 	oflags = O_RDWR;
1871 	shmflags = SHM_GROW_ON_WRITE;
1872 	if ((flags & MFD_CLOEXEC) != 0)
1873 		oflags |= O_CLOEXEC;
1874 	if ((flags & MFD_ALLOW_SEALING) != 0)
1875 		shmflags |= SHM_ALLOW_SEALING;
1876 	return (kern_shm_open2(td, SHM_ANON, oflags, 0, shmflags, NULL,
1877 	    memfd_name));
1878 }
1879 
1880 int
1881 linux_splice(struct thread *td, struct linux_splice_args *args)
1882 {
1883 
1884 	linux_msg(td, "syscall splice not really implemented");
1885 
1886 	/*
1887 	 * splice(2) is documented to return EINVAL in various circumstances;
1888 	 * returning it instead of ENOSYS should hint the caller to use fallback
1889 	 * instead.
1890 	 */
1891 	return (EINVAL);
1892 }
1893