xref: /freebsd/sys/compat/linux/linux_file.c (revision eda14cbc264d6969b02f2b1994cef11148e914f1)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1994-1995 Søren Schmidt
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_compat.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/capsicum.h>
37 #include <sys/conf.h>
38 #include <sys/dirent.h>
39 #include <sys/fcntl.h>
40 #include <sys/file.h>
41 #include <sys/filedesc.h>
42 #include <sys/lock.h>
43 #include <sys/malloc.h>
44 #include <sys/mman.h>
45 #include <sys/mount.h>
46 #include <sys/mutex.h>
47 #include <sys/namei.h>
48 #include <sys/proc.h>
49 #include <sys/stat.h>
50 #include <sys/sx.h>
51 #include <sys/syscallsubr.h>
52 #include <sys/sysproto.h>
53 #include <sys/tty.h>
54 #include <sys/unistd.h>
55 #include <sys/vnode.h>
56 
57 #ifdef COMPAT_LINUX32
58 #include <compat/freebsd32/freebsd32_misc.h>
59 #include <machine/../linux32/linux.h>
60 #include <machine/../linux32/linux32_proto.h>
61 #else
62 #include <machine/../linux/linux.h>
63 #include <machine/../linux/linux_proto.h>
64 #endif
65 #include <compat/linux/linux_misc.h>
66 #include <compat/linux/linux_util.h>
67 #include <compat/linux/linux_file.h>
68 
69 static int	linux_common_open(struct thread *, int, const char *, int, int,
70 		    enum uio_seg);
71 static int	linux_getdents_error(struct thread *, int, int);
72 
/*
 * Table of the F_SEAL_* file-seal flags.  Each entry is built with
 * BITMAP_1t1_LINUX(), which presumably pairs the native flag with its
 * LINUX_-prefixed counterpart (the macro is defined outside this file --
 * confirm there).
 */
static struct bsd_to_linux_bitmap seal_bitmap[] = {
	BITMAP_1t1_LINUX(F_SEAL_SEAL),
	BITMAP_1t1_LINUX(F_SEAL_SHRINK),
	BITMAP_1t1_LINUX(F_SEAL_GROW),
	BITMAP_1t1_LINUX(F_SEAL_WRITE),
};
79 
/*
 * Build one bsd_to_linux_bitmap entry that pairs the native
 * MFD_HUGE_<size> flag with the Linux LINUX_HUGETLB_FLAG_ENCODE_<size>
 * value.  The helper is local to the table below and is #undef'ed
 * right after it.
 */
#define	MFD_HUGETLB_ENTRY(_size)					\
	{								\
		.bsd_value = MFD_HUGE_##_size,				\
		.linux_value = LINUX_HUGETLB_FLAG_ENCODE_##_size	\
	}
/*
 * Table of the MFD_* flags: the generic flags first, followed by one
 * entry per supported huge page size.
 */
static struct bsd_to_linux_bitmap mfd_bitmap[] = {
	BITMAP_1t1_LINUX(MFD_CLOEXEC),
	BITMAP_1t1_LINUX(MFD_ALLOW_SEALING),
	BITMAP_1t1_LINUX(MFD_HUGETLB),
	MFD_HUGETLB_ENTRY(64KB),
	MFD_HUGETLB_ENTRY(512KB),
	MFD_HUGETLB_ENTRY(1MB),
	MFD_HUGETLB_ENTRY(2MB),
	MFD_HUGETLB_ENTRY(8MB),
	MFD_HUGETLB_ENTRY(16MB),
	MFD_HUGETLB_ENTRY(32MB),
	MFD_HUGETLB_ENTRY(256MB),
	MFD_HUGETLB_ENTRY(512MB),
	MFD_HUGETLB_ENTRY(1GB),
	MFD_HUGETLB_ENTRY(2GB),
	MFD_HUGETLB_ENTRY(16GB),
};
#undef MFD_HUGETLB_ENTRY
103 
104 #ifdef LINUX_LEGACY_SYSCALLS
105 int
106 linux_creat(struct thread *td, struct linux_creat_args *args)
107 {
108 	char *path;
109 	int error;
110 
111 	if (!LUSECONVPATH(td)) {
112 		error = kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE,
113 		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
114 	} else {
115 		LCONVPATHEXIST(td, args->path, &path);
116 		error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE,
117 		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
118 		LFREEPATH(path);
119 	}
120 	return (error);
121 }
122 #endif
123 
124 static int
125 linux_common_open(struct thread *td, int dirfd, const char *path, int l_flags,
126     int mode, enum uio_seg seg)
127 {
128 	struct proc *p = td->td_proc;
129 	struct file *fp;
130 	int fd;
131 	int bsd_flags, error;
132 
133 	bsd_flags = 0;
134 	switch (l_flags & LINUX_O_ACCMODE) {
135 	case LINUX_O_WRONLY:
136 		bsd_flags |= O_WRONLY;
137 		break;
138 	case LINUX_O_RDWR:
139 		bsd_flags |= O_RDWR;
140 		break;
141 	default:
142 		bsd_flags |= O_RDONLY;
143 	}
144 	if (l_flags & LINUX_O_NDELAY)
145 		bsd_flags |= O_NONBLOCK;
146 	if (l_flags & LINUX_O_APPEND)
147 		bsd_flags |= O_APPEND;
148 	if (l_flags & LINUX_O_SYNC)
149 		bsd_flags |= O_FSYNC;
150 	if (l_flags & LINUX_O_CLOEXEC)
151 		bsd_flags |= O_CLOEXEC;
152 	if (l_flags & LINUX_O_NONBLOCK)
153 		bsd_flags |= O_NONBLOCK;
154 	if (l_flags & LINUX_O_ASYNC)
155 		bsd_flags |= O_ASYNC;
156 	if (l_flags & LINUX_O_CREAT)
157 		bsd_flags |= O_CREAT;
158 	if (l_flags & LINUX_O_TRUNC)
159 		bsd_flags |= O_TRUNC;
160 	if (l_flags & LINUX_O_EXCL)
161 		bsd_flags |= O_EXCL;
162 	if (l_flags & LINUX_O_NOCTTY)
163 		bsd_flags |= O_NOCTTY;
164 	if (l_flags & LINUX_O_DIRECT)
165 		bsd_flags |= O_DIRECT;
166 	if (l_flags & LINUX_O_NOFOLLOW)
167 		bsd_flags |= O_NOFOLLOW;
168 	if (l_flags & LINUX_O_DIRECTORY)
169 		bsd_flags |= O_DIRECTORY;
170 	/* XXX LINUX_O_NOATIME: unable to be easily implemented. */
171 
172 	error = kern_openat(td, dirfd, path, seg, bsd_flags, mode);
173 	if (error != 0) {
174 		if (error == EMLINK)
175 			error = ELOOP;
176 		goto done;
177 	}
178 	if (p->p_flag & P_CONTROLT)
179 		goto done;
180 	if (bsd_flags & O_NOCTTY)
181 		goto done;
182 
183 	/*
184 	 * XXX In between kern_openat() and fget(), another process
185 	 * having the same filedesc could use that fd without
186 	 * checking below.
187 	*/
188 	fd = td->td_retval[0];
189 	if (fget(td, fd, &cap_ioctl_rights, &fp) == 0) {
190 		if (fp->f_type != DTYPE_VNODE) {
191 			fdrop(fp, td);
192 			goto done;
193 		}
194 		sx_slock(&proctree_lock);
195 		PROC_LOCK(p);
196 		if (SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) {
197 			PROC_UNLOCK(p);
198 			sx_sunlock(&proctree_lock);
199 			/* XXXPJD: Verify if TIOCSCTTY is allowed. */
200 			(void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0,
201 			    td->td_ucred, td);
202 		} else {
203 			PROC_UNLOCK(p);
204 			sx_sunlock(&proctree_lock);
205 		}
206 		fdrop(fp, td);
207 	}
208 
209 done:
210 	return (error);
211 }
212 
213 int
214 linux_openat(struct thread *td, struct linux_openat_args *args)
215 {
216 	char *path;
217 	int dfd, error;
218 
219 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
220 	if (!LUSECONVPATH(td)) {
221 		return (linux_common_open(td, dfd, args->filename, args->flags,
222 		    args->mode, UIO_USERSPACE));
223 	}
224 	if (args->flags & LINUX_O_CREAT)
225 		LCONVPATH_AT(td, args->filename, &path, 1, dfd);
226 	else
227 		LCONVPATH_AT(td, args->filename, &path, 0, dfd);
228 
229 	error = linux_common_open(td, dfd, path, args->flags, args->mode,
230 	    UIO_SYSSPACE);
231 	LFREEPATH(path);
232 	return (error);
233 }
234 
235 #ifdef LINUX_LEGACY_SYSCALLS
236 int
237 linux_open(struct thread *td, struct linux_open_args *args)
238 {
239 	char *path;
240 	int error;
241 
242 	if (!LUSECONVPATH(td)) {
243 		return (linux_common_open(td, AT_FDCWD, args->path, args->flags,
244 		    args->mode, UIO_USERSPACE));
245 	}
246 	if (args->flags & LINUX_O_CREAT)
247 		LCONVPATHCREAT(td, args->path, &path);
248 	else
249 		LCONVPATHEXIST(td, args->path, &path);
250 
251 	error = linux_common_open(td, AT_FDCWD, path, args->flags, args->mode,
252 	    UIO_SYSSPACE);
253 	LFREEPATH(path);
254 	return (error);
255 }
256 #endif
257 
258 int
259 linux_lseek(struct thread *td, struct linux_lseek_args *args)
260 {
261 
262 	return (kern_lseek(td, args->fdes, args->off, args->whence));
263 }
264 
265 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
266 int
267 linux_llseek(struct thread *td, struct linux_llseek_args *args)
268 {
269 	int error;
270 	off_t off;
271 
272 	off = (args->olow) | (((off_t) args->ohigh) << 32);
273 
274 	error = kern_lseek(td, args->fd, off, args->whence);
275 	if (error != 0)
276 		return (error);
277 
278 	error = copyout(td->td_retval, args->res, sizeof(off_t));
279 	if (error != 0)
280 		return (error);
281 
282 	td->td_retval[0] = 0;
283 	return (0);
284 }
285 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
286 
/*
 * Note that linux_getdents(2) and linux_getdents64(2) have the same
 * arguments. They only differ in the definition of struct dirent they
 * operate on.
 * Note that linux_readdir(2) is a special case of linux_getdents(2)
 * where count always equals 1, meaning that the buffer is one
 * dirent-structure in size and that the code can't handle more anyway.
 * Note that linux_readdir(2) can't be implemented by means of
 * linux_getdents(2), because when the *dent buffer size is equal to 1
 * linux_getdents(2) will trash the user stack.
 */
298 
299 static int
300 linux_getdents_error(struct thread *td, int fd, int err)
301 {
302 	struct vnode *vp;
303 	struct file *fp;
304 	int error;
305 
306 	/* Linux return ENOTDIR in case when fd is not a directory. */
307 	error = getvnode(td, fd, &cap_read_rights, &fp);
308 	if (error != 0)
309 		return (error);
310 	vp = fp->f_vnode;
311 	if (vp->v_type != VDIR) {
312 		fdrop(fp, td);
313 		return (ENOTDIR);
314 	}
315 	fdrop(fp, td);
316 	return (err);
317 }
318 
/*
 * Linux-format directory record, as copied out to user space by
 * linux_getdents() and linux_readdir().
 */
struct l_dirent {
	l_ulong		d_ino;		/* filled from the native d_fileno */
	l_off_t		d_off;		/* offset value reported to Linux */
	l_ushort	d_reclen;	/* record length reported to Linux */
	char		d_name[LINUX_NAME_MAX + 1];	/* NUL-terminated name */
};
325 
/*
 * Linux-format 64-bit directory record, as copied out to user space by
 * linux_getdents64().
 */
struct l_dirent64 {
	uint64_t	d_ino;		/* filled from the native d_fileno */
	int64_t		d_off;		/* offset value reported to Linux */
	l_ushort	d_reclen;	/* length of this record */
	u_char		d_type;		/* filled from the native d_type */
	char		d_name[LINUX_NAME_MAX + 1];	/* NUL-terminated name */
};
333 
/*
 * Size of a Linux directory record holding a name of `namlen' bytes.
 *
 * Linux uses the last byte in the dirent buffer to store d_type,
 * at least glibc-2.7 requires it. That is why LINUX_RECLEN() adds 2 bytes
 * to the name length: one for the terminating NUL and one for d_type.
 * The result is rounded up to a multiple of sizeof(l_ulong).
 */
#define LINUX_RECLEN(namlen)						\
    roundup(offsetof(struct l_dirent, d_name) + (namlen) + 2, sizeof(l_ulong))

/*
 * Same for struct l_dirent64, which has a real d_type member and hence
 * only needs one extra byte for the terminating NUL.  The result is
 * rounded up to a multiple of sizeof(uint64_t).
 */
#define LINUX_RECLEN64(namlen)						\
    roundup(offsetof(struct l_dirent64, d_name) + (namlen) + 1,		\
    sizeof(uint64_t))
344 
345 #ifdef LINUX_LEGACY_SYSCALLS
346 int
347 linux_getdents(struct thread *td, struct linux_getdents_args *args)
348 {
349 	struct dirent *bdp;
350 	caddr_t inp, buf;		/* BSD-format */
351 	int len, reclen;		/* BSD-format */
352 	caddr_t outp;			/* Linux-format */
353 	int resid, linuxreclen;		/* Linux-format */
354 	caddr_t lbuf;			/* Linux-format */
355 	off_t base;
356 	struct l_dirent *linux_dirent;
357 	int buflen, error;
358 	size_t retval;
359 
360 	buflen = min(args->count, MAXBSIZE);
361 	buf = malloc(buflen, M_TEMP, M_WAITOK);
362 
363 	error = kern_getdirentries(td, args->fd, buf, buflen,
364 	    &base, NULL, UIO_SYSSPACE);
365 	if (error != 0) {
366 		error = linux_getdents_error(td, args->fd, error);
367 		goto out1;
368 	}
369 
370 	lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
371 
372 	len = td->td_retval[0];
373 	inp = buf;
374 	outp = (caddr_t)args->dent;
375 	resid = args->count;
376 	retval = 0;
377 
378 	while (len > 0) {
379 		bdp = (struct dirent *) inp;
380 		reclen = bdp->d_reclen;
381 		linuxreclen = LINUX_RECLEN(bdp->d_namlen);
382 		/*
383 		 * No more space in the user supplied dirent buffer.
384 		 * Return EINVAL.
385 		 */
386 		if (resid < linuxreclen) {
387 			error = EINVAL;
388 			goto out;
389 		}
390 
391 		linux_dirent = (struct l_dirent*)lbuf;
392 		linux_dirent->d_ino = bdp->d_fileno;
393 		linux_dirent->d_off = base + reclen;
394 		linux_dirent->d_reclen = linuxreclen;
395 		/*
396 		 * Copy d_type to last byte of l_dirent buffer
397 		 */
398 		lbuf[linuxreclen - 1] = bdp->d_type;
399 		strlcpy(linux_dirent->d_name, bdp->d_name,
400 		    linuxreclen - offsetof(struct l_dirent, d_name)-1);
401 		error = copyout(linux_dirent, outp, linuxreclen);
402 		if (error != 0)
403 			goto out;
404 
405 		inp += reclen;
406 		base += reclen;
407 		len -= reclen;
408 
409 		retval += linuxreclen;
410 		outp += linuxreclen;
411 		resid -= linuxreclen;
412 	}
413 	td->td_retval[0] = retval;
414 
415 out:
416 	free(lbuf, M_TEMP);
417 out1:
418 	free(buf, M_TEMP);
419 	return (error);
420 }
421 #endif
422 
423 int
424 linux_getdents64(struct thread *td, struct linux_getdents64_args *args)
425 {
426 	struct dirent *bdp;
427 	caddr_t inp, buf;		/* BSD-format */
428 	int len, reclen;		/* BSD-format */
429 	caddr_t outp;			/* Linux-format */
430 	int resid, linuxreclen;		/* Linux-format */
431 	caddr_t lbuf;			/* Linux-format */
432 	off_t base;
433 	struct l_dirent64 *linux_dirent64;
434 	int buflen, error;
435 	size_t retval;
436 
437 	buflen = min(args->count, MAXBSIZE);
438 	buf = malloc(buflen, M_TEMP, M_WAITOK);
439 
440 	error = kern_getdirentries(td, args->fd, buf, buflen,
441 	    &base, NULL, UIO_SYSSPACE);
442 	if (error != 0) {
443 		error = linux_getdents_error(td, args->fd, error);
444 		goto out1;
445 	}
446 
447 	lbuf = malloc(LINUX_RECLEN64(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
448 
449 	len = td->td_retval[0];
450 	inp = buf;
451 	outp = (caddr_t)args->dirent;
452 	resid = args->count;
453 	retval = 0;
454 
455 	while (len > 0) {
456 		bdp = (struct dirent *) inp;
457 		reclen = bdp->d_reclen;
458 		linuxreclen = LINUX_RECLEN64(bdp->d_namlen);
459 		/*
460 		 * No more space in the user supplied dirent buffer.
461 		 * Return EINVAL.
462 		 */
463 		if (resid < linuxreclen) {
464 			error = EINVAL;
465 			goto out;
466 		}
467 
468 		linux_dirent64 = (struct l_dirent64*)lbuf;
469 		linux_dirent64->d_ino = bdp->d_fileno;
470 		linux_dirent64->d_off = base + reclen;
471 		linux_dirent64->d_reclen = linuxreclen;
472 		linux_dirent64->d_type = bdp->d_type;
473 		strlcpy(linux_dirent64->d_name, bdp->d_name,
474 		    linuxreclen - offsetof(struct l_dirent64, d_name));
475 		error = copyout(linux_dirent64, outp, linuxreclen);
476 		if (error != 0)
477 			goto out;
478 
479 		inp += reclen;
480 		base += reclen;
481 		len -= reclen;
482 
483 		retval += linuxreclen;
484 		outp += linuxreclen;
485 		resid -= linuxreclen;
486 	}
487 	td->td_retval[0] = retval;
488 
489 out:
490 	free(lbuf, M_TEMP);
491 out1:
492 	free(buf, M_TEMP);
493 	return (error);
494 }
495 
496 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
497 int
498 linux_readdir(struct thread *td, struct linux_readdir_args *args)
499 {
500 	struct dirent *bdp;
501 	caddr_t buf;			/* BSD-format */
502 	int linuxreclen;		/* Linux-format */
503 	caddr_t lbuf;			/* Linux-format */
504 	off_t base;
505 	struct l_dirent *linux_dirent;
506 	int buflen, error;
507 
508 	buflen = LINUX_RECLEN(LINUX_NAME_MAX);
509 	buf = malloc(buflen, M_TEMP, M_WAITOK);
510 
511 	error = kern_getdirentries(td, args->fd, buf, buflen,
512 	    &base, NULL, UIO_SYSSPACE);
513 	if (error != 0) {
514 		error = linux_getdents_error(td, args->fd, error);
515 		goto out;
516 	}
517 	if (td->td_retval[0] == 0)
518 		goto out;
519 
520 	lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
521 
522 	bdp = (struct dirent *) buf;
523 	linuxreclen = LINUX_RECLEN(bdp->d_namlen);
524 
525 	linux_dirent = (struct l_dirent*)lbuf;
526 	linux_dirent->d_ino = bdp->d_fileno;
527 	linux_dirent->d_off = linuxreclen;
528 	linux_dirent->d_reclen = bdp->d_namlen;
529 	strlcpy(linux_dirent->d_name, bdp->d_name,
530 	    linuxreclen - offsetof(struct l_dirent, d_name));
531 	error = copyout(linux_dirent, args->dent, linuxreclen);
532 	if (error == 0)
533 		td->td_retval[0] = linuxreclen;
534 
535 	free(lbuf, M_TEMP);
536 out:
537 	free(buf, M_TEMP);
538 	return (error);
539 }
540 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
541 
542 
543 /*
544  * These exist mainly for hooks for doing /compat/linux translation.
545  */
546 
547 #ifdef LINUX_LEGACY_SYSCALLS
548 int
549 linux_access(struct thread *td, struct linux_access_args *args)
550 {
551 	char *path;
552 	int error;
553 
554 	/* Linux convention. */
555 	if (args->amode & ~(F_OK | X_OK | W_OK | R_OK))
556 		return (EINVAL);
557 
558 	if (!LUSECONVPATH(td)) {
559 		error = kern_accessat(td, AT_FDCWD, args->path, UIO_USERSPACE, 0,
560 		    args->amode);
561 	} else {
562 		LCONVPATHEXIST(td, args->path, &path);
563 		error = kern_accessat(td, AT_FDCWD, path, UIO_SYSSPACE, 0,
564 		    args->amode);
565 		LFREEPATH(path);
566 	}
567 
568 	return (error);
569 }
570 #endif
571 
572 int
573 linux_faccessat(struct thread *td, struct linux_faccessat_args *args)
574 {
575 	char *path;
576 	int error, dfd;
577 
578 	/* Linux convention. */
579 	if (args->amode & ~(F_OK | X_OK | W_OK | R_OK))
580 		return (EINVAL);
581 
582 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
583 	if (!LUSECONVPATH(td)) {
584 		error = kern_accessat(td, dfd, args->filename, UIO_USERSPACE, 0, args->amode);
585 	} else {
586 		LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
587 		error = kern_accessat(td, dfd, path, UIO_SYSSPACE, 0, args->amode);
588 		LFREEPATH(path);
589 	}
590 
591 	return (error);
592 }
593 
594 #ifdef LINUX_LEGACY_SYSCALLS
595 int
596 linux_unlink(struct thread *td, struct linux_unlink_args *args)
597 {
598 	char *path;
599 	int error;
600 	struct stat st;
601 
602 	if (!LUSECONVPATH(td)) {
603 		error = kern_funlinkat(td, AT_FDCWD, args->path, FD_NONE,
604 		    UIO_USERSPACE, 0, 0);
605 		if (error == EPERM) {
606 			/* Introduce POSIX noncompliant behaviour of Linux */
607 			if (kern_statat(td, 0, AT_FDCWD, args->path,
608 			    UIO_SYSSPACE, &st, NULL) == 0) {
609 				if (S_ISDIR(st.st_mode))
610 					error = EISDIR;
611 			}
612 		}
613 	} else {
614 		LCONVPATHEXIST(td, args->path, &path);
615 		error = kern_funlinkat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0, 0);
616 		if (error == EPERM) {
617 			/* Introduce POSIX noncompliant behaviour of Linux */
618 			if (kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &st,
619 			    NULL) == 0) {
620 				if (S_ISDIR(st.st_mode))
621 					error = EISDIR;
622 			}
623 		}
624 		LFREEPATH(path);
625 	}
626 
627 	return (error);
628 }
629 #endif
630 
631 int
632 linux_unlinkat(struct thread *td, struct linux_unlinkat_args *args)
633 {
634 	char *path;
635 	int error, dfd;
636 	struct stat st;
637 
638 	if (args->flag & ~LINUX_AT_REMOVEDIR)
639 		return (EINVAL);
640 
641 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
642 	LCONVPATHEXIST_AT(td, args->pathname, &path, dfd);
643 
644 	if (args->flag & LINUX_AT_REMOVEDIR)
645 		error = kern_frmdirat(td, dfd, path, FD_NONE, UIO_SYSSPACE, 0);
646 	else
647 		error = kern_funlinkat(td, dfd, path, FD_NONE, UIO_SYSSPACE, 0,
648 		    0);
649 	if (error == EPERM && !(args->flag & LINUX_AT_REMOVEDIR)) {
650 		/* Introduce POSIX noncompliant behaviour of Linux */
651 		if (kern_statat(td, AT_SYMLINK_NOFOLLOW, dfd, path,
652 		    UIO_SYSSPACE, &st, NULL) == 0 && S_ISDIR(st.st_mode))
653 			error = EISDIR;
654 	}
655 	LFREEPATH(path);
656 	return (error);
657 }
658 int
659 linux_chdir(struct thread *td, struct linux_chdir_args *args)
660 {
661 	char *path;
662 	int error;
663 
664 	LCONVPATHEXIST(td, args->path, &path);
665 
666 	error = kern_chdir(td, path, UIO_SYSSPACE);
667 	LFREEPATH(path);
668 	return (error);
669 }
670 
671 #ifdef LINUX_LEGACY_SYSCALLS
672 int
673 linux_chmod(struct thread *td, struct linux_chmod_args *args)
674 {
675 	char *path;
676 	int error;
677 
678 	LCONVPATHEXIST(td, args->path, &path);
679 
680 	error = kern_fchmodat(td, AT_FDCWD, path, UIO_SYSSPACE,
681 	    args->mode, 0);
682 	LFREEPATH(path);
683 	return (error);
684 }
685 #endif
686 
687 int
688 linux_fchmodat(struct thread *td, struct linux_fchmodat_args *args)
689 {
690 	char *path;
691 	int error, dfd;
692 
693 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
694 	LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
695 
696 	error = kern_fchmodat(td, dfd, path, UIO_SYSSPACE, args->mode, 0);
697 	LFREEPATH(path);
698 	return (error);
699 }
700 
701 #ifdef LINUX_LEGACY_SYSCALLS
702 int
703 linux_mkdir(struct thread *td, struct linux_mkdir_args *args)
704 {
705 	char *path;
706 	int error;
707 
708 	LCONVPATHCREAT(td, args->path, &path);
709 
710 	error = kern_mkdirat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode);
711 	LFREEPATH(path);
712 	return (error);
713 }
714 #endif
715 
716 int
717 linux_mkdirat(struct thread *td, struct linux_mkdirat_args *args)
718 {
719 	char *path;
720 	int error, dfd;
721 
722 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
723 	LCONVPATHCREAT_AT(td, args->pathname, &path, dfd);
724 
725 	error = kern_mkdirat(td, dfd, path, UIO_SYSSPACE, args->mode);
726 	LFREEPATH(path);
727 	return (error);
728 }
729 
730 #ifdef LINUX_LEGACY_SYSCALLS
731 int
732 linux_rmdir(struct thread *td, struct linux_rmdir_args *args)
733 {
734 	char *path;
735 	int error;
736 
737 	LCONVPATHEXIST(td, args->path, &path);
738 
739 	error = kern_frmdirat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0);
740 	LFREEPATH(path);
741 	return (error);
742 }
743 
744 int
745 linux_rename(struct thread *td, struct linux_rename_args *args)
746 {
747 	char *from, *to;
748 	int error;
749 
750 	LCONVPATHEXIST(td, args->from, &from);
751 	/* Expand LCONVPATHCREATE so that `from' can be freed on errors */
752 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD);
753 	if (to == NULL) {
754 		LFREEPATH(from);
755 		return (error);
756 	}
757 
758 	error = kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, UIO_SYSSPACE);
759 	LFREEPATH(from);
760 	LFREEPATH(to);
761 	return (error);
762 }
763 #endif
764 
765 int
766 linux_renameat(struct thread *td, struct linux_renameat_args *args)
767 {
768 	struct linux_renameat2_args renameat2_args = {
769 	    .olddfd = args->olddfd,
770 	    .oldname = args->oldname,
771 	    .newdfd = args->newdfd,
772 	    .newname = args->newname,
773 	    .flags = 0
774 	};
775 
776 	return (linux_renameat2(td, &renameat2_args));
777 }
778 
779 int
780 linux_renameat2(struct thread *td, struct linux_renameat2_args *args)
781 {
782 	char *from, *to;
783 	int error, olddfd, newdfd;
784 
785 	if (args->flags != 0) {
786 		if (args->flags & ~(LINUX_RENAME_EXCHANGE |
787 		    LINUX_RENAME_NOREPLACE | LINUX_RENAME_WHITEOUT))
788 			return (EINVAL);
789 		if (args->flags & LINUX_RENAME_EXCHANGE &&
790 		    args->flags & (LINUX_RENAME_NOREPLACE |
791 		    LINUX_RENAME_WHITEOUT))
792 			return (EINVAL);
793 		linux_msg(td, "renameat2 unsupported flags 0x%x",
794 		    args->flags);
795 		return (EINVAL);
796 	}
797 
798 	olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd;
799 	newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd;
800 	LCONVPATHEXIST_AT(td, args->oldname, &from, olddfd);
801 	/* Expand LCONVPATHCREATE so that `from' can be freed on errors */
802 	error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd);
803 	if (to == NULL) {
804 		LFREEPATH(from);
805 		return (error);
806 	}
807 
808 	error = kern_renameat(td, olddfd, from, newdfd, to, UIO_SYSSPACE);
809 	LFREEPATH(from);
810 	LFREEPATH(to);
811 	return (error);
812 }
813 
814 #ifdef LINUX_LEGACY_SYSCALLS
815 int
816 linux_symlink(struct thread *td, struct linux_symlink_args *args)
817 {
818 	char *path, *to;
819 	int error;
820 
821 	LCONVPATHEXIST(td, args->path, &path);
822 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
823 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD);
824 	if (to == NULL) {
825 		LFREEPATH(path);
826 		return (error);
827 	}
828 
829 	error = kern_symlinkat(td, path, AT_FDCWD, to, UIO_SYSSPACE);
830 	LFREEPATH(path);
831 	LFREEPATH(to);
832 	return (error);
833 }
834 #endif
835 
836 int
837 linux_symlinkat(struct thread *td, struct linux_symlinkat_args *args)
838 {
839 	char *path, *to;
840 	int error, dfd;
841 
842 	dfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd;
843 	LCONVPATHEXIST(td, args->oldname, &path);
844 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
845 	error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, dfd);
846 	if (to == NULL) {
847 		LFREEPATH(path);
848 		return (error);
849 	}
850 
851 	error = kern_symlinkat(td, path, dfd, to, UIO_SYSSPACE);
852 	LFREEPATH(path);
853 	LFREEPATH(to);
854 	return (error);
855 }
856 
857 #ifdef LINUX_LEGACY_SYSCALLS
858 int
859 linux_readlink(struct thread *td, struct linux_readlink_args *args)
860 {
861 	char *name;
862 	int error;
863 
864 	LCONVPATHEXIST(td, args->name, &name);
865 
866 	error = kern_readlinkat(td, AT_FDCWD, name, UIO_SYSSPACE,
867 	    args->buf, UIO_USERSPACE, args->count);
868 	LFREEPATH(name);
869 	return (error);
870 }
871 #endif
872 
873 int
874 linux_readlinkat(struct thread *td, struct linux_readlinkat_args *args)
875 {
876 	char *name;
877 	int error, dfd;
878 
879 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
880 	LCONVPATHEXIST_AT(td, args->path, &name, dfd);
881 
882 	error = kern_readlinkat(td, dfd, name, UIO_SYSSPACE, args->buf,
883 	    UIO_USERSPACE, args->bufsiz);
884 	LFREEPATH(name);
885 	return (error);
886 }
887 
888 int
889 linux_truncate(struct thread *td, struct linux_truncate_args *args)
890 {
891 	char *path;
892 	int error;
893 
894 	LCONVPATHEXIST(td, args->path, &path);
895 	error = kern_truncate(td, path, UIO_SYSSPACE, args->length);
896 	LFREEPATH(path);
897 	return (error);
898 }
899 
900 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
901 int
902 linux_truncate64(struct thread *td, struct linux_truncate64_args *args)
903 {
904 	char *path;
905 	off_t length;
906 	int error;
907 
908 #if defined(__amd64__) && defined(COMPAT_LINUX32)
909 	length = PAIR32TO64(off_t, args->length);
910 #else
911 	length = args->length;
912 #endif
913 
914 	LCONVPATHEXIST(td, args->path, &path);
915 	error = kern_truncate(td, path, UIO_SYSSPACE, length);
916 	LFREEPATH(path);
917 	return (error);
918 }
919 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
920 
921 int
922 linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args)
923 {
924 
925 	return (kern_ftruncate(td, args->fd, args->length));
926 }
927 
928 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
929 int
930 linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
931 {
932 	off_t length;
933 
934 #if defined(__amd64__) && defined(COMPAT_LINUX32)
935 	length = PAIR32TO64(off_t, args->length);
936 #else
937 	length = args->length;
938 #endif
939 
940 	return (kern_ftruncate(td, args->fd, length));
941 }
942 #endif
943 
944 #ifdef LINUX_LEGACY_SYSCALLS
945 int
946 linux_link(struct thread *td, struct linux_link_args *args)
947 {
948 	char *path, *to;
949 	int error;
950 
951 	LCONVPATHEXIST(td, args->path, &path);
952 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
953 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD);
954 	if (to == NULL) {
955 		LFREEPATH(path);
956 		return (error);
957 	}
958 
959 	error = kern_linkat(td, AT_FDCWD, AT_FDCWD, path, to, UIO_SYSSPACE,
960 	    FOLLOW);
961 	LFREEPATH(path);
962 	LFREEPATH(to);
963 	return (error);
964 }
965 #endif
966 
967 int
968 linux_linkat(struct thread *td, struct linux_linkat_args *args)
969 {
970 	char *path, *to;
971 	int error, olddfd, newdfd, follow;
972 
973 	if (args->flag & ~LINUX_AT_SYMLINK_FOLLOW)
974 		return (EINVAL);
975 
976 	olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd;
977 	newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd;
978 	LCONVPATHEXIST_AT(td, args->oldname, &path, olddfd);
979 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
980 	error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd);
981 	if (to == NULL) {
982 		LFREEPATH(path);
983 		return (error);
984 	}
985 
986 	follow = (args->flag & LINUX_AT_SYMLINK_FOLLOW) == 0 ? NOFOLLOW :
987 	    FOLLOW;
988 	error = kern_linkat(td, olddfd, newdfd, path, to, UIO_SYSSPACE, follow);
989 	LFREEPATH(path);
990 	LFREEPATH(to);
991 	return (error);
992 }
993 
994 int
995 linux_fdatasync(struct thread *td, struct linux_fdatasync_args *uap)
996 {
997 
998 	return (kern_fsync(td, uap->fd, false));
999 }
1000 
1001 int
1002 linux_sync_file_range(struct thread *td, struct linux_sync_file_range_args *uap)
1003 {
1004 	off_t nbytes, offset;
1005 
1006 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1007 	nbytes = PAIR32TO64(off_t, uap->nbytes);
1008 	offset = PAIR32TO64(off_t, uap->offset);
1009 #else
1010 	nbytes = uap->nbytes;
1011 	offset = uap->offset;
1012 #endif
1013 
1014 	if (offset < 0 || nbytes < 0 ||
1015 	    (uap->flags & ~(LINUX_SYNC_FILE_RANGE_WAIT_BEFORE |
1016 	    LINUX_SYNC_FILE_RANGE_WRITE |
1017 	    LINUX_SYNC_FILE_RANGE_WAIT_AFTER)) != 0) {
1018 		return (EINVAL);
1019 	}
1020 
1021 	return (kern_fsync(td, uap->fd, false));
1022 }
1023 
1024 int
1025 linux_pread(struct thread *td, struct linux_pread_args *uap)
1026 {
1027 	struct vnode *vp;
1028 	off_t offset;
1029 	int error;
1030 
1031 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1032 	offset = PAIR32TO64(off_t, uap->offset);
1033 #else
1034 	offset = uap->offset;
1035 #endif
1036 
1037 	error = kern_pread(td, uap->fd, uap->buf, uap->nbyte, offset);
1038 	if (error == 0) {
1039 		/* This seems to violate POSIX but Linux does it. */
1040 		error = fgetvp(td, uap->fd, &cap_pread_rights, &vp);
1041 		if (error != 0)
1042 			return (error);
1043 		if (vp->v_type == VDIR)
1044 			error = EISDIR;
1045 		vrele(vp);
1046 	}
1047 	return (error);
1048 }
1049 
1050 int
1051 linux_pwrite(struct thread *td, struct linux_pwrite_args *uap)
1052 {
1053 	off_t offset;
1054 
1055 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1056 	offset = PAIR32TO64(off_t, uap->offset);
1057 #else
1058 	offset = uap->offset;
1059 #endif
1060 
1061 	return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, offset));
1062 }
1063 
1064 int
1065 linux_preadv(struct thread *td, struct linux_preadv_args *uap)
1066 {
1067 	struct uio *auio;
1068 	int error;
1069 	off_t offset;
1070 
1071 	/*
1072 	 * According http://man7.org/linux/man-pages/man2/preadv.2.html#NOTES
1073 	 * pos_l and pos_h, respectively, contain the
1074 	 * low order and high order 32 bits of offset.
1075 	 */
1076 	offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) <<
1077 	    (sizeof(offset) * 4)) | uap->pos_l;
1078 	if (offset < 0)
1079 		return (EINVAL);
1080 #ifdef COMPAT_LINUX32
1081 	error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio);
1082 #else
1083 	error = copyinuio(uap->vec, uap->vlen, &auio);
1084 #endif
1085 	if (error != 0)
1086 		return (error);
1087 	error = kern_preadv(td, uap->fd, auio, offset);
1088 	free(auio, M_IOV);
1089 	return (error);
1090 }
1091 
1092 int
1093 linux_pwritev(struct thread *td, struct linux_pwritev_args *uap)
1094 {
1095 	struct uio *auio;
1096 	int error;
1097 	off_t offset;
1098 
1099 	/*
1100 	 * According http://man7.org/linux/man-pages/man2/pwritev.2.html#NOTES
1101 	 * pos_l and pos_h, respectively, contain the
1102 	 * low order and high order 32 bits of offset.
1103 	 */
1104 	offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) <<
1105 	    (sizeof(offset) * 4)) | uap->pos_l;
1106 	if (offset < 0)
1107 		return (EINVAL);
1108 #ifdef COMPAT_LINUX32
1109 	error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio);
1110 #else
1111 	error = copyinuio(uap->vec, uap->vlen, &auio);
1112 #endif
1113 	if (error != 0)
1114 		return (error);
1115 	error = kern_pwritev(td, uap->fd, auio, offset);
1116 	free(auio, M_IOV);
1117 	return (error);
1118 }
1119 
1120 int
1121 linux_mount(struct thread *td, struct linux_mount_args *args)
1122 {
1123 	char fstypename[MFSNAMELEN];
1124 	char *mntonname, *mntfromname;
1125 	int error, fsflags;
1126 
1127 	mntonname = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1128 	mntfromname = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1129 	error = copyinstr(args->filesystemtype, fstypename, MFSNAMELEN - 1,
1130 	    NULL);
1131 	if (error != 0)
1132 		goto out;
1133 	if (args->specialfile != NULL) {
1134 		error = copyinstr(args->specialfile, mntfromname, MNAMELEN - 1, NULL);
1135 		if (error != 0)
1136 			goto out;
1137 	} else {
1138 		mntfromname[0] = '\0';
1139 	}
1140 	error = copyinstr(args->dir, mntonname, MNAMELEN - 1, NULL);
1141 	if (error != 0)
1142 		goto out;
1143 
1144 	if (strcmp(fstypename, "ext2") == 0) {
1145 		strcpy(fstypename, "ext2fs");
1146 	} else if (strcmp(fstypename, "proc") == 0) {
1147 		strcpy(fstypename, "linprocfs");
1148 	} else if (strcmp(fstypename, "vfat") == 0) {
1149 		strcpy(fstypename, "msdosfs");
1150 	}
1151 
1152 	fsflags = 0;
1153 
1154 	/*
1155 	 * Linux SYNC flag is not included; the closest equivalent
1156 	 * FreeBSD has is !ASYNC, which is our default.
1157 	 */
1158 	if (args->rwflag & LINUX_MS_RDONLY)
1159 		fsflags |= MNT_RDONLY;
1160 	if (args->rwflag & LINUX_MS_NOSUID)
1161 		fsflags |= MNT_NOSUID;
1162 	if (args->rwflag & LINUX_MS_NOEXEC)
1163 		fsflags |= MNT_NOEXEC;
1164 	if (args->rwflag & LINUX_MS_REMOUNT)
1165 		fsflags |= MNT_UPDATE;
1166 
1167 	error = kernel_vmount(fsflags,
1168 	    "fstype", fstypename,
1169 	    "fspath", mntonname,
1170 	    "from", mntfromname,
1171 	    NULL);
1172 out:
1173 	free(mntonname, M_TEMP);
1174 	free(mntfromname, M_TEMP);
1175 	return (error);
1176 }
1177 
1178 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1179 int
1180 linux_oldumount(struct thread *td, struct linux_oldumount_args *args)
1181 {
1182 
1183 	return (kern_unmount(td, args->path, 0));
1184 }
1185 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1186 
1187 #ifdef LINUX_LEGACY_SYSCALLS
1188 int
1189 linux_umount(struct thread *td, struct linux_umount_args *args)
1190 {
1191 	int flags;
1192 
1193 	flags = 0;
1194 	if ((args->flags & LINUX_MNT_FORCE) != 0) {
1195 		args->flags &= ~LINUX_MNT_FORCE;
1196 		flags |= MNT_FORCE;
1197 	}
1198 	if (args->flags != 0) {
1199 		linux_msg(td, "unsupported umount2 flags %#x", args->flags);
1200 		return (EINVAL);
1201 	}
1202 
1203 	return (kern_unmount(td, args->path, flags));
1204 }
1205 #endif
1206 
1207 /*
1208  * fcntl family of syscalls
1209  */
1210 
1211 struct l_flock {
1212 	l_short		l_type;
1213 	l_short		l_whence;
1214 	l_off_t		l_start;
1215 	l_off_t		l_len;
1216 	l_pid_t		l_pid;
1217 }
1218 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1219 __packed
1220 #endif
1221 ;
1222 
1223 static void
1224 linux_to_bsd_flock(struct l_flock *linux_flock, struct flock *bsd_flock)
1225 {
1226 	switch (linux_flock->l_type) {
1227 	case LINUX_F_RDLCK:
1228 		bsd_flock->l_type = F_RDLCK;
1229 		break;
1230 	case LINUX_F_WRLCK:
1231 		bsd_flock->l_type = F_WRLCK;
1232 		break;
1233 	case LINUX_F_UNLCK:
1234 		bsd_flock->l_type = F_UNLCK;
1235 		break;
1236 	default:
1237 		bsd_flock->l_type = -1;
1238 		break;
1239 	}
1240 	bsd_flock->l_whence = linux_flock->l_whence;
1241 	bsd_flock->l_start = (off_t)linux_flock->l_start;
1242 	bsd_flock->l_len = (off_t)linux_flock->l_len;
1243 	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1244 	bsd_flock->l_sysid = 0;
1245 }
1246 
1247 static void
1248 bsd_to_linux_flock(struct flock *bsd_flock, struct l_flock *linux_flock)
1249 {
1250 	switch (bsd_flock->l_type) {
1251 	case F_RDLCK:
1252 		linux_flock->l_type = LINUX_F_RDLCK;
1253 		break;
1254 	case F_WRLCK:
1255 		linux_flock->l_type = LINUX_F_WRLCK;
1256 		break;
1257 	case F_UNLCK:
1258 		linux_flock->l_type = LINUX_F_UNLCK;
1259 		break;
1260 	}
1261 	linux_flock->l_whence = bsd_flock->l_whence;
1262 	linux_flock->l_start = (l_off_t)bsd_flock->l_start;
1263 	linux_flock->l_len = (l_off_t)bsd_flock->l_len;
1264 	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1265 }
1266 
1267 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1268 struct l_flock64 {
1269 	l_short		l_type;
1270 	l_short		l_whence;
1271 	l_loff_t	l_start;
1272 	l_loff_t	l_len;
1273 	l_pid_t		l_pid;
1274 }
1275 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1276 __packed
1277 #endif
1278 ;
1279 
1280 static void
1281 linux_to_bsd_flock64(struct l_flock64 *linux_flock, struct flock *bsd_flock)
1282 {
1283 	switch (linux_flock->l_type) {
1284 	case LINUX_F_RDLCK:
1285 		bsd_flock->l_type = F_RDLCK;
1286 		break;
1287 	case LINUX_F_WRLCK:
1288 		bsd_flock->l_type = F_WRLCK;
1289 		break;
1290 	case LINUX_F_UNLCK:
1291 		bsd_flock->l_type = F_UNLCK;
1292 		break;
1293 	default:
1294 		bsd_flock->l_type = -1;
1295 		break;
1296 	}
1297 	bsd_flock->l_whence = linux_flock->l_whence;
1298 	bsd_flock->l_start = (off_t)linux_flock->l_start;
1299 	bsd_flock->l_len = (off_t)linux_flock->l_len;
1300 	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1301 	bsd_flock->l_sysid = 0;
1302 }
1303 
1304 static void
1305 bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock)
1306 {
1307 	switch (bsd_flock->l_type) {
1308 	case F_RDLCK:
1309 		linux_flock->l_type = LINUX_F_RDLCK;
1310 		break;
1311 	case F_WRLCK:
1312 		linux_flock->l_type = LINUX_F_WRLCK;
1313 		break;
1314 	case F_UNLCK:
1315 		linux_flock->l_type = LINUX_F_UNLCK;
1316 		break;
1317 	}
1318 	linux_flock->l_whence = bsd_flock->l_whence;
1319 	linux_flock->l_start = (l_loff_t)bsd_flock->l_start;
1320 	linux_flock->l_len = (l_loff_t)bsd_flock->l_len;
1321 	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1322 }
1323 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1324 
1325 static int
1326 fcntl_common(struct thread *td, struct linux_fcntl_args *args)
1327 {
1328 	struct l_flock linux_flock;
1329 	struct flock bsd_flock;
1330 	struct file *fp;
1331 	long arg;
1332 	int error, result;
1333 
1334 	switch (args->cmd) {
1335 	case LINUX_F_DUPFD:
1336 		return (kern_fcntl(td, args->fd, F_DUPFD, args->arg));
1337 
1338 	case LINUX_F_GETFD:
1339 		return (kern_fcntl(td, args->fd, F_GETFD, 0));
1340 
1341 	case LINUX_F_SETFD:
1342 		return (kern_fcntl(td, args->fd, F_SETFD, args->arg));
1343 
1344 	case LINUX_F_GETFL:
1345 		error = kern_fcntl(td, args->fd, F_GETFL, 0);
1346 		result = td->td_retval[0];
1347 		td->td_retval[0] = 0;
1348 		if (result & O_RDONLY)
1349 			td->td_retval[0] |= LINUX_O_RDONLY;
1350 		if (result & O_WRONLY)
1351 			td->td_retval[0] |= LINUX_O_WRONLY;
1352 		if (result & O_RDWR)
1353 			td->td_retval[0] |= LINUX_O_RDWR;
1354 		if (result & O_NDELAY)
1355 			td->td_retval[0] |= LINUX_O_NONBLOCK;
1356 		if (result & O_APPEND)
1357 			td->td_retval[0] |= LINUX_O_APPEND;
1358 		if (result & O_FSYNC)
1359 			td->td_retval[0] |= LINUX_O_SYNC;
1360 		if (result & O_ASYNC)
1361 			td->td_retval[0] |= LINUX_O_ASYNC;
1362 #ifdef LINUX_O_NOFOLLOW
1363 		if (result & O_NOFOLLOW)
1364 			td->td_retval[0] |= LINUX_O_NOFOLLOW;
1365 #endif
1366 #ifdef LINUX_O_DIRECT
1367 		if (result & O_DIRECT)
1368 			td->td_retval[0] |= LINUX_O_DIRECT;
1369 #endif
1370 		return (error);
1371 
1372 	case LINUX_F_SETFL:
1373 		arg = 0;
1374 		if (args->arg & LINUX_O_NDELAY)
1375 			arg |= O_NONBLOCK;
1376 		if (args->arg & LINUX_O_APPEND)
1377 			arg |= O_APPEND;
1378 		if (args->arg & LINUX_O_SYNC)
1379 			arg |= O_FSYNC;
1380 		if (args->arg & LINUX_O_ASYNC)
1381 			arg |= O_ASYNC;
1382 #ifdef LINUX_O_NOFOLLOW
1383 		if (args->arg & LINUX_O_NOFOLLOW)
1384 			arg |= O_NOFOLLOW;
1385 #endif
1386 #ifdef LINUX_O_DIRECT
1387 		if (args->arg & LINUX_O_DIRECT)
1388 			arg |= O_DIRECT;
1389 #endif
1390 		return (kern_fcntl(td, args->fd, F_SETFL, arg));
1391 
1392 	case LINUX_F_GETLK:
1393 		error = copyin((void *)args->arg, &linux_flock,
1394 		    sizeof(linux_flock));
1395 		if (error)
1396 			return (error);
1397 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1398 		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1399 		if (error)
1400 			return (error);
1401 		bsd_to_linux_flock(&bsd_flock, &linux_flock);
1402 		return (copyout(&linux_flock, (void *)args->arg,
1403 		    sizeof(linux_flock)));
1404 
1405 	case LINUX_F_SETLK:
1406 		error = copyin((void *)args->arg, &linux_flock,
1407 		    sizeof(linux_flock));
1408 		if (error)
1409 			return (error);
1410 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1411 		return (kern_fcntl(td, args->fd, F_SETLK,
1412 		    (intptr_t)&bsd_flock));
1413 
1414 	case LINUX_F_SETLKW:
1415 		error = copyin((void *)args->arg, &linux_flock,
1416 		    sizeof(linux_flock));
1417 		if (error)
1418 			return (error);
1419 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1420 		return (kern_fcntl(td, args->fd, F_SETLKW,
1421 		     (intptr_t)&bsd_flock));
1422 
1423 	case LINUX_F_GETOWN:
1424 		return (kern_fcntl(td, args->fd, F_GETOWN, 0));
1425 
1426 	case LINUX_F_SETOWN:
1427 		/*
1428 		 * XXX some Linux applications depend on F_SETOWN having no
1429 		 * significant effect for pipes (SIGIO is not delivered for
1430 		 * pipes under Linux-2.2.35 at least).
1431 		 */
1432 		error = fget(td, args->fd,
1433 		    &cap_fcntl_rights, &fp);
1434 		if (error)
1435 			return (error);
1436 		if (fp->f_type == DTYPE_PIPE) {
1437 			fdrop(fp, td);
1438 			return (EINVAL);
1439 		}
1440 		fdrop(fp, td);
1441 
1442 		return (kern_fcntl(td, args->fd, F_SETOWN, args->arg));
1443 
1444 	case LINUX_F_DUPFD_CLOEXEC:
1445 		return (kern_fcntl(td, args->fd, F_DUPFD_CLOEXEC, args->arg));
1446 	/*
1447 	 * Our F_SEAL_* values match Linux one for maximum compatibility.  So we
1448 	 * only needed to account for different values for fcntl(2) commands.
1449 	 */
1450 	case LINUX_F_GET_SEALS:
1451 		error = kern_fcntl(td, args->fd, F_GET_SEALS, 0);
1452 		if (error != 0)
1453 			return (error);
1454 		td->td_retval[0] = bsd_to_linux_bits(td->td_retval[0],
1455 		    seal_bitmap, 0);
1456 		return (0);
1457 
1458 	case LINUX_F_ADD_SEALS:
1459 		return (kern_fcntl(td, args->fd, F_ADD_SEALS,
1460 		    linux_to_bsd_bits(args->arg, seal_bitmap, 0)));
1461 	default:
1462 		linux_msg(td, "unsupported fcntl cmd %d\n", args->cmd);
1463 		return (EINVAL);
1464 	}
1465 }
1466 
/*
 * linux_fcntl: all commands are handled by fcntl_common(); its result is
 * returned unchanged.
 */
int
linux_fcntl(struct thread *td, struct linux_fcntl_args *args)
{
	int error;

	error = fcntl_common(td, args);
	return (error);
}
1473 
1474 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1475 int
1476 linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args)
1477 {
1478 	struct l_flock64 linux_flock;
1479 	struct flock bsd_flock;
1480 	struct linux_fcntl_args fcntl_args;
1481 	int error;
1482 
1483 	switch (args->cmd) {
1484 	case LINUX_F_GETLK64:
1485 		error = copyin((void *)args->arg, &linux_flock,
1486 		    sizeof(linux_flock));
1487 		if (error)
1488 			return (error);
1489 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1490 		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1491 		if (error)
1492 			return (error);
1493 		bsd_to_linux_flock64(&bsd_flock, &linux_flock);
1494 		return (copyout(&linux_flock, (void *)args->arg,
1495 			    sizeof(linux_flock)));
1496 
1497 	case LINUX_F_SETLK64:
1498 		error = copyin((void *)args->arg, &linux_flock,
1499 		    sizeof(linux_flock));
1500 		if (error)
1501 			return (error);
1502 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1503 		return (kern_fcntl(td, args->fd, F_SETLK,
1504 		    (intptr_t)&bsd_flock));
1505 
1506 	case LINUX_F_SETLKW64:
1507 		error = copyin((void *)args->arg, &linux_flock,
1508 		    sizeof(linux_flock));
1509 		if (error)
1510 			return (error);
1511 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1512 		return (kern_fcntl(td, args->fd, F_SETLKW,
1513 		    (intptr_t)&bsd_flock));
1514 	}
1515 
1516 	fcntl_args.fd = args->fd;
1517 	fcntl_args.cmd = args->cmd;
1518 	fcntl_args.arg = args->arg;
1519 	return (fcntl_common(td, &fcntl_args));
1520 }
1521 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1522 
1523 #ifdef LINUX_LEGACY_SYSCALLS
1524 int
1525 linux_chown(struct thread *td, struct linux_chown_args *args)
1526 {
1527 	char *path;
1528 	int error;
1529 
1530 	LCONVPATHEXIST(td, args->path, &path);
1531 
1532 	error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid,
1533 	    args->gid, 0);
1534 	LFREEPATH(path);
1535 	return (error);
1536 }
1537 #endif
1538 
1539 int
1540 linux_fchownat(struct thread *td, struct linux_fchownat_args *args)
1541 {
1542 	char *path;
1543 	int error, dfd, flag;
1544 
1545 	if (args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW)
1546 		return (EINVAL);
1547 
1548 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD :  args->dfd;
1549 	LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
1550 
1551 	flag = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) == 0 ? 0 :
1552 	    AT_SYMLINK_NOFOLLOW;
1553 	error = kern_fchownat(td, dfd, path, UIO_SYSSPACE, args->uid, args->gid,
1554 	    flag);
1555 	LFREEPATH(path);
1556 	return (error);
1557 }
1558 
1559 #ifdef LINUX_LEGACY_SYSCALLS
1560 int
1561 linux_lchown(struct thread *td, struct linux_lchown_args *args)
1562 {
1563 	char *path;
1564 	int error;
1565 
1566 	LCONVPATHEXIST(td, args->path, &path);
1567 
1568 	error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid,
1569 	    args->gid, AT_SYMLINK_NOFOLLOW);
1570 	LFREEPATH(path);
1571 	return (error);
1572 }
1573 #endif
1574 
1575 static int
1576 convert_fadvice(int advice)
1577 {
1578 	switch (advice) {
1579 	case LINUX_POSIX_FADV_NORMAL:
1580 		return (POSIX_FADV_NORMAL);
1581 	case LINUX_POSIX_FADV_RANDOM:
1582 		return (POSIX_FADV_RANDOM);
1583 	case LINUX_POSIX_FADV_SEQUENTIAL:
1584 		return (POSIX_FADV_SEQUENTIAL);
1585 	case LINUX_POSIX_FADV_WILLNEED:
1586 		return (POSIX_FADV_WILLNEED);
1587 	case LINUX_POSIX_FADV_DONTNEED:
1588 		return (POSIX_FADV_DONTNEED);
1589 	case LINUX_POSIX_FADV_NOREUSE:
1590 		return (POSIX_FADV_NOREUSE);
1591 	default:
1592 		return (-1);
1593 	}
1594 }
1595 
1596 int
1597 linux_fadvise64(struct thread *td, struct linux_fadvise64_args *args)
1598 {
1599 	off_t offset;
1600 	int advice;
1601 
1602 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1603 	offset = PAIR32TO64(off_t, args->offset);
1604 #else
1605 	offset = args->offset;
1606 #endif
1607 
1608 	advice = convert_fadvice(args->advice);
1609 	if (advice == -1)
1610 		return (EINVAL);
1611 	return (kern_posix_fadvise(td, args->fd, offset, args->len, advice));
1612 }
1613 
1614 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1615 int
1616 linux_fadvise64_64(struct thread *td, struct linux_fadvise64_64_args *args)
1617 {
1618 	off_t len, offset;
1619 	int advice;
1620 
1621 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1622 	len = PAIR32TO64(off_t, args->len);
1623 	offset = PAIR32TO64(off_t, args->offset);
1624 #else
1625 	len = args->len;
1626 	offset = args->offset;
1627 #endif
1628 
1629 	advice = convert_fadvice(args->advice);
1630 	if (advice == -1)
1631 		return (EINVAL);
1632 	return (kern_posix_fadvise(td, args->fd, offset, len, advice));
1633 }
1634 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1635 
1636 #ifdef LINUX_LEGACY_SYSCALLS
1637 int
1638 linux_pipe(struct thread *td, struct linux_pipe_args *args)
1639 {
1640 	int fildes[2];
1641 	int error;
1642 
1643 	error = kern_pipe(td, fildes, 0, NULL, NULL);
1644 	if (error != 0)
1645 		return (error);
1646 
1647 	error = copyout(fildes, args->pipefds, sizeof(fildes));
1648 	if (error != 0) {
1649 		(void)kern_close(td, fildes[0]);
1650 		(void)kern_close(td, fildes[1]);
1651 	}
1652 
1653 	return (error);
1654 }
1655 #endif
1656 
1657 int
1658 linux_pipe2(struct thread *td, struct linux_pipe2_args *args)
1659 {
1660 	int fildes[2];
1661 	int error, flags;
1662 
1663 	if ((args->flags & ~(LINUX_O_NONBLOCK | LINUX_O_CLOEXEC)) != 0)
1664 		return (EINVAL);
1665 
1666 	flags = 0;
1667 	if ((args->flags & LINUX_O_NONBLOCK) != 0)
1668 		flags |= O_NONBLOCK;
1669 	if ((args->flags & LINUX_O_CLOEXEC) != 0)
1670 		flags |= O_CLOEXEC;
1671 	error = kern_pipe(td, fildes, flags, NULL, NULL);
1672 	if (error != 0)
1673 		return (error);
1674 
1675 	error = copyout(fildes, args->pipefds, sizeof(fildes));
1676 	if (error != 0) {
1677 		(void)kern_close(td, fildes[0]);
1678 		(void)kern_close(td, fildes[1]);
1679 	}
1680 
1681 	return (error);
1682 }
1683 
1684 int
1685 linux_dup3(struct thread *td, struct linux_dup3_args *args)
1686 {
1687 	int cmd;
1688 	intptr_t newfd;
1689 
1690 	if (args->oldfd == args->newfd)
1691 		return (EINVAL);
1692 	if ((args->flags & ~LINUX_O_CLOEXEC) != 0)
1693 		return (EINVAL);
1694 	if (args->flags & LINUX_O_CLOEXEC)
1695 		cmd = F_DUP2FD_CLOEXEC;
1696 	else
1697 		cmd = F_DUP2FD;
1698 
1699 	newfd = args->newfd;
1700 	return (kern_fcntl(td, args->oldfd, cmd, newfd));
1701 }
1702 
1703 int
1704 linux_fallocate(struct thread *td, struct linux_fallocate_args *args)
1705 {
1706 	off_t len, offset;
1707 
1708 	/*
1709 	 * We emulate only posix_fallocate system call for which
1710 	 * mode should be 0.
1711 	 */
1712 	if (args->mode != 0)
1713 		return (EOPNOTSUPP);
1714 
1715 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1716 	len = PAIR32TO64(off_t, args->len);
1717 	offset = PAIR32TO64(off_t, args->offset);
1718 #else
1719 	len = args->len;
1720 	offset = args->offset;
1721 #endif
1722 
1723 	return (kern_posix_fallocate(td, args->fd, offset, len));
1724 }
1725 
1726 int
1727 linux_copy_file_range(struct thread *td, struct linux_copy_file_range_args
1728     *args)
1729 {
1730 	l_loff_t inoff, outoff, *inoffp, *outoffp;
1731 	int error, flags;
1732 
1733 	/*
1734 	 * copy_file_range(2) on Linux doesn't define any flags (yet), so is
1735 	 * the native implementation.  Enforce it.
1736 	 */
1737 	if (args->flags != 0) {
1738 		linux_msg(td, "copy_file_range unsupported flags 0x%x",
1739 		    args->flags);
1740 		return (EINVAL);
1741 	}
1742 	flags = 0;
1743 	inoffp = outoffp = NULL;
1744 	if (args->off_in != NULL) {
1745 		error = copyin(args->off_in, &inoff, sizeof(l_loff_t));
1746 		if (error != 0)
1747 			return (error);
1748 		inoffp = &inoff;
1749 	}
1750 	if (args->off_out != NULL) {
1751 		error = copyin(args->off_out, &outoff, sizeof(l_loff_t));
1752 		if (error != 0)
1753 			return (error);
1754 		outoffp = &outoff;
1755 	}
1756 
1757 	error = kern_copy_file_range(td, args->fd_in, inoffp, args->fd_out,
1758 	    outoffp, args->len, flags);
1759 	if (error == 0 && args->off_in != NULL)
1760 		error = copyout(inoffp, args->off_in, sizeof(l_loff_t));
1761 	if (error == 0 && args->off_out != NULL)
1762 		error = copyout(outoffp, args->off_out, sizeof(l_loff_t));
1763 	return (error);
1764 }
1765 
1766 #define	LINUX_MEMFD_PREFIX	"memfd:"
1767 
1768 int
1769 linux_memfd_create(struct thread *td, struct linux_memfd_create_args *args)
1770 {
1771 	char memfd_name[LINUX_NAME_MAX + 1];
1772 	int error, flags, shmflags, oflags;
1773 
1774 	/*
1775 	 * This is our clever trick to avoid the heap allocation to copy in the
1776 	 * uname.  We don't really need to go this far out of our way, but it
1777 	 * does keep the rest of this function fairly clean as they don't have
1778 	 * to worry about cleanup on the way out.
1779 	 */
1780 	error = copyinstr(args->uname_ptr,
1781 	    memfd_name + sizeof(LINUX_MEMFD_PREFIX) - 1,
1782 	    LINUX_NAME_MAX - sizeof(LINUX_MEMFD_PREFIX) - 1, NULL);
1783 	if (error != 0) {
1784 		if (error == ENAMETOOLONG)
1785 			error = EINVAL;
1786 		return (error);
1787 	}
1788 
1789 	memcpy(memfd_name, LINUX_MEMFD_PREFIX, sizeof(LINUX_MEMFD_PREFIX) - 1);
1790 	flags = linux_to_bsd_bits(args->flags, mfd_bitmap, 0);
1791 	if ((flags & ~(MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB |
1792 	    MFD_HUGE_MASK)) != 0)
1793 		return (EINVAL);
1794 	/* Size specified but no HUGETLB. */
1795 	if ((flags & MFD_HUGE_MASK) != 0 && (flags & MFD_HUGETLB) == 0)
1796 		return (EINVAL);
1797 	/* We don't actually support HUGETLB. */
1798 	if ((flags & MFD_HUGETLB) != 0)
1799 		return (ENOSYS);
1800 	oflags = O_RDWR;
1801 	shmflags = SHM_GROW_ON_WRITE;
1802 	if ((flags & MFD_CLOEXEC) != 0)
1803 		oflags |= O_CLOEXEC;
1804 	if ((flags & MFD_ALLOW_SEALING) != 0)
1805 		shmflags |= SHM_ALLOW_SEALING;
1806 	return (kern_shm_open2(td, SHM_ANON, oflags, 0, shmflags, NULL,
1807 	    memfd_name));
1808 }
1809 
1810 int
1811 linux_splice(struct thread *td, struct linux_splice_args *args)
1812 {
1813 
1814 	linux_msg(td, "syscall splice not really implemented");
1815 
1816 	/*
1817 	 * splice(2) is documented to return EINVAL in various circumstances;
1818 	 * returning it instead of ENOSYS should hint the caller to use fallback
1819 	 * instead.
1820 	 */
1821 	return (EINVAL);
1822 }
1823