xref: /freebsd/sys/compat/linux/linux_file.c (revision 1669d8afc64812c8d2d1d147ae1fd42ff441e1b1)
1 /*-
2  * Copyright (c) 1994-1995 S�ren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_compat.h"
33 #include "opt_mac.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/conf.h>
38 #include <sys/dirent.h>
39 #include <sys/fcntl.h>
40 #include <sys/file.h>
41 #include <sys/filedesc.h>
42 #include <sys/lock.h>
43 #include <sys/malloc.h>
44 #include <sys/mount.h>
45 #include <sys/mutex.h>
46 #include <sys/namei.h>
47 #include <sys/proc.h>
48 #include <sys/stat.h>
49 #include <sys/sx.h>
50 #include <sys/syscallsubr.h>
51 #include <sys/sysproto.h>
52 #include <sys/tty.h>
53 #include <sys/unistd.h>
54 #include <sys/vnode.h>
55 
56 #include <security/mac/mac_framework.h>
57 
58 #include <ufs/ufs/extattr.h>
59 #include <ufs/ufs/quota.h>
60 #include <ufs/ufs/ufsmount.h>
61 
62 #ifdef COMPAT_LINUX32
63 #include <machine/../linux32/linux.h>
64 #include <machine/../linux32/linux32_proto.h>
65 #else
66 #include <machine/../linux/linux.h>
67 #include <machine/../linux/linux_proto.h>
68 #endif
69 #include <compat/linux/linux_util.h>
70 
71 int
72 linux_creat(struct thread *td, struct linux_creat_args *args)
73 {
74     char *path;
75     int error;
76 
77     LCONVPATHEXIST(td, args->path, &path);
78 
79 #ifdef DEBUG
80 	if (ldebug(creat))
81 		printf(ARGS(creat, "%s, %d"), path, args->mode);
82 #endif
83     error = kern_open(td, path, UIO_SYSSPACE, O_WRONLY | O_CREAT | O_TRUNC,
84 	args->mode);
85     LFREEPATH(path);
86     return (error);
87 }
88 
89 
90 static int
91 linux_common_open(struct thread *td, char *path, int l_flags, int mode, int openat)
92 {
93     struct proc *p = td->td_proc;
94     struct file *fp;
95     int fd;
96     int bsd_flags, error;
97 
98     bsd_flags = 0;
99     switch (l_flags & LINUX_O_ACCMODE) {
100     case LINUX_O_WRONLY:
101 	bsd_flags |= O_WRONLY;
102 	break;
103     case LINUX_O_RDWR:
104 	bsd_flags |= O_RDWR;
105 	break;
106     default:
107 	bsd_flags |= O_RDONLY;
108     }
109     if (l_flags & LINUX_O_NDELAY)
110 	bsd_flags |= O_NONBLOCK;
111     if (l_flags & LINUX_O_APPEND)
112 	bsd_flags |= O_APPEND;
113     if (l_flags & LINUX_O_SYNC)
114 	bsd_flags |= O_FSYNC;
115     if (l_flags & LINUX_O_NONBLOCK)
116 	bsd_flags |= O_NONBLOCK;
117     if (l_flags & LINUX_FASYNC)
118 	bsd_flags |= O_ASYNC;
119     if (l_flags & LINUX_O_CREAT)
120 	bsd_flags |= O_CREAT;
121     if (l_flags & LINUX_O_TRUNC)
122 	bsd_flags |= O_TRUNC;
123     if (l_flags & LINUX_O_EXCL)
124 	bsd_flags |= O_EXCL;
125     if (l_flags & LINUX_O_NOCTTY)
126 	bsd_flags |= O_NOCTTY;
127     if (l_flags & LINUX_O_DIRECT)
128 	bsd_flags |= O_DIRECT;
129     if (l_flags & LINUX_O_NOFOLLOW)
130 	bsd_flags |= O_NOFOLLOW;
131     /* XXX LINUX_O_NOATIME: unable to be easily implemented. */
132 
133     error = kern_open(td, path, UIO_SYSSPACE, bsd_flags, mode);
134     if (!error) {
135 	    fd = td->td_retval[0];
136 	    /*
137 	     * XXX In between kern_open() and fget(), another process
138 	     * having the same filedesc could use that fd without
139 	     * checking below.
140 	     */
141 	    error = fget(td, fd, &fp);
142 	    if (!error) {
143 		    sx_slock(&proctree_lock);
144 		    PROC_LOCK(p);
145 		    if (!(bsd_flags & O_NOCTTY) &&
146 			SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) {
147 			    PROC_UNLOCK(p);
148 			    sx_unlock(&proctree_lock);
149 			    if (fp->f_type == DTYPE_VNODE)
150 				    (void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0,
151 					     td->td_ucred, td);
152 		    } else {
153 			    PROC_UNLOCK(p);
154 			    sx_sunlock(&proctree_lock);
155 		    }
156 		    if (l_flags & LINUX_O_DIRECTORY) {
157 			    if (fp->f_type != DTYPE_VNODE ||
158 				fp->f_vnode->v_type != VDIR) {
159 				    error = ENOTDIR;
160 			    }
161 		    }
162 		    fdrop(fp, td);
163 		    /*
164 		     * XXX as above, fdrop()/kern_close() pair is racy.
165 		     */
166 		    if (error)
167 			    kern_close(td, fd);
168 	    }
169     }
170 
171 #ifdef DEBUG
172     if (ldebug(open))
173 	    printf(LMSG("open returns error %d"), error);
174 #endif
175     if (!openat)
176 	LFREEPATH(path);
177     return error;
178 }
179 
180 /*
181  * common code for linux *at set of syscalls
182  *
183  * works like this:
184  * if filename is absolute
185  *    ignore dirfd
186  * else
187  *    if dirfd == AT_FDCWD
188  *       return CWD/filename
189  *    else
190  *       return DIRFD/filename
191  */
192 static int
193 linux_at(struct thread *td, int dirfd, char *filename, char **newpath, char **freebuf)
194 {
195    	struct file *fp;
196 	int error = 0, vfslocked;
197 	struct vnode *dvp;
198 	struct filedesc *fdp = td->td_proc->p_fd;
199 	char *fullpath = "unknown";
200 	char *freepath = NULL;
201 
202 	/* don't do anything if the pathname is absolute */
203 	if (*filename == '/') {
204 	   	*newpath= filename;
205 	   	return (0);
206 	}
207 
208 	/* check for AT_FDWCD */
209 	if (dirfd == LINUX_AT_FDCWD) {
210 	   	FILEDESC_SLOCK(fdp);
211 		dvp = fdp->fd_cdir;
212 		vref(dvp);
213 	   	FILEDESC_SUNLOCK(fdp);
214 	} else {
215 	   	error = fget(td, dirfd, &fp);
216 		if (error)
217 		   	return (error);
218 		dvp = fp->f_vnode;
219 		/* only a dir can be dfd */
220 		if (dvp->v_type != VDIR) {
221 		   	fdrop(fp, td);
222 			return (ENOTDIR);
223 		}
224 		vref(dvp);
225 		fdrop(fp, td);
226 	}
227 
228 	/*
229 	 * XXXRW: This is bogus, as vn_fullpath() returns only an advisory
230 	 * file path, and may fail in several common situations, including
231 	 * for file systmes that don't use the name cache, and if the entry
232 	 * for the file falls out of the name cache.  We should implement
233 	 * openat() in the FreeBSD native system call layer properly (using a
234 	 * requested starting directory), and have Linux and other ABIs wrap
235 	 * the native implementation.
236 	 */
237 	error = vn_fullpath(td, dvp, &fullpath, &freepath);
238 	if (!error) {
239 	   	*newpath = malloc(strlen(fullpath) + strlen(filename) + 2, M_TEMP, M_WAITOK | M_ZERO);
240 		*freebuf = freepath;
241 		sprintf(*newpath, "%s/%s", fullpath, filename);
242 	} else {
243 		*newpath = NULL;
244 	}
245 	vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
246 	vrele(dvp);
247 	VFS_UNLOCK_GIANT(vfslocked);
248 	return (error);
249 }
250 
251 int
252 linux_openat(struct thread *td, struct linux_openat_args *args)
253 {
254 	char *newpath, *oldpath, *freebuf, *path;
255 	int error;
256 
257 	oldpath = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
258 	error = copyinstr(args->filename, oldpath, MAXPATHLEN, NULL);
259 	if (error) {
260 		free(oldpath, M_TEMP);
261 		return (error);
262 	}
263 #ifdef DEBUG
264 	if (ldebug(openat))
265 		printf(ARGS(openat, "%i, %s, 0x%x, 0x%x"), args->dfd,
266 		    oldpath, args->flags, args->mode);
267 #endif
268 	newpath = freebuf = NULL;
269 	error = linux_at(td, args->dfd, oldpath, &newpath, &freebuf);
270 	if (error == 0) {
271 #ifdef DEBUG
272 		if (ldebug(openat))
273 			printf(LMSG("newpath: %s"), newpath);
274 #endif
275 		if (args->flags & LINUX_O_CREAT)
276 			LCONVPATH_SEG(td, newpath, &path, 1, UIO_SYSSPACE);
277 		else
278 			LCONVPATH_SEG(td, newpath, &path, 0, UIO_SYSSPACE);
279 	}
280 	if (freebuf)
281 	   	free(freebuf, M_TEMP);
282 	if (*oldpath != '/')
283    	   	free(newpath, M_TEMP);
284 	if (error == 0) {
285 		error = linux_common_open(td, path, args->flags,
286 		    args->mode, 1);
287 		LFREEPATH(path);
288 	}
289 	free(oldpath, M_TEMP);
290 	return (error);
291 }
292 
293 int
294 linux_open(struct thread *td, struct linux_open_args *args)
295 {
296     char *path;
297 
298     if (args->flags & LINUX_O_CREAT)
299 	LCONVPATHCREAT(td, args->path, &path);
300     else
301 	LCONVPATHEXIST(td, args->path, &path);
302 
303 #ifdef DEBUG
304 	if (ldebug(open))
305 		printf(ARGS(open, "%s, 0x%x, 0x%x"),
306 		    path, args->flags, args->mode);
307 #endif
308 
309     return linux_common_open(td, path, args->flags, args->mode, 0);
310 }
311 
312 int
313 linux_lseek(struct thread *td, struct linux_lseek_args *args)
314 {
315 
316     struct lseek_args /* {
317 	int fd;
318 	int pad;
319 	off_t offset;
320 	int whence;
321     } */ tmp_args;
322     int error;
323 
324 #ifdef DEBUG
325 	if (ldebug(lseek))
326 		printf(ARGS(lseek, "%d, %ld, %d"),
327 		    args->fdes, (long)args->off, args->whence);
328 #endif
329     tmp_args.fd = args->fdes;
330     tmp_args.offset = (off_t)args->off;
331     tmp_args.whence = args->whence;
332     error = lseek(td, &tmp_args);
333     return error;
334 }
335 
336 int
337 linux_llseek(struct thread *td, struct linux_llseek_args *args)
338 {
339 	struct lseek_args bsd_args;
340 	int error;
341 	off_t off;
342 
343 #ifdef DEBUG
344 	if (ldebug(llseek))
345 		printf(ARGS(llseek, "%d, %d:%d, %d"),
346 		    args->fd, args->ohigh, args->olow, args->whence);
347 #endif
348 	off = (args->olow) | (((off_t) args->ohigh) << 32);
349 
350 	bsd_args.fd = args->fd;
351 	bsd_args.offset = off;
352 	bsd_args.whence = args->whence;
353 
354 	if ((error = lseek(td, &bsd_args)))
355 		return error;
356 
357 	if ((error = copyout(td->td_retval, args->res, sizeof (off_t))))
358 		return error;
359 
360 	td->td_retval[0] = 0;
361 	return 0;
362 }
363 
364 int
365 linux_readdir(struct thread *td, struct linux_readdir_args *args)
366 {
367 	struct linux_getdents_args lda;
368 
369 	lda.fd = args->fd;
370 	lda.dent = args->dent;
371 	lda.count = 1;
372 	return linux_getdents(td, &lda);
373 }
374 
/*
 * Note that linux_getdents(2) and linux_getdents64(2) take the same
 * arguments. They differ only in the definition of the struct dirent they
 * operate on. We use this to share the code between them, with the
 * exception of the accesses to struct dirent. Note that linux_readdir(2)
 * is implemented by means of linux_getdents(2). In this case we never
 * operate on struct dirent64 and thus don't need to handle it...
 */
383 
/*
 * Directory entry as copied out to Linux userland by getdents_common() for
 * linux_getdents() and linux_readdir().
 */
struct l_dirent {
	l_long		d_ino;		/* filled from the native d_fileno */
	l_off_t		d_off;		/* offset cookie; record length for readdir */
	l_ushort	d_reclen;	/* record length; name length for readdir */
	char		d_name[LINUX_NAME_MAX + 1];	/* NUL-terminated name */
};
390 
/*
 * Directory entry as copied out to Linux userland by getdents_common() for
 * linux_getdents64().
 */
struct l_dirent64 {
	uint64_t	d_ino;		/* filled from the native d_fileno */
	int64_t		d_off;		/* offset cookie of the following entry */
	l_ushort	d_reclen;	/* length of this record */
	u_char		d_type;		/* copied from the native d_type */
	char		d_name[LINUX_NAME_MAX + 1];	/* NUL-terminated name */
};
398 
/*
 * Size of a Linux directory record for a name of `namlen' characters:
 * the offset of d_name within *de, plus the name and its terminating NUL,
 * rounded up with ALIGN().  `de' is only used for its type/address and is
 * fully parenthesized so that any pointer expression may be passed.
 */
#define LINUX_RECLEN(de,namlen) \
    ALIGN((((char *)&(de)->d_name - (char *)(de)) + (namlen) + 1))

/* Minimum size of the kernel buffer getdents_common() reads entries into. */
#define	LINUX_DIRBLKSIZ		512
403 
404 static int
405 getdents_common(struct thread *td, struct linux_getdents64_args *args,
406     int is64bit)
407 {
408 	struct dirent *bdp;
409 	struct vnode *vp;
410 	caddr_t inp, buf;		/* BSD-format */
411 	int len, reclen;		/* BSD-format */
412 	caddr_t outp;			/* Linux-format */
413 	int resid, linuxreclen=0;	/* Linux-format */
414 	struct file *fp;
415 	struct uio auio;
416 	struct iovec aiov;
417 	off_t off;
418 	struct l_dirent linux_dirent;
419 	struct l_dirent64 linux_dirent64;
420 	int buflen, error, eofflag, nbytes, justone;
421 	u_long *cookies = NULL, *cookiep;
422 	int ncookies, vfslocked;
423 
424 	nbytes = args->count;
425 	if (nbytes == 1) {
426 		/* readdir(2) case. Always struct dirent. */
427 		if (is64bit)
428 			return (EINVAL);
429 		nbytes = sizeof(linux_dirent);
430 		justone = 1;
431 	} else
432 		justone = 0;
433 
434 	if ((error = getvnode(td->td_proc->p_fd, args->fd, &fp)) != 0)
435 		return (error);
436 
437 	if ((fp->f_flag & FREAD) == 0) {
438 		fdrop(fp, td);
439 		return (EBADF);
440 	}
441 
442 	vp = fp->f_vnode;
443 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
444 	if (vp->v_type != VDIR) {
445 		VFS_UNLOCK_GIANT(vfslocked);
446 		fdrop(fp, td);
447 		return (EINVAL);
448 	}
449 
450 	off = fp->f_offset;
451 
452 	buflen = max(LINUX_DIRBLKSIZ, nbytes);
453 	buflen = min(buflen, MAXBSIZE);
454 	buf = malloc(buflen, M_TEMP, M_WAITOK);
455 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
456 
457 again:
458 	aiov.iov_base = buf;
459 	aiov.iov_len = buflen;
460 	auio.uio_iov = &aiov;
461 	auio.uio_iovcnt = 1;
462 	auio.uio_rw = UIO_READ;
463 	auio.uio_segflg = UIO_SYSSPACE;
464 	auio.uio_td = td;
465 	auio.uio_resid = buflen;
466 	auio.uio_offset = off;
467 
468 	if (cookies) {
469 		free(cookies, M_TEMP);
470 		cookies = NULL;
471 	}
472 
473 #ifdef MAC
474 	/*
475 	 * Do directory search MAC check using non-cached credentials.
476 	 */
477 	if ((error = mac_vnode_check_readdir(td->td_ucred, vp)))
478 		goto out;
479 #endif /* MAC */
480 	if ((error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies,
481 		 &cookies)))
482 		goto out;
483 
484 	inp = buf;
485 	outp = (caddr_t)args->dirent;
486 	resid = nbytes;
487 	if ((len = buflen - auio.uio_resid) <= 0)
488 		goto eof;
489 
490 	cookiep = cookies;
491 
492 	if (cookies) {
493 		/*
494 		 * When using cookies, the vfs has the option of reading from
495 		 * a different offset than that supplied (UFS truncates the
496 		 * offset to a block boundary to make sure that it never reads
497 		 * partway through a directory entry, even if the directory
498 		 * has been compacted).
499 		 */
500 		while (len > 0 && ncookies > 0 && *cookiep <= off) {
501 			bdp = (struct dirent *) inp;
502 			len -= bdp->d_reclen;
503 			inp += bdp->d_reclen;
504 			cookiep++;
505 			ncookies--;
506 		}
507 	}
508 
509 	while (len > 0) {
510 		if (cookiep && ncookies == 0)
511 			break;
512 		bdp = (struct dirent *) inp;
513 		reclen = bdp->d_reclen;
514 		if (reclen & 3) {
515 			error = EFAULT;
516 			goto out;
517 		}
518 
519 		if (bdp->d_fileno == 0) {
520 			inp += reclen;
521 			if (cookiep) {
522 				off = *cookiep++;
523 				ncookies--;
524 			} else
525 				off += reclen;
526 
527 			len -= reclen;
528 			continue;
529 		}
530 
531 		linuxreclen = (is64bit)
532 		    ? LINUX_RECLEN(&linux_dirent64, bdp->d_namlen)
533 		    : LINUX_RECLEN(&linux_dirent, bdp->d_namlen);
534 
535 		if (reclen > len || resid < linuxreclen) {
536 			outp++;
537 			break;
538 		}
539 
540 		if (justone) {
541 			/* readdir(2) case. */
542 			linux_dirent.d_ino = (l_long)bdp->d_fileno;
543 			linux_dirent.d_off = (l_off_t)linuxreclen;
544 			linux_dirent.d_reclen = (l_ushort)bdp->d_namlen;
545 			strcpy(linux_dirent.d_name, bdp->d_name);
546 			error = copyout(&linux_dirent, outp, linuxreclen);
547 		} else {
548 			if (is64bit) {
549 				linux_dirent64.d_ino = bdp->d_fileno;
550 				linux_dirent64.d_off = (cookiep)
551 				    ? (l_off_t)*cookiep
552 				    : (l_off_t)(off + reclen);
553 				linux_dirent64.d_reclen =
554 				    (l_ushort)linuxreclen;
555 				linux_dirent64.d_type = bdp->d_type;
556 				strcpy(linux_dirent64.d_name, bdp->d_name);
557 				error = copyout(&linux_dirent64, outp,
558 				    linuxreclen);
559 			} else {
560 				linux_dirent.d_ino = bdp->d_fileno;
561 				linux_dirent.d_off = (cookiep)
562 				    ? (l_off_t)*cookiep
563 				    : (l_off_t)(off + reclen);
564 				linux_dirent.d_reclen = (l_ushort)linuxreclen;
565 				strcpy(linux_dirent.d_name, bdp->d_name);
566 				error = copyout(&linux_dirent, outp,
567 				    linuxreclen);
568 			}
569 		}
570 		if (error)
571 			goto out;
572 
573 		inp += reclen;
574 		if (cookiep) {
575 			off = *cookiep++;
576 			ncookies--;
577 		} else
578 			off += reclen;
579 
580 		outp += linuxreclen;
581 		resid -= linuxreclen;
582 		len -= reclen;
583 		if (justone)
584 			break;
585 	}
586 
587 	if (outp == (caddr_t)args->dirent)
588 		goto again;
589 
590 	fp->f_offset = off;
591 	if (justone)
592 		nbytes = resid + linuxreclen;
593 
594 eof:
595 	td->td_retval[0] = nbytes - resid;
596 
597 out:
598 	if (cookies)
599 		free(cookies, M_TEMP);
600 
601 	VOP_UNLOCK(vp, 0);
602 	VFS_UNLOCK_GIANT(vfslocked);
603 	fdrop(fp, td);
604 	free(buf, M_TEMP);
605 	return (error);
606 }
607 
/*
 * Linux getdents(): getdents_common() producing struct l_dirent records.
 * The argument structure is passed on as a linux_getdents64_args.
 */
int
linux_getdents(struct thread *td, struct linux_getdents_args *args)
{
	struct linux_getdents64_args *common_args;

#ifdef DEBUG
	if (ldebug(getdents))
		printf(ARGS(getdents, "%d, *, %d"), args->fd, args->count);
#endif

	common_args = (struct linux_getdents64_args*)args;
	return (getdents_common(td, common_args, 0));
}
619 
/*
 * Linux getdents64(): getdents_common() producing struct l_dirent64
 * records.
 */
int
linux_getdents64(struct thread *td, struct linux_getdents64_args *args)
{
	int error;

#ifdef DEBUG
	if (ldebug(getdents64))
		printf(ARGS(getdents64, "%d, *, %d"), args->fd, args->count);
#endif

	error = getdents_common(td, args, 1);
	return (error);
}
631 
632 /*
633  * These exist mainly for hooks for doing /compat/linux translation.
634  */
635 
636 int
637 linux_access(struct thread *td, struct linux_access_args *args)
638 {
639 	char *path;
640 	int error;
641 
642 	/* linux convention */
643 	if (args->flags & ~(F_OK | X_OK | W_OK | R_OK))
644 		return (EINVAL);
645 
646 	LCONVPATHEXIST(td, args->path, &path);
647 
648 #ifdef DEBUG
649 	if (ldebug(access))
650 		printf(ARGS(access, "%s, %d"), path, args->flags);
651 #endif
652 	error = kern_access(td, path, UIO_SYSSPACE, args->flags);
653 	LFREEPATH(path);
654 
655 	return (error);
656 }
657 
658 int
659 linux_unlink(struct thread *td, struct linux_unlink_args *args)
660 {
661 	char *path;
662 	int error;
663 	struct stat st;
664 
665 	LCONVPATHEXIST(td, args->path, &path);
666 
667 #ifdef DEBUG
668 	if (ldebug(unlink))
669 		printf(ARGS(unlink, "%s"), path);
670 #endif
671 
672 	error = kern_unlink(td, path, UIO_SYSSPACE);
673 	if (error == EPERM)
674 		/* Introduce POSIX noncompliant behaviour of Linux */
675 		if (kern_stat(td, path, UIO_SYSSPACE, &st) == 0)
676 			if (S_ISDIR(st.st_mode))
677 				error = EISDIR;
678 	LFREEPATH(path);
679 	return (error);
680 }
681 
682 int
683 linux_chdir(struct thread *td, struct linux_chdir_args *args)
684 {
685 	char *path;
686 	int error;
687 
688 	LCONVPATHEXIST(td, args->path, &path);
689 
690 #ifdef DEBUG
691 	if (ldebug(chdir))
692 		printf(ARGS(chdir, "%s"), path);
693 #endif
694 	error = kern_chdir(td, path, UIO_SYSSPACE);
695 	LFREEPATH(path);
696 	return (error);
697 }
698 
699 int
700 linux_chmod(struct thread *td, struct linux_chmod_args *args)
701 {
702 	char *path;
703 	int error;
704 
705 	LCONVPATHEXIST(td, args->path, &path);
706 
707 #ifdef DEBUG
708 	if (ldebug(chmod))
709 		printf(ARGS(chmod, "%s, %d"), path, args->mode);
710 #endif
711 	error = kern_chmod(td, path, UIO_SYSSPACE, args->mode);
712 	LFREEPATH(path);
713 	return (error);
714 }
715 
716 int
717 linux_mkdir(struct thread *td, struct linux_mkdir_args *args)
718 {
719 	char *path;
720 	int error;
721 
722 	LCONVPATHCREAT(td, args->path, &path);
723 
724 #ifdef DEBUG
725 	if (ldebug(mkdir))
726 		printf(ARGS(mkdir, "%s, %d"), path, args->mode);
727 #endif
728 	error = kern_mkdir(td, path, UIO_SYSSPACE, args->mode);
729 	LFREEPATH(path);
730 	return (error);
731 }
732 
733 int
734 linux_rmdir(struct thread *td, struct linux_rmdir_args *args)
735 {
736 	char *path;
737 	int error;
738 
739 	LCONVPATHEXIST(td, args->path, &path);
740 
741 #ifdef DEBUG
742 	if (ldebug(rmdir))
743 		printf(ARGS(rmdir, "%s"), path);
744 #endif
745 	error = kern_rmdir(td, path, UIO_SYSSPACE);
746 	LFREEPATH(path);
747 	return (error);
748 }
749 
750 int
751 linux_rename(struct thread *td, struct linux_rename_args *args)
752 {
753 	char *from, *to;
754 	int error;
755 
756 	LCONVPATHEXIST(td, args->from, &from);
757 	/* Expand LCONVPATHCREATE so that `from' can be freed on errors */
758 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1);
759 	if (to == NULL) {
760 		LFREEPATH(from);
761 		return (error);
762 	}
763 
764 #ifdef DEBUG
765 	if (ldebug(rename))
766 		printf(ARGS(rename, "%s, %s"), from, to);
767 #endif
768 	error = kern_rename(td, from, to, UIO_SYSSPACE);
769 	LFREEPATH(from);
770 	LFREEPATH(to);
771 	return (error);
772 }
773 
774 int
775 linux_symlink(struct thread *td, struct linux_symlink_args *args)
776 {
777 	char *path, *to;
778 	int error;
779 
780 	LCONVPATHEXIST(td, args->path, &path);
781 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
782 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1);
783 	if (to == NULL) {
784 		LFREEPATH(path);
785 		return (error);
786 	}
787 
788 #ifdef DEBUG
789 	if (ldebug(symlink))
790 		printf(ARGS(symlink, "%s, %s"), path, to);
791 #endif
792 	error = kern_symlink(td, path, to, UIO_SYSSPACE);
793 	LFREEPATH(path);
794 	LFREEPATH(to);
795 	return (error);
796 }
797 
798 int
799 linux_readlink(struct thread *td, struct linux_readlink_args *args)
800 {
801 	char *name;
802 	int error;
803 
804 	LCONVPATHEXIST(td, args->name, &name);
805 
806 #ifdef DEBUG
807 	if (ldebug(readlink))
808 		printf(ARGS(readlink, "%s, %p, %d"), name, (void *)args->buf,
809 		    args->count);
810 #endif
811 	error = kern_readlink(td, name, UIO_SYSSPACE, args->buf, UIO_USERSPACE,
812 	    args->count);
813 	LFREEPATH(name);
814 	return (error);
815 }
816 
817 int
818 linux_truncate(struct thread *td, struct linux_truncate_args *args)
819 {
820 	char *path;
821 	int error;
822 
823 	LCONVPATHEXIST(td, args->path, &path);
824 
825 #ifdef DEBUG
826 	if (ldebug(truncate))
827 		printf(ARGS(truncate, "%s, %ld"), path, (long)args->length);
828 #endif
829 
830 	error = kern_truncate(td, path, UIO_SYSSPACE, args->length);
831 	LFREEPATH(path);
832 	return (error);
833 }
834 
835 int
836 linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args)
837 {
838 	struct ftruncate_args /* {
839 		int fd;
840 		int pad;
841 		off_t length;
842 		} */ nuap;
843 
844 	nuap.fd = args->fd;
845 	nuap.length = args->length;
846 	return (ftruncate(td, &nuap));
847 }
848 
849 int
850 linux_link(struct thread *td, struct linux_link_args *args)
851 {
852 	char *path, *to;
853 	int error;
854 
855 	LCONVPATHEXIST(td, args->path, &path);
856 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
857 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1);
858 	if (to == NULL) {
859 		LFREEPATH(path);
860 		return (error);
861 	}
862 
863 #ifdef DEBUG
864 	if (ldebug(link))
865 		printf(ARGS(link, "%s, %s"), path, to);
866 #endif
867 	error = kern_link(td, path, to, UIO_SYSSPACE);
868 	LFREEPATH(path);
869 	LFREEPATH(to);
870 	return (error);
871 }
872 
873 int
874 linux_fdatasync(td, uap)
875 	struct thread *td;
876 	struct linux_fdatasync_args *uap;
877 {
878 	struct fsync_args bsd;
879 
880 	bsd.fd = uap->fd;
881 	return fsync(td, &bsd);
882 }
883 
884 int
885 linux_pread(td, uap)
886 	struct thread *td;
887 	struct linux_pread_args *uap;
888 {
889 	struct pread_args bsd;
890 	struct vnode *vp;
891 	int error;
892 
893 	bsd.fd = uap->fd;
894 	bsd.buf = uap->buf;
895 	bsd.nbyte = uap->nbyte;
896 	bsd.offset = uap->offset;
897 
898 	error = pread(td, &bsd);
899 
900 	if (error == 0) {
901    	   	/* This seems to violate POSIX but linux does it */
902    	   	if ((error = fgetvp(td, uap->fd, &vp)) != 0)
903    		   	return (error);
904 		if (vp->v_type == VDIR) {
905    		   	vrele(vp);
906 			return (EISDIR);
907 		}
908 		vrele(vp);
909 	}
910 
911 	return (error);
912 }
913 
914 int
915 linux_pwrite(td, uap)
916 	struct thread *td;
917 	struct linux_pwrite_args *uap;
918 {
919 	struct pwrite_args bsd;
920 
921 	bsd.fd = uap->fd;
922 	bsd.buf = uap->buf;
923 	bsd.nbyte = uap->nbyte;
924 	bsd.offset = uap->offset;
925 	return pwrite(td, &bsd);
926 }
927 
928 int
929 linux_mount(struct thread *td, struct linux_mount_args *args)
930 {
931 	struct ufs_args ufs;
932 	char fstypename[MFSNAMELEN];
933 	char mntonname[MNAMELEN], mntfromname[MNAMELEN];
934 	int error;
935 	int fsflags;
936 	void *fsdata;
937 
938 	error = copyinstr(args->filesystemtype, fstypename, MFSNAMELEN - 1,
939 	    NULL);
940 	if (error)
941 		return (error);
942 	error = copyinstr(args->specialfile, mntfromname, MNAMELEN - 1, NULL);
943 	if (error)
944 		return (error);
945 	error = copyinstr(args->dir, mntonname, MNAMELEN - 1, NULL);
946 	if (error)
947 		return (error);
948 
949 #ifdef DEBUG
950 	if (ldebug(mount))
951 		printf(ARGS(mount, "%s, %s, %s"),
952 		    fstypename, mntfromname, mntonname);
953 #endif
954 
955 	if (strcmp(fstypename, "ext2") == 0) {
956 		strcpy(fstypename, "ext2fs");
957 		fsdata = &ufs;
958 		ufs.fspec = mntfromname;
959 #define DEFAULT_ROOTID		-2
960 		ufs.export.ex_root = DEFAULT_ROOTID;
961 		ufs.export.ex_flags =
962 		    args->rwflag & LINUX_MS_RDONLY ? MNT_EXRDONLY : 0;
963 	} else if (strcmp(fstypename, "proc") == 0) {
964 		strcpy(fstypename, "linprocfs");
965 		fsdata = NULL;
966 	} else {
967 		return (ENODEV);
968 	}
969 
970 	fsflags = 0;
971 
972 	if ((args->rwflag & 0xffff0000) == 0xc0ed0000) {
973 		/*
974 		 * Linux SYNC flag is not included; the closest equivalent
975 		 * FreeBSD has is !ASYNC, which is our default.
976 		 */
977 		if (args->rwflag & LINUX_MS_RDONLY)
978 			fsflags |= MNT_RDONLY;
979 		if (args->rwflag & LINUX_MS_NOSUID)
980 			fsflags |= MNT_NOSUID;
981 		if (args->rwflag & LINUX_MS_NOEXEC)
982 			fsflags |= MNT_NOEXEC;
983 		if (args->rwflag & LINUX_MS_REMOUNT)
984 			fsflags |= MNT_UPDATE;
985 	}
986 
987 	if (strcmp(fstypename, "linprocfs") == 0) {
988 		error = kernel_vmount(fsflags,
989 			"fstype", fstypename,
990 			"fspath", mntonname,
991 			NULL);
992 	} else
993 		error = EOPNOTSUPP;
994 	return (error);
995 }
996 
997 int
998 linux_oldumount(struct thread *td, struct linux_oldumount_args *args)
999 {
1000 	struct linux_umount_args args2;
1001 
1002 	args2.path = args->path;
1003 	args2.flags = 0;
1004 	return (linux_umount(td, &args2));
1005 }
1006 
1007 int
1008 linux_umount(struct thread *td, struct linux_umount_args *args)
1009 {
1010 	struct unmount_args bsd;
1011 
1012 	bsd.path = args->path;
1013 	bsd.flags = args->flags;	/* XXX correct? */
1014 	return (unmount(td, &bsd));
1015 }
1016 
1017 /*
1018  * fcntl family of syscalls
1019  */
1020 
/*
 * Linux-side record lock description, copied in from and out to userland
 * by the F_GETLK/F_SETLK/F_SETLKW handling and converted to and from the
 * native struct flock.  Declared __packed for 32-bit Linux emulation on
 * amd64 (presumably so the layout matches the 32-bit ABI; confirm).
 */
struct l_flock {
	l_short		l_type;		/* LINUX_F_RDLCK, LINUX_F_WRLCK or LINUX_F_UNLCK */
	l_short		l_whence;	/* copied to/from flock.l_whence */
	l_off_t		l_start;	/* copied to/from flock.l_start */
	l_off_t		l_len;		/* copied to/from flock.l_len */
	l_pid_t		l_pid;		/* copied to/from flock.l_pid */
}
#if defined(__amd64__) && defined(COMPAT_LINUX32)
__packed
#endif
;
1032 
1033 static void
1034 linux_to_bsd_flock(struct l_flock *linux_flock, struct flock *bsd_flock)
1035 {
1036 	switch (linux_flock->l_type) {
1037 	case LINUX_F_RDLCK:
1038 		bsd_flock->l_type = F_RDLCK;
1039 		break;
1040 	case LINUX_F_WRLCK:
1041 		bsd_flock->l_type = F_WRLCK;
1042 		break;
1043 	case LINUX_F_UNLCK:
1044 		bsd_flock->l_type = F_UNLCK;
1045 		break;
1046 	default:
1047 		bsd_flock->l_type = -1;
1048 		break;
1049 	}
1050 	bsd_flock->l_whence = linux_flock->l_whence;
1051 	bsd_flock->l_start = (off_t)linux_flock->l_start;
1052 	bsd_flock->l_len = (off_t)linux_flock->l_len;
1053 	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1054 }
1055 
1056 static void
1057 bsd_to_linux_flock(struct flock *bsd_flock, struct l_flock *linux_flock)
1058 {
1059 	switch (bsd_flock->l_type) {
1060 	case F_RDLCK:
1061 		linux_flock->l_type = LINUX_F_RDLCK;
1062 		break;
1063 	case F_WRLCK:
1064 		linux_flock->l_type = LINUX_F_WRLCK;
1065 		break;
1066 	case F_UNLCK:
1067 		linux_flock->l_type = LINUX_F_UNLCK;
1068 		break;
1069 	}
1070 	linux_flock->l_whence = bsd_flock->l_whence;
1071 	linux_flock->l_start = (l_off_t)bsd_flock->l_start;
1072 	linux_flock->l_len = (l_off_t)bsd_flock->l_len;
1073 	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1074 }
1075 
1076 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Variant of struct l_flock whose start and length are l_loff_t; only
 * compiled on i386 and for 32-bit Linux emulation on amd64, where it is
 * also declared __packed.
 */
struct l_flock64 {
	l_short		l_type;		/* LINUX_F_RDLCK, LINUX_F_WRLCK or LINUX_F_UNLCK */
	l_short		l_whence;	/* copied to/from flock.l_whence */
	l_loff_t	l_start;	/* copied to/from flock.l_start */
	l_loff_t	l_len;		/* copied to/from flock.l_len */
	l_pid_t		l_pid;		/* copied to/from flock.l_pid */
}
#if defined(__amd64__) && defined(COMPAT_LINUX32)
__packed
#endif
;
1088 
1089 static void
1090 linux_to_bsd_flock64(struct l_flock64 *linux_flock, struct flock *bsd_flock)
1091 {
1092 	switch (linux_flock->l_type) {
1093 	case LINUX_F_RDLCK:
1094 		bsd_flock->l_type = F_RDLCK;
1095 		break;
1096 	case LINUX_F_WRLCK:
1097 		bsd_flock->l_type = F_WRLCK;
1098 		break;
1099 	case LINUX_F_UNLCK:
1100 		bsd_flock->l_type = F_UNLCK;
1101 		break;
1102 	default:
1103 		bsd_flock->l_type = -1;
1104 		break;
1105 	}
1106 	bsd_flock->l_whence = linux_flock->l_whence;
1107 	bsd_flock->l_start = (off_t)linux_flock->l_start;
1108 	bsd_flock->l_len = (off_t)linux_flock->l_len;
1109 	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1110 }
1111 
1112 static void
1113 bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock)
1114 {
1115 	switch (bsd_flock->l_type) {
1116 	case F_RDLCK:
1117 		linux_flock->l_type = LINUX_F_RDLCK;
1118 		break;
1119 	case F_WRLCK:
1120 		linux_flock->l_type = LINUX_F_WRLCK;
1121 		break;
1122 	case F_UNLCK:
1123 		linux_flock->l_type = LINUX_F_UNLCK;
1124 		break;
1125 	}
1126 	linux_flock->l_whence = bsd_flock->l_whence;
1127 	linux_flock->l_start = (l_loff_t)bsd_flock->l_start;
1128 	linux_flock->l_len = (l_loff_t)bsd_flock->l_len;
1129 	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1130 }
1131 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1132 
1133 static int
1134 fcntl_common(struct thread *td, struct linux_fcntl64_args *args)
1135 {
1136 	struct l_flock linux_flock;
1137 	struct flock bsd_flock;
1138 	struct file *fp;
1139 	long arg;
1140 	int error, result;
1141 
1142 	switch (args->cmd) {
1143 	case LINUX_F_DUPFD:
1144 		return (kern_fcntl(td, args->fd, F_DUPFD, args->arg));
1145 
1146 	case LINUX_F_GETFD:
1147 		return (kern_fcntl(td, args->fd, F_GETFD, 0));
1148 
1149 	case LINUX_F_SETFD:
1150 		return (kern_fcntl(td, args->fd, F_SETFD, args->arg));
1151 
1152 	case LINUX_F_GETFL:
1153 		error = kern_fcntl(td, args->fd, F_GETFL, 0);
1154 		result = td->td_retval[0];
1155 		td->td_retval[0] = 0;
1156 		if (result & O_RDONLY)
1157 			td->td_retval[0] |= LINUX_O_RDONLY;
1158 		if (result & O_WRONLY)
1159 			td->td_retval[0] |= LINUX_O_WRONLY;
1160 		if (result & O_RDWR)
1161 			td->td_retval[0] |= LINUX_O_RDWR;
1162 		if (result & O_NDELAY)
1163 			td->td_retval[0] |= LINUX_O_NONBLOCK;
1164 		if (result & O_APPEND)
1165 			td->td_retval[0] |= LINUX_O_APPEND;
1166 		if (result & O_FSYNC)
1167 			td->td_retval[0] |= LINUX_O_SYNC;
1168 		if (result & O_ASYNC)
1169 			td->td_retval[0] |= LINUX_FASYNC;
1170 #ifdef LINUX_O_NOFOLLOW
1171 		if (result & O_NOFOLLOW)
1172 			td->td_retval[0] |= LINUX_O_NOFOLLOW;
1173 #endif
1174 #ifdef LINUX_O_DIRECT
1175 		if (result & O_DIRECT)
1176 			td->td_retval[0] |= LINUX_O_DIRECT;
1177 #endif
1178 		return (error);
1179 
1180 	case LINUX_F_SETFL:
1181 		arg = 0;
1182 		if (args->arg & LINUX_O_NDELAY)
1183 			arg |= O_NONBLOCK;
1184 		if (args->arg & LINUX_O_APPEND)
1185 			arg |= O_APPEND;
1186 		if (args->arg & LINUX_O_SYNC)
1187 			arg |= O_FSYNC;
1188 		if (args->arg & LINUX_FASYNC)
1189 			arg |= O_ASYNC;
1190 #ifdef LINUX_O_NOFOLLOW
1191 		if (args->arg & LINUX_O_NOFOLLOW)
1192 			arg |= O_NOFOLLOW;
1193 #endif
1194 #ifdef LINUX_O_DIRECT
1195 		if (args->arg & LINUX_O_DIRECT)
1196 			arg |= O_DIRECT;
1197 #endif
1198 		return (kern_fcntl(td, args->fd, F_SETFL, arg));
1199 
1200 	case LINUX_F_GETLK:
1201 		error = copyin((void *)args->arg, &linux_flock,
1202 		    sizeof(linux_flock));
1203 		if (error)
1204 			return (error);
1205 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1206 		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1207 		if (error)
1208 			return (error);
1209 		bsd_to_linux_flock(&bsd_flock, &linux_flock);
1210 		return (copyout(&linux_flock, (void *)args->arg,
1211 		    sizeof(linux_flock)));
1212 
1213 	case LINUX_F_SETLK:
1214 		error = copyin((void *)args->arg, &linux_flock,
1215 		    sizeof(linux_flock));
1216 		if (error)
1217 			return (error);
1218 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1219 		return (kern_fcntl(td, args->fd, F_SETLK,
1220 		    (intptr_t)&bsd_flock));
1221 
1222 	case LINUX_F_SETLKW:
1223 		error = copyin((void *)args->arg, &linux_flock,
1224 		    sizeof(linux_flock));
1225 		if (error)
1226 			return (error);
1227 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1228 		return (kern_fcntl(td, args->fd, F_SETLKW,
1229 		     (intptr_t)&bsd_flock));
1230 
1231 	case LINUX_F_GETOWN:
1232 		return (kern_fcntl(td, args->fd, F_GETOWN, 0));
1233 
1234 	case LINUX_F_SETOWN:
1235 		/*
1236 		 * XXX some Linux applications depend on F_SETOWN having no
1237 		 * significant effect for pipes (SIGIO is not delivered for
1238 		 * pipes under Linux-2.2.35 at least).
1239 		 */
1240 		error = fget(td, args->fd, &fp);
1241 		if (error)
1242 			return (error);
1243 		if (fp->f_type == DTYPE_PIPE) {
1244 			fdrop(fp, td);
1245 			return (EINVAL);
1246 		}
1247 		fdrop(fp, td);
1248 
1249 		return (kern_fcntl(td, args->fd, F_SETOWN, args->arg));
1250 	}
1251 
1252 	return (EINVAL);
1253 }
1254 
1255 int
1256 linux_fcntl(struct thread *td, struct linux_fcntl_args *args)
1257 {
1258 	struct linux_fcntl64_args args64;
1259 
1260 #ifdef DEBUG
1261 	if (ldebug(fcntl))
1262 		printf(ARGS(fcntl, "%d, %08x, *"), args->fd, args->cmd);
1263 #endif
1264 
1265 	args64.fd = args->fd;
1266 	args64.cmd = args->cmd;
1267 	args64.arg = args->arg;
1268 	return (fcntl_common(td, &args64));
1269 }
1270 
1271 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1272 int
1273 linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args)
1274 {
1275 	struct l_flock64 linux_flock;
1276 	struct flock bsd_flock;
1277 	int error;
1278 
1279 #ifdef DEBUG
1280 	if (ldebug(fcntl64))
1281 		printf(ARGS(fcntl64, "%d, %08x, *"), args->fd, args->cmd);
1282 #endif
1283 
1284 	switch (args->cmd) {
1285 	case LINUX_F_GETLK64:
1286 		error = copyin((void *)args->arg, &linux_flock,
1287 		    sizeof(linux_flock));
1288 		if (error)
1289 			return (error);
1290 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1291 		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1292 		if (error)
1293 			return (error);
1294 		bsd_to_linux_flock64(&bsd_flock, &linux_flock);
1295 		return (copyout(&linux_flock, (void *)args->arg,
1296 			    sizeof(linux_flock)));
1297 
1298 	case LINUX_F_SETLK64:
1299 		error = copyin((void *)args->arg, &linux_flock,
1300 		    sizeof(linux_flock));
1301 		if (error)
1302 			return (error);
1303 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1304 		return (kern_fcntl(td, args->fd, F_SETLK,
1305 		    (intptr_t)&bsd_flock));
1306 
1307 	case LINUX_F_SETLKW64:
1308 		error = copyin((void *)args->arg, &linux_flock,
1309 		    sizeof(linux_flock));
1310 		if (error)
1311 			return (error);
1312 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1313 		return (kern_fcntl(td, args->fd, F_SETLKW,
1314 		    (intptr_t)&bsd_flock));
1315 	}
1316 
1317 	return (fcntl_common(td, args));
1318 }
1319 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1320 
1321 int
1322 linux_chown(struct thread *td, struct linux_chown_args *args)
1323 {
1324 	char *path;
1325 	int error;
1326 
1327 	LCONVPATHEXIST(td, args->path, &path);
1328 
1329 #ifdef DEBUG
1330 	if (ldebug(chown))
1331 		printf(ARGS(chown, "%s, %d, %d"), path, args->uid, args->gid);
1332 #endif
1333 	error = kern_chown(td, path, UIO_SYSSPACE, args->uid, args->gid);
1334 	LFREEPATH(path);
1335 	return (error);
1336 }
1337 
1338 int
1339 linux_lchown(struct thread *td, struct linux_lchown_args *args)
1340 {
1341 	char *path;
1342 	int error;
1343 
1344 	LCONVPATHEXIST(td, args->path, &path);
1345 
1346 #ifdef DEBUG
1347 	if (ldebug(lchown))
1348 		printf(ARGS(lchown, "%s, %d, %d"), path, args->uid, args->gid);
1349 #endif
1350 	error = kern_lchown(td, path, UIO_SYSSPACE, args->uid, args->gid);
1351 	LFREEPATH(path);
1352 	return (error);
1353 }
1354