xref: /freebsd/sys/compat/linux/linux_file.c (revision bfe691b2f75de2224c7ceb304ebcdef2b42d4179)
1 /*-
 * Copyright (c) 1994-1995 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_compat.h"
33 #include "opt_mac.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/conf.h>
38 #include <sys/dirent.h>
39 #include <sys/fcntl.h>
40 #include <sys/file.h>
41 #include <sys/filedesc.h>
42 #include <sys/lock.h>
43 #include <sys/malloc.h>
44 #include <sys/mount.h>
45 #include <sys/mutex.h>
46 #include <sys/namei.h>
47 #include <sys/proc.h>
48 #include <sys/stat.h>
49 #include <sys/sx.h>
50 #include <sys/syscallsubr.h>
51 #include <sys/sysproto.h>
52 #include <sys/tty.h>
53 #include <sys/unistd.h>
54 #include <sys/vnode.h>
55 
56 #include <security/mac/mac_framework.h>
57 
58 #include <ufs/ufs/extattr.h>
59 #include <ufs/ufs/quota.h>
60 #include <ufs/ufs/ufsmount.h>
61 
62 #ifdef COMPAT_LINUX32
63 #include <machine/../linux32/linux.h>
64 #include <machine/../linux32/linux32_proto.h>
65 #else
66 #include <machine/../linux/linux.h>
67 #include <machine/../linux/linux_proto.h>
68 #endif
69 #include <compat/linux/linux_util.h>
70 
71 int
72 linux_creat(struct thread *td, struct linux_creat_args *args)
73 {
74     char *path;
75     int error;
76 
77     LCONVPATHEXIST(td, args->path, &path);
78 
79 #ifdef DEBUG
80 	if (ldebug(creat))
81 		printf(ARGS(creat, "%s, %d"), path, args->mode);
82 #endif
83     error = kern_open(td, path, UIO_SYSSPACE, O_WRONLY | O_CREAT | O_TRUNC,
84 	args->mode);
85     LFREEPATH(path);
86     return (error);
87 }
88 
89 
90 static int
91 linux_common_open(struct thread *td, char *path, int l_flags, int mode, int openat)
92 {
93     struct proc *p = td->td_proc;
94     struct file *fp;
95     int fd;
96     int bsd_flags, error;
97 
98     bsd_flags = 0;
99     switch (l_flags & LINUX_O_ACCMODE) {
100     case LINUX_O_WRONLY:
101 	bsd_flags |= O_WRONLY;
102 	break;
103     case LINUX_O_RDWR:
104 	bsd_flags |= O_RDWR;
105 	break;
106     default:
107 	bsd_flags |= O_RDONLY;
108     }
109     if (l_flags & LINUX_O_NDELAY)
110 	bsd_flags |= O_NONBLOCK;
111     if (l_flags & LINUX_O_APPEND)
112 	bsd_flags |= O_APPEND;
113     if (l_flags & LINUX_O_SYNC)
114 	bsd_flags |= O_FSYNC;
115     if (l_flags & LINUX_O_NONBLOCK)
116 	bsd_flags |= O_NONBLOCK;
117     if (l_flags & LINUX_FASYNC)
118 	bsd_flags |= O_ASYNC;
119     if (l_flags & LINUX_O_CREAT)
120 	bsd_flags |= O_CREAT;
121     if (l_flags & LINUX_O_TRUNC)
122 	bsd_flags |= O_TRUNC;
123     if (l_flags & LINUX_O_EXCL)
124 	bsd_flags |= O_EXCL;
125     if (l_flags & LINUX_O_NOCTTY)
126 	bsd_flags |= O_NOCTTY;
127     if (l_flags & LINUX_O_DIRECT)
128 	bsd_flags |= O_DIRECT;
129     if (l_flags & LINUX_O_NOFOLLOW)
130 	bsd_flags |= O_NOFOLLOW;
131     /* XXX LINUX_O_NOATIME: unable to be easily implemented. */
132 
133     error = kern_open(td, path, UIO_SYSSPACE, bsd_flags, mode);
134     if (!error) {
135 	    fd = td->td_retval[0];
136 	    /*
137 	     * XXX In between kern_open() and fget(), another process
138 	     * having the same filedesc could use that fd without
139 	     * checking below.
140 	     */
141 	    error = fget(td, fd, &fp);
142 	    if (!error) {
143 		    sx_slock(&proctree_lock);
144 		    PROC_LOCK(p);
145 		    if (!(bsd_flags & O_NOCTTY) &&
146 			SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) {
147 			    PROC_UNLOCK(p);
148 			    sx_unlock(&proctree_lock);
149 			    if (fp->f_type == DTYPE_VNODE)
150 				    (void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0,
151 					     td->td_ucred, td);
152 		    } else {
153 			    PROC_UNLOCK(p);
154 			    sx_sunlock(&proctree_lock);
155 		    }
156 		    if (l_flags & LINUX_O_DIRECTORY) {
157 			    if (fp->f_type != DTYPE_VNODE ||
158 				fp->f_vnode->v_type != VDIR) {
159 				    error = ENOTDIR;
160 			    }
161 		    }
162 		    fdrop(fp, td);
163 		    /*
164 		     * XXX as above, fdrop()/kern_close() pair is racy.
165 		     */
166 		    if (error)
167 			    kern_close(td, fd);
168 	    }
169     }
170 
171 #ifdef DEBUG
172     if (ldebug(open))
173 	    printf(LMSG("open returns error %d"), error);
174 #endif
175     if (!openat)
176 	LFREEPATH(path);
177     return error;
178 }
179 
180 /*
181  * common code for linux *at set of syscalls
182  *
183  * works like this:
184  * if filename is absolute
185  *    ignore dirfd
186  * else
187  *    if dirfd == AT_FDCWD
188  *       return CWD/filename
189  *    else
190  *       return DIRFD/filename
191  */
192 static int
193 linux_at(struct thread *td, int dirfd, char *filename, char **newpath, char **freebuf)
194 {
195    	struct file *fp;
196 	int error = 0, vfslocked;
197 	struct vnode *dvp;
198 	struct filedesc *fdp = td->td_proc->p_fd;
199 	char *fullpath = "unknown";
200 	char *freepath = NULL;
201 
202 	/* don't do anything if the pathname is absolute */
203 	if (*filename == '/') {
204 	   	*newpath= filename;
205 	   	return (0);
206 	}
207 
208 	/* check for AT_FDWCD */
209 	if (dirfd == LINUX_AT_FDCWD) {
210 	   	FILEDESC_SLOCK(fdp);
211 		dvp = fdp->fd_cdir;
212 		vref(dvp);
213 	   	FILEDESC_SUNLOCK(fdp);
214 	} else {
215 	   	error = fget(td, dirfd, &fp);
216 		if (error)
217 		   	return (error);
218 		dvp = fp->f_vnode;
219 		/* only a dir can be dfd */
220 		if (dvp->v_type != VDIR) {
221 		   	fdrop(fp, td);
222 			return (ENOTDIR);
223 		}
224 		vref(dvp);
225 		fdrop(fp, td);
226 	}
227 
228 	/*
229 	 * XXXRW: This is bogus, as vn_fullpath() returns only an advisory
230 	 * file path, and may fail in several common situations, including
231 	 * for file systmes that don't use the name cache, and if the entry
232 	 * for the file falls out of the name cache.  We should implement
233 	 * openat() in the FreeBSD native system call layer properly (using a
234 	 * requested starting directory), and have Linux and other ABIs wrap
235 	 * the native implementation.
236 	 */
237 	error = vn_fullpath(td, dvp, &fullpath, &freepath);
238 	if (!error) {
239 	   	*newpath = malloc(strlen(fullpath) + strlen(filename) + 2, M_TEMP, M_WAITOK | M_ZERO);
240 		*freebuf = freepath;
241 		sprintf(*newpath, "%s/%s", fullpath, filename);
242 	}
243 	vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
244 	vrele(dvp);
245 	VFS_UNLOCK_GIANT(vfslocked);
246 	return (error);
247 }
248 
249 int
250 linux_openat(struct thread *td, struct linux_openat_args *args)
251 {
252    	char *newpath, *oldpath, *freebuf = NULL, *path;
253 	int error;
254 
255 	oldpath = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
256 	error = copyinstr(args->filename, oldpath, MAXPATHLEN, NULL);
257 
258 #ifdef DEBUG
259 	if (ldebug(openat))
260 		printf(ARGS(openat, "%i, %s, 0x%x, 0x%x"), args->dfd,
261 		    oldpath, args->flags, args->mode);
262 #endif
263 
264 	error = linux_at(td, args->dfd, oldpath, &newpath, &freebuf);
265 	if (error)
266 	   	return (error);
267 #ifdef DEBUG
268 	printf(LMSG("newpath: %s"), newpath);
269 #endif
270     	if (args->flags & LINUX_O_CREAT)
271 		LCONVPATH_SEG(td, newpath, &path, 1, UIO_SYSSPACE);
272     	else
273 		LCONVPATH_SEG(td, newpath, &path, 0, UIO_SYSSPACE);
274 	if (freebuf)
275 	   	free(freebuf, M_TEMP);
276 	if (*oldpath != '/')
277    	   	free(newpath, M_TEMP);
278 
279 	error = linux_common_open(td, path, args->flags, args->mode, 1);
280 	free(oldpath, M_TEMP);
281 	return (error);
282 }
283 
284 int
285 linux_open(struct thread *td, struct linux_open_args *args)
286 {
287     char *path;
288 
289     if (args->flags & LINUX_O_CREAT)
290 	LCONVPATHCREAT(td, args->path, &path);
291     else
292 	LCONVPATHEXIST(td, args->path, &path);
293 
294 #ifdef DEBUG
295 	if (ldebug(open))
296 		printf(ARGS(open, "%s, 0x%x, 0x%x"),
297 		    path, args->flags, args->mode);
298 #endif
299 
300     return linux_common_open(td, path, args->flags, args->mode, 0);
301 }
302 
303 int
304 linux_lseek(struct thread *td, struct linux_lseek_args *args)
305 {
306 
307     struct lseek_args /* {
308 	int fd;
309 	int pad;
310 	off_t offset;
311 	int whence;
312     } */ tmp_args;
313     int error;
314 
315 #ifdef DEBUG
316 	if (ldebug(lseek))
317 		printf(ARGS(lseek, "%d, %ld, %d"),
318 		    args->fdes, (long)args->off, args->whence);
319 #endif
320     tmp_args.fd = args->fdes;
321     tmp_args.offset = (off_t)args->off;
322     tmp_args.whence = args->whence;
323     error = lseek(td, &tmp_args);
324     return error;
325 }
326 
327 int
328 linux_llseek(struct thread *td, struct linux_llseek_args *args)
329 {
330 	struct lseek_args bsd_args;
331 	int error;
332 	off_t off;
333 
334 #ifdef DEBUG
335 	if (ldebug(llseek))
336 		printf(ARGS(llseek, "%d, %d:%d, %d"),
337 		    args->fd, args->ohigh, args->olow, args->whence);
338 #endif
339 	off = (args->olow) | (((off_t) args->ohigh) << 32);
340 
341 	bsd_args.fd = args->fd;
342 	bsd_args.offset = off;
343 	bsd_args.whence = args->whence;
344 
345 	if ((error = lseek(td, &bsd_args)))
346 		return error;
347 
348 	if ((error = copyout(td->td_retval, args->res, sizeof (off_t))))
349 		return error;
350 
351 	td->td_retval[0] = 0;
352 	return 0;
353 }
354 
355 int
356 linux_readdir(struct thread *td, struct linux_readdir_args *args)
357 {
358 	struct linux_getdents_args lda;
359 
360 	lda.fd = args->fd;
361 	lda.dent = args->dent;
362 	lda.count = 1;
363 	return linux_getdents(td, &lda);
364 }
365 
/*
 * Note that linux_getdents(2) and linux_getdents64(2) take the same
 * arguments; they differ only in the definition of struct dirent they
 * operate on.  We exploit this to share the code, with the exception of
 * the parts that access struct dirent.  Note that linux_readdir(2) is
 * implemented by means of linux_getdents(2), so in that case we never
 * operate on struct dirent64 and thus don't need to handle it.
 */
374 
/*
 * Directory record copied out to Linux callers of getdents(2) and
 * readdir(2).  Filled in by getdents_common(), which copies out only the
 * first LINUX_RECLEN() bytes of it.
 */
struct l_dirent {
	l_long		d_ino;		/* taken from the native d_fileno */
	l_off_t		d_off;		/* getdents: cookie/offset after this
					   entry; readdir: record length */
	l_ushort	d_reclen;	/* getdents: record length;
					   readdir: name length */
	char		d_name[LINUX_NAME_MAX + 1];	/* NUL-terminated name */
};
381 
/*
 * Directory record copied out to Linux callers of getdents64(2).  Filled in
 * by getdents_common(), which copies out only the first LINUX_RECLEN()
 * bytes of it.
 */
struct l_dirent64 {
	uint64_t	d_ino;		/* taken from the native d_fileno */
	int64_t		d_off;		/* cookie/offset after this entry */
	l_ushort	d_reclen;	/* length of this record */
	u_char		d_type;		/* copied from the native d_type */
	char		d_name[LINUX_NAME_MAX + 1];	/* NUL-terminated name */
};
389 
/*
 * Length of the Linux directory record needed for a name of namlen bytes:
 * the offset of d_name within *de, plus the name and its terminating NUL,
 * rounded up with ALIGN().  de only supplies the record type and address;
 * it is never dereferenced.  Both arguments are fully parenthesised so that
 * arbitrary pointer expressions may be passed.
 */
#define LINUX_RECLEN(de,namlen) \
    ALIGN((((char *)&(de)->d_name - (char *)(de)) + (namlen) + 1))
392 
/* Lower bound on the size of the kernel buffer getdents_common() reads into. */
#define	LINUX_DIRBLKSIZ		512
394 
395 static int
396 getdents_common(struct thread *td, struct linux_getdents64_args *args,
397     int is64bit)
398 {
399 	struct dirent *bdp;
400 	struct vnode *vp;
401 	caddr_t inp, buf;		/* BSD-format */
402 	int len, reclen;		/* BSD-format */
403 	caddr_t outp;			/* Linux-format */
404 	int resid, linuxreclen=0;	/* Linux-format */
405 	struct file *fp;
406 	struct uio auio;
407 	struct iovec aiov;
408 	off_t off;
409 	struct l_dirent linux_dirent;
410 	struct l_dirent64 linux_dirent64;
411 	int buflen, error, eofflag, nbytes, justone;
412 	u_long *cookies = NULL, *cookiep;
413 	int ncookies, vfslocked;
414 
415 	nbytes = args->count;
416 	if (nbytes == 1) {
417 		/* readdir(2) case. Always struct dirent. */
418 		if (is64bit)
419 			return (EINVAL);
420 		nbytes = sizeof(linux_dirent);
421 		justone = 1;
422 	} else
423 		justone = 0;
424 
425 	if ((error = getvnode(td->td_proc->p_fd, args->fd, &fp)) != 0)
426 		return (error);
427 
428 	if ((fp->f_flag & FREAD) == 0) {
429 		fdrop(fp, td);
430 		return (EBADF);
431 	}
432 
433 	vp = fp->f_vnode;
434 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
435 	if (vp->v_type != VDIR) {
436 		VFS_UNLOCK_GIANT(vfslocked);
437 		fdrop(fp, td);
438 		return (EINVAL);
439 	}
440 
441 	off = fp->f_offset;
442 
443 	buflen = max(LINUX_DIRBLKSIZ, nbytes);
444 	buflen = min(buflen, MAXBSIZE);
445 	buf = malloc(buflen, M_TEMP, M_WAITOK);
446 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
447 
448 again:
449 	aiov.iov_base = buf;
450 	aiov.iov_len = buflen;
451 	auio.uio_iov = &aiov;
452 	auio.uio_iovcnt = 1;
453 	auio.uio_rw = UIO_READ;
454 	auio.uio_segflg = UIO_SYSSPACE;
455 	auio.uio_td = td;
456 	auio.uio_resid = buflen;
457 	auio.uio_offset = off;
458 
459 	if (cookies) {
460 		free(cookies, M_TEMP);
461 		cookies = NULL;
462 	}
463 
464 #ifdef MAC
465 	/*
466 	 * Do directory search MAC check using non-cached credentials.
467 	 */
468 	if ((error = mac_check_vnode_readdir(td->td_ucred, vp)))
469 		goto out;
470 #endif /* MAC */
471 	if ((error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies,
472 		 &cookies)))
473 		goto out;
474 
475 	inp = buf;
476 	outp = (caddr_t)args->dirent;
477 	resid = nbytes;
478 	if ((len = buflen - auio.uio_resid) <= 0)
479 		goto eof;
480 
481 	cookiep = cookies;
482 
483 	if (cookies) {
484 		/*
485 		 * When using cookies, the vfs has the option of reading from
486 		 * a different offset than that supplied (UFS truncates the
487 		 * offset to a block boundary to make sure that it never reads
488 		 * partway through a directory entry, even if the directory
489 		 * has been compacted).
490 		 */
491 		while (len > 0 && ncookies > 0 && *cookiep <= off) {
492 			bdp = (struct dirent *) inp;
493 			len -= bdp->d_reclen;
494 			inp += bdp->d_reclen;
495 			cookiep++;
496 			ncookies--;
497 		}
498 	}
499 
500 	while (len > 0) {
501 		if (cookiep && ncookies == 0)
502 			break;
503 		bdp = (struct dirent *) inp;
504 		reclen = bdp->d_reclen;
505 		if (reclen & 3) {
506 			error = EFAULT;
507 			goto out;
508 		}
509 
510 		if (bdp->d_fileno == 0) {
511 			inp += reclen;
512 			if (cookiep) {
513 				off = *cookiep++;
514 				ncookies--;
515 			} else
516 				off += reclen;
517 
518 			len -= reclen;
519 			continue;
520 		}
521 
522 		linuxreclen = (is64bit)
523 		    ? LINUX_RECLEN(&linux_dirent64, bdp->d_namlen)
524 		    : LINUX_RECLEN(&linux_dirent, bdp->d_namlen);
525 
526 		if (reclen > len || resid < linuxreclen) {
527 			outp++;
528 			break;
529 		}
530 
531 		if (justone) {
532 			/* readdir(2) case. */
533 			linux_dirent.d_ino = (l_long)bdp->d_fileno;
534 			linux_dirent.d_off = (l_off_t)linuxreclen;
535 			linux_dirent.d_reclen = (l_ushort)bdp->d_namlen;
536 			strcpy(linux_dirent.d_name, bdp->d_name);
537 			error = copyout(&linux_dirent, outp, linuxreclen);
538 		} else {
539 			if (is64bit) {
540 				linux_dirent64.d_ino = bdp->d_fileno;
541 				linux_dirent64.d_off = (cookiep)
542 				    ? (l_off_t)*cookiep
543 				    : (l_off_t)(off + reclen);
544 				linux_dirent64.d_reclen =
545 				    (l_ushort)linuxreclen;
546 				linux_dirent64.d_type = bdp->d_type;
547 				strcpy(linux_dirent64.d_name, bdp->d_name);
548 				error = copyout(&linux_dirent64, outp,
549 				    linuxreclen);
550 			} else {
551 				linux_dirent.d_ino = bdp->d_fileno;
552 				linux_dirent.d_off = (cookiep)
553 				    ? (l_off_t)*cookiep
554 				    : (l_off_t)(off + reclen);
555 				linux_dirent.d_reclen = (l_ushort)linuxreclen;
556 				strcpy(linux_dirent.d_name, bdp->d_name);
557 				error = copyout(&linux_dirent, outp,
558 				    linuxreclen);
559 			}
560 		}
561 		if (error)
562 			goto out;
563 
564 		inp += reclen;
565 		if (cookiep) {
566 			off = *cookiep++;
567 			ncookies--;
568 		} else
569 			off += reclen;
570 
571 		outp += linuxreclen;
572 		resid -= linuxreclen;
573 		len -= reclen;
574 		if (justone)
575 			break;
576 	}
577 
578 	if (outp == (caddr_t)args->dirent)
579 		goto again;
580 
581 	fp->f_offset = off;
582 	if (justone)
583 		nbytes = resid + linuxreclen;
584 
585 eof:
586 	td->td_retval[0] = nbytes - resid;
587 
588 out:
589 	if (cookies)
590 		free(cookies, M_TEMP);
591 
592 	VOP_UNLOCK(vp, 0, td);
593 	VFS_UNLOCK_GIANT(vfslocked);
594 	fdrop(fp, td);
595 	free(buf, M_TEMP);
596 	return (error);
597 }
598 
/*
 * Linux getdents(2): forwards to getdents_common() in its non-64-bit mode.
 * The argument structure is passed on reinterpreted as the getdents64 one.
 */
int
linux_getdents(struct thread *td, struct linux_getdents_args *args)
{
	struct linux_getdents64_args *shared_args;

#ifdef DEBUG
	if (ldebug(getdents))
		printf(ARGS(getdents, "%d, *, %d"), args->fd, args->count);
#endif

	shared_args = (struct linux_getdents64_args*)args;
	return (getdents_common(td, shared_args, 0));
}
610 
/*
 * Linux getdents64(2): forwards to getdents_common() in its 64-bit mode.
 */
int
linux_getdents64(struct thread *td, struct linux_getdents64_args *args)
{
	const int is64bit = 1;

#ifdef DEBUG
	if (ldebug(getdents64))
		printf(ARGS(getdents64, "%d, *, %d"), args->fd, args->count);
#endif

	return (getdents_common(td, args, is64bit));
}
622 
623 /*
624  * These exist mainly for hooks for doing /compat/linux translation.
625  */
626 
627 int
628 linux_access(struct thread *td, struct linux_access_args *args)
629 {
630 	char *path;
631 	int error;
632 
633 	/* linux convention */
634 	if (args->flags & ~(F_OK | X_OK | W_OK | R_OK))
635 		return (EINVAL);
636 
637 	LCONVPATHEXIST(td, args->path, &path);
638 
639 #ifdef DEBUG
640 	if (ldebug(access))
641 		printf(ARGS(access, "%s, %d"), path, args->flags);
642 #endif
643 	error = kern_access(td, path, UIO_SYSSPACE, args->flags);
644 	LFREEPATH(path);
645 
646 	return (error);
647 }
648 
649 int
650 linux_unlink(struct thread *td, struct linux_unlink_args *args)
651 {
652 	char *path;
653 	int error;
654 	struct stat st;
655 
656 	LCONVPATHEXIST(td, args->path, &path);
657 
658 #ifdef DEBUG
659 	if (ldebug(unlink))
660 		printf(ARGS(unlink, "%s"), path);
661 #endif
662 
663 	error = kern_unlink(td, path, UIO_SYSSPACE);
664 	if (error == EPERM)
665 		/* Introduce POSIX noncompliant behaviour of Linux */
666 		if (kern_stat(td, path, UIO_SYSSPACE, &st) == 0)
667 			if (S_ISDIR(st.st_mode))
668 				error = EISDIR;
669 	LFREEPATH(path);
670 	return (error);
671 }
672 
673 int
674 linux_chdir(struct thread *td, struct linux_chdir_args *args)
675 {
676 	char *path;
677 	int error;
678 
679 	LCONVPATHEXIST(td, args->path, &path);
680 
681 #ifdef DEBUG
682 	if (ldebug(chdir))
683 		printf(ARGS(chdir, "%s"), path);
684 #endif
685 	error = kern_chdir(td, path, UIO_SYSSPACE);
686 	LFREEPATH(path);
687 	return (error);
688 }
689 
690 int
691 linux_chmod(struct thread *td, struct linux_chmod_args *args)
692 {
693 	char *path;
694 	int error;
695 
696 	LCONVPATHEXIST(td, args->path, &path);
697 
698 #ifdef DEBUG
699 	if (ldebug(chmod))
700 		printf(ARGS(chmod, "%s, %d"), path, args->mode);
701 #endif
702 	error = kern_chmod(td, path, UIO_SYSSPACE, args->mode);
703 	LFREEPATH(path);
704 	return (error);
705 }
706 
707 int
708 linux_mkdir(struct thread *td, struct linux_mkdir_args *args)
709 {
710 	char *path;
711 	int error;
712 
713 	LCONVPATHCREAT(td, args->path, &path);
714 
715 #ifdef DEBUG
716 	if (ldebug(mkdir))
717 		printf(ARGS(mkdir, "%s, %d"), path, args->mode);
718 #endif
719 	error = kern_mkdir(td, path, UIO_SYSSPACE, args->mode);
720 	LFREEPATH(path);
721 	return (error);
722 }
723 
724 int
725 linux_rmdir(struct thread *td, struct linux_rmdir_args *args)
726 {
727 	char *path;
728 	int error;
729 
730 	LCONVPATHEXIST(td, args->path, &path);
731 
732 #ifdef DEBUG
733 	if (ldebug(rmdir))
734 		printf(ARGS(rmdir, "%s"), path);
735 #endif
736 	error = kern_rmdir(td, path, UIO_SYSSPACE);
737 	LFREEPATH(path);
738 	return (error);
739 }
740 
741 int
742 linux_rename(struct thread *td, struct linux_rename_args *args)
743 {
744 	char *from, *to;
745 	int error;
746 
747 	LCONVPATHEXIST(td, args->from, &from);
748 	/* Expand LCONVPATHCREATE so that `from' can be freed on errors */
749 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1);
750 	if (to == NULL) {
751 		LFREEPATH(from);
752 		return (error);
753 	}
754 
755 #ifdef DEBUG
756 	if (ldebug(rename))
757 		printf(ARGS(rename, "%s, %s"), from, to);
758 #endif
759 	error = kern_rename(td, from, to, UIO_SYSSPACE);
760 	LFREEPATH(from);
761 	LFREEPATH(to);
762 	return (error);
763 }
764 
765 int
766 linux_symlink(struct thread *td, struct linux_symlink_args *args)
767 {
768 	char *path, *to;
769 	int error;
770 
771 	LCONVPATHEXIST(td, args->path, &path);
772 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
773 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1);
774 	if (to == NULL) {
775 		LFREEPATH(path);
776 		return (error);
777 	}
778 
779 #ifdef DEBUG
780 	if (ldebug(symlink))
781 		printf(ARGS(symlink, "%s, %s"), path, to);
782 #endif
783 	error = kern_symlink(td, path, to, UIO_SYSSPACE);
784 	LFREEPATH(path);
785 	LFREEPATH(to);
786 	return (error);
787 }
788 
789 int
790 linux_readlink(struct thread *td, struct linux_readlink_args *args)
791 {
792 	char *name;
793 	int error;
794 
795 	LCONVPATHEXIST(td, args->name, &name);
796 
797 #ifdef DEBUG
798 	if (ldebug(readlink))
799 		printf(ARGS(readlink, "%s, %p, %d"), name, (void *)args->buf,
800 		    args->count);
801 #endif
802 	error = kern_readlink(td, name, UIO_SYSSPACE, args->buf, UIO_USERSPACE,
803 	    args->count);
804 	LFREEPATH(name);
805 	return (error);
806 }
807 
808 int
809 linux_truncate(struct thread *td, struct linux_truncate_args *args)
810 {
811 	char *path;
812 	int error;
813 
814 	LCONVPATHEXIST(td, args->path, &path);
815 
816 #ifdef DEBUG
817 	if (ldebug(truncate))
818 		printf(ARGS(truncate, "%s, %ld"), path, (long)args->length);
819 #endif
820 
821 	error = kern_truncate(td, path, UIO_SYSSPACE, args->length);
822 	LFREEPATH(path);
823 	return (error);
824 }
825 
826 int
827 linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args)
828 {
829 	struct ftruncate_args /* {
830 		int fd;
831 		int pad;
832 		off_t length;
833 		} */ nuap;
834 
835 	nuap.fd = args->fd;
836 	nuap.pad = 0;
837 	nuap.length = args->length;
838 	return (ftruncate(td, &nuap));
839 }
840 
841 int
842 linux_link(struct thread *td, struct linux_link_args *args)
843 {
844 	char *path, *to;
845 	int error;
846 
847 	LCONVPATHEXIST(td, args->path, &path);
848 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
849 	error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1);
850 	if (to == NULL) {
851 		LFREEPATH(path);
852 		return (error);
853 	}
854 
855 #ifdef DEBUG
856 	if (ldebug(link))
857 		printf(ARGS(link, "%s, %s"), path, to);
858 #endif
859 	error = kern_link(td, path, to, UIO_SYSSPACE);
860 	LFREEPATH(path);
861 	LFREEPATH(to);
862 	return (error);
863 }
864 
865 int
866 linux_fdatasync(td, uap)
867 	struct thread *td;
868 	struct linux_fdatasync_args *uap;
869 {
870 	struct fsync_args bsd;
871 
872 	bsd.fd = uap->fd;
873 	return fsync(td, &bsd);
874 }
875 
876 int
877 linux_pread(td, uap)
878 	struct thread *td;
879 	struct linux_pread_args *uap;
880 {
881 	struct pread_args bsd;
882 	struct vnode *vp;
883 	int error;
884 
885 	bsd.fd = uap->fd;
886 	bsd.buf = uap->buf;
887 	bsd.nbyte = uap->nbyte;
888 	bsd.offset = uap->offset;
889 
890 	error = pread(td, &bsd);
891 
892 	if (error == 0) {
893    	   	/* This seems to violate POSIX but linux does it */
894    	   	if ((error = fgetvp(td, uap->fd, &vp)) != 0)
895    		   	return (error);
896 		if (vp->v_type == VDIR) {
897    		   	vrele(vp);
898 			return (EISDIR);
899 		}
900 		vrele(vp);
901 	}
902 
903 	return (error);
904 }
905 
906 int
907 linux_pwrite(td, uap)
908 	struct thread *td;
909 	struct linux_pwrite_args *uap;
910 {
911 	struct pwrite_args bsd;
912 
913 	bsd.fd = uap->fd;
914 	bsd.buf = uap->buf;
915 	bsd.nbyte = uap->nbyte;
916 	bsd.offset = uap->offset;
917 	return pwrite(td, &bsd);
918 }
919 
920 int
921 linux_mount(struct thread *td, struct linux_mount_args *args)
922 {
923 	struct ufs_args ufs;
924 	char fstypename[MFSNAMELEN];
925 	char mntonname[MNAMELEN], mntfromname[MNAMELEN];
926 	int error;
927 	int fsflags;
928 	void *fsdata;
929 
930 	error = copyinstr(args->filesystemtype, fstypename, MFSNAMELEN - 1,
931 	    NULL);
932 	if (error)
933 		return (error);
934 	error = copyinstr(args->specialfile, mntfromname, MNAMELEN - 1, NULL);
935 	if (error)
936 		return (error);
937 	error = copyinstr(args->dir, mntonname, MNAMELEN - 1, NULL);
938 	if (error)
939 		return (error);
940 
941 #ifdef DEBUG
942 	if (ldebug(mount))
943 		printf(ARGS(mount, "%s, %s, %s"),
944 		    fstypename, mntfromname, mntonname);
945 #endif
946 
947 	if (strcmp(fstypename, "ext2") == 0) {
948 		strcpy(fstypename, "ext2fs");
949 		fsdata = &ufs;
950 		ufs.fspec = mntfromname;
951 #define DEFAULT_ROOTID		-2
952 		ufs.export.ex_root = DEFAULT_ROOTID;
953 		ufs.export.ex_flags =
954 		    args->rwflag & LINUX_MS_RDONLY ? MNT_EXRDONLY : 0;
955 	} else if (strcmp(fstypename, "proc") == 0) {
956 		strcpy(fstypename, "linprocfs");
957 		fsdata = NULL;
958 	} else {
959 		return (ENODEV);
960 	}
961 
962 	fsflags = 0;
963 
964 	if ((args->rwflag & 0xffff0000) == 0xc0ed0000) {
965 		/*
966 		 * Linux SYNC flag is not included; the closest equivalent
967 		 * FreeBSD has is !ASYNC, which is our default.
968 		 */
969 		if (args->rwflag & LINUX_MS_RDONLY)
970 			fsflags |= MNT_RDONLY;
971 		if (args->rwflag & LINUX_MS_NOSUID)
972 			fsflags |= MNT_NOSUID;
973 		if (args->rwflag & LINUX_MS_NOEXEC)
974 			fsflags |= MNT_NOEXEC;
975 		if (args->rwflag & LINUX_MS_REMOUNT)
976 			fsflags |= MNT_UPDATE;
977 	}
978 
979 	if (strcmp(fstypename, "linprocfs") == 0) {
980 		error = kernel_vmount(fsflags,
981 			"fstype", fstypename,
982 			"fspath", mntonname,
983 			NULL);
984 	} else
985 		error = EOPNOTSUPP;
986 	return (error);
987 }
988 
989 int
990 linux_oldumount(struct thread *td, struct linux_oldumount_args *args)
991 {
992 	struct linux_umount_args args2;
993 
994 	args2.path = args->path;
995 	args2.flags = 0;
996 	return (linux_umount(td, &args2));
997 }
998 
999 int
1000 linux_umount(struct thread *td, struct linux_umount_args *args)
1001 {
1002 	struct unmount_args bsd;
1003 
1004 	bsd.path = args->path;
1005 	bsd.flags = args->flags;	/* XXX correct? */
1006 	return (unmount(td, &bsd));
1007 }
1008 
1009 /*
1010  * fcntl family of syscalls
1011  */
1012 
/*
 * Linux struct flock as exchanged with user space by the F_GETLK, F_SETLK
 * and F_SETLKW commands in fcntl_common().  Converted to and from the
 * native struct flock by linux_to_bsd_flock() and bsd_to_linux_flock().
 * The structure is packed when building the 32-bit compat layer on amd64.
 */
struct l_flock {
	l_short		l_type;		/* LINUX_F_RDLCK, _WRLCK or _UNLCK */
	l_short		l_whence;	/* copied to/from native l_whence */
	l_off_t		l_start;	/* copied to/from native l_start */
	l_off_t		l_len;		/* copied to/from native l_len */
	l_pid_t		l_pid;		/* copied to/from native l_pid */
}
#if defined(__amd64__) && defined(COMPAT_LINUX32)
__packed
#endif
;
1024 
1025 static void
1026 linux_to_bsd_flock(struct l_flock *linux_flock, struct flock *bsd_flock)
1027 {
1028 	switch (linux_flock->l_type) {
1029 	case LINUX_F_RDLCK:
1030 		bsd_flock->l_type = F_RDLCK;
1031 		break;
1032 	case LINUX_F_WRLCK:
1033 		bsd_flock->l_type = F_WRLCK;
1034 		break;
1035 	case LINUX_F_UNLCK:
1036 		bsd_flock->l_type = F_UNLCK;
1037 		break;
1038 	default:
1039 		bsd_flock->l_type = -1;
1040 		break;
1041 	}
1042 	bsd_flock->l_whence = linux_flock->l_whence;
1043 	bsd_flock->l_start = (off_t)linux_flock->l_start;
1044 	bsd_flock->l_len = (off_t)linux_flock->l_len;
1045 	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1046 }
1047 
1048 static void
1049 bsd_to_linux_flock(struct flock *bsd_flock, struct l_flock *linux_flock)
1050 {
1051 	switch (bsd_flock->l_type) {
1052 	case F_RDLCK:
1053 		linux_flock->l_type = LINUX_F_RDLCK;
1054 		break;
1055 	case F_WRLCK:
1056 		linux_flock->l_type = LINUX_F_WRLCK;
1057 		break;
1058 	case F_UNLCK:
1059 		linux_flock->l_type = LINUX_F_UNLCK;
1060 		break;
1061 	}
1062 	linux_flock->l_whence = bsd_flock->l_whence;
1063 	linux_flock->l_start = (l_off_t)bsd_flock->l_start;
1064 	linux_flock->l_len = (l_off_t)bsd_flock->l_len;
1065 	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1066 }
1067 
1068 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Variant of struct l_flock whose l_start and l_len use l_loff_t.
 * Converted to and from the native struct flock by linux_to_bsd_flock64()
 * and bsd_to_linux_flock64().  The structure is packed when building the
 * 32-bit compat layer on amd64.
 */
struct l_flock64 {
	l_short		l_type;		/* LINUX_F_RDLCK, _WRLCK or _UNLCK */
	l_short		l_whence;	/* copied to/from native l_whence */
	l_loff_t	l_start;	/* copied to/from native l_start */
	l_loff_t	l_len;		/* copied to/from native l_len */
	l_pid_t		l_pid;		/* copied to/from native l_pid */
}
#if defined(__amd64__) && defined(COMPAT_LINUX32)
__packed
#endif
;
1080 
1081 static void
1082 linux_to_bsd_flock64(struct l_flock64 *linux_flock, struct flock *bsd_flock)
1083 {
1084 	switch (linux_flock->l_type) {
1085 	case LINUX_F_RDLCK:
1086 		bsd_flock->l_type = F_RDLCK;
1087 		break;
1088 	case LINUX_F_WRLCK:
1089 		bsd_flock->l_type = F_WRLCK;
1090 		break;
1091 	case LINUX_F_UNLCK:
1092 		bsd_flock->l_type = F_UNLCK;
1093 		break;
1094 	default:
1095 		bsd_flock->l_type = -1;
1096 		break;
1097 	}
1098 	bsd_flock->l_whence = linux_flock->l_whence;
1099 	bsd_flock->l_start = (off_t)linux_flock->l_start;
1100 	bsd_flock->l_len = (off_t)linux_flock->l_len;
1101 	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1102 }
1103 
1104 static void
1105 bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock)
1106 {
1107 	switch (bsd_flock->l_type) {
1108 	case F_RDLCK:
1109 		linux_flock->l_type = LINUX_F_RDLCK;
1110 		break;
1111 	case F_WRLCK:
1112 		linux_flock->l_type = LINUX_F_WRLCK;
1113 		break;
1114 	case F_UNLCK:
1115 		linux_flock->l_type = LINUX_F_UNLCK;
1116 		break;
1117 	}
1118 	linux_flock->l_whence = bsd_flock->l_whence;
1119 	linux_flock->l_start = (l_loff_t)bsd_flock->l_start;
1120 	linux_flock->l_len = (l_loff_t)bsd_flock->l_len;
1121 	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1122 }
1123 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1124 
1125 static int
1126 fcntl_common(struct thread *td, struct linux_fcntl64_args *args)
1127 {
1128 	struct l_flock linux_flock;
1129 	struct flock bsd_flock;
1130 	struct file *fp;
1131 	long arg;
1132 	int error, result;
1133 
1134 	switch (args->cmd) {
1135 	case LINUX_F_DUPFD:
1136 		return (kern_fcntl(td, args->fd, F_DUPFD, args->arg));
1137 
1138 	case LINUX_F_GETFD:
1139 		return (kern_fcntl(td, args->fd, F_GETFD, 0));
1140 
1141 	case LINUX_F_SETFD:
1142 		return (kern_fcntl(td, args->fd, F_SETFD, args->arg));
1143 
1144 	case LINUX_F_GETFL:
1145 		error = kern_fcntl(td, args->fd, F_GETFL, 0);
1146 		result = td->td_retval[0];
1147 		td->td_retval[0] = 0;
1148 		if (result & O_RDONLY)
1149 			td->td_retval[0] |= LINUX_O_RDONLY;
1150 		if (result & O_WRONLY)
1151 			td->td_retval[0] |= LINUX_O_WRONLY;
1152 		if (result & O_RDWR)
1153 			td->td_retval[0] |= LINUX_O_RDWR;
1154 		if (result & O_NDELAY)
1155 			td->td_retval[0] |= LINUX_O_NONBLOCK;
1156 		if (result & O_APPEND)
1157 			td->td_retval[0] |= LINUX_O_APPEND;
1158 		if (result & O_FSYNC)
1159 			td->td_retval[0] |= LINUX_O_SYNC;
1160 		if (result & O_ASYNC)
1161 			td->td_retval[0] |= LINUX_FASYNC;
1162 #ifdef LINUX_O_NOFOLLOW
1163 		if (result & O_NOFOLLOW)
1164 			td->td_retval[0] |= LINUX_O_NOFOLLOW;
1165 #endif
1166 #ifdef LINUX_O_DIRECT
1167 		if (result & O_DIRECT)
1168 			td->td_retval[0] |= LINUX_O_DIRECT;
1169 #endif
1170 		return (error);
1171 
1172 	case LINUX_F_SETFL:
1173 		arg = 0;
1174 		if (args->arg & LINUX_O_NDELAY)
1175 			arg |= O_NONBLOCK;
1176 		if (args->arg & LINUX_O_APPEND)
1177 			arg |= O_APPEND;
1178 		if (args->arg & LINUX_O_SYNC)
1179 			arg |= O_FSYNC;
1180 		if (args->arg & LINUX_FASYNC)
1181 			arg |= O_ASYNC;
1182 #ifdef LINUX_O_NOFOLLOW
1183 		if (args->arg & LINUX_O_NOFOLLOW)
1184 			arg |= O_NOFOLLOW;
1185 #endif
1186 #ifdef LINUX_O_DIRECT
1187 		if (args->arg & LINUX_O_DIRECT)
1188 			arg |= O_DIRECT;
1189 #endif
1190 		return (kern_fcntl(td, args->fd, F_SETFL, arg));
1191 
1192 	case LINUX_F_GETLK:
1193 		error = copyin((void *)args->arg, &linux_flock,
1194 		    sizeof(linux_flock));
1195 		if (error)
1196 			return (error);
1197 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1198 		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1199 		if (error)
1200 			return (error);
1201 		bsd_to_linux_flock(&bsd_flock, &linux_flock);
1202 		return (copyout(&linux_flock, (void *)args->arg,
1203 		    sizeof(linux_flock)));
1204 
1205 	case LINUX_F_SETLK:
1206 		error = copyin((void *)args->arg, &linux_flock,
1207 		    sizeof(linux_flock));
1208 		if (error)
1209 			return (error);
1210 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1211 		return (kern_fcntl(td, args->fd, F_SETLK,
1212 		    (intptr_t)&bsd_flock));
1213 
1214 	case LINUX_F_SETLKW:
1215 		error = copyin((void *)args->arg, &linux_flock,
1216 		    sizeof(linux_flock));
1217 		if (error)
1218 			return (error);
1219 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
1220 		return (kern_fcntl(td, args->fd, F_SETLKW,
1221 		     (intptr_t)&bsd_flock));
1222 
1223 	case LINUX_F_GETOWN:
1224 		return (kern_fcntl(td, args->fd, F_GETOWN, 0));
1225 
1226 	case LINUX_F_SETOWN:
1227 		/*
1228 		 * XXX some Linux applications depend on F_SETOWN having no
1229 		 * significant effect for pipes (SIGIO is not delivered for
1230 		 * pipes under Linux-2.2.35 at least).
1231 		 */
1232 		error = fget(td, args->fd, &fp);
1233 		if (error)
1234 			return (error);
1235 		if (fp->f_type == DTYPE_PIPE) {
1236 			fdrop(fp, td);
1237 			return (EINVAL);
1238 		}
1239 		fdrop(fp, td);
1240 
1241 		return (kern_fcntl(td, args->fd, F_SETOWN, args->arg));
1242 	}
1243 
1244 	return (EINVAL);
1245 }
1246 
1247 int
1248 linux_fcntl(struct thread *td, struct linux_fcntl_args *args)
1249 {
1250 	struct linux_fcntl64_args args64;
1251 
1252 #ifdef DEBUG
1253 	if (ldebug(fcntl))
1254 		printf(ARGS(fcntl, "%d, %08x, *"), args->fd, args->cmd);
1255 #endif
1256 
1257 	args64.fd = args->fd;
1258 	args64.cmd = args->cmd;
1259 	args64.arg = args->arg;
1260 	return (fcntl_common(td, &args64));
1261 }
1262 
1263 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1264 int
1265 linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args)
1266 {
1267 	struct l_flock64 linux_flock;
1268 	struct flock bsd_flock;
1269 	int error;
1270 
1271 #ifdef DEBUG
1272 	if (ldebug(fcntl64))
1273 		printf(ARGS(fcntl64, "%d, %08x, *"), args->fd, args->cmd);
1274 #endif
1275 
1276 	switch (args->cmd) {
1277 	case LINUX_F_GETLK64:
1278 		error = copyin((void *)args->arg, &linux_flock,
1279 		    sizeof(linux_flock));
1280 		if (error)
1281 			return (error);
1282 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1283 		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1284 		if (error)
1285 			return (error);
1286 		bsd_to_linux_flock64(&bsd_flock, &linux_flock);
1287 		return (copyout(&linux_flock, (void *)args->arg,
1288 			    sizeof(linux_flock)));
1289 
1290 	case LINUX_F_SETLK64:
1291 		error = copyin((void *)args->arg, &linux_flock,
1292 		    sizeof(linux_flock));
1293 		if (error)
1294 			return (error);
1295 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1296 		return (kern_fcntl(td, args->fd, F_SETLK,
1297 		    (intptr_t)&bsd_flock));
1298 
1299 	case LINUX_F_SETLKW64:
1300 		error = copyin((void *)args->arg, &linux_flock,
1301 		    sizeof(linux_flock));
1302 		if (error)
1303 			return (error);
1304 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1305 		return (kern_fcntl(td, args->fd, F_SETLKW,
1306 		    (intptr_t)&bsd_flock));
1307 	}
1308 
1309 	return (fcntl_common(td, args));
1310 }
1311 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1312 
1313 int
1314 linux_chown(struct thread *td, struct linux_chown_args *args)
1315 {
1316 	char *path;
1317 	int error;
1318 
1319 	LCONVPATHEXIST(td, args->path, &path);
1320 
1321 #ifdef DEBUG
1322 	if (ldebug(chown))
1323 		printf(ARGS(chown, "%s, %d, %d"), path, args->uid, args->gid);
1324 #endif
1325 	error = kern_chown(td, path, UIO_SYSSPACE, args->uid, args->gid);
1326 	LFREEPATH(path);
1327 	return (error);
1328 }
1329 
1330 int
1331 linux_lchown(struct thread *td, struct linux_lchown_args *args)
1332 {
1333 	char *path;
1334 	int error;
1335 
1336 	LCONVPATHEXIST(td, args->path, &path);
1337 
1338 #ifdef DEBUG
1339 	if (ldebug(lchown))
1340 		printf(ARGS(lchown, "%s, %d, %d"), path, args->uid, args->gid);
1341 #endif
1342 	error = kern_lchown(td, path, UIO_SYSSPACE, args->uid, args->gid);
1343 	LFREEPATH(path);
1344 	return (error);
1345 }
1346