xref: /freebsd/sys/kern/vfs_syscalls.c (revision ebbd4fa8c8427d3dd847ba33c45c996e0500e6ff)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  * $FreeBSD$
40  */
41 
42 /* For 4.3 integer FS ID compatibility */
43 #include "opt_compat.h"
44 #include "opt_mac.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/sysent.h>
51 #include <sys/malloc.h>
52 #include <sys/mac.h>
53 #include <sys/mount.h>
54 #include <sys/mutex.h>
55 #include <sys/sysproto.h>
56 #include <sys/namei.h>
57 #include <sys/filedesc.h>
58 #include <sys/kernel.h>
59 #include <sys/fcntl.h>
60 #include <sys/file.h>
61 #include <sys/linker.h>
62 #include <sys/stat.h>
63 #include <sys/sx.h>
64 #include <sys/unistd.h>
65 #include <sys/vnode.h>
66 #include <sys/proc.h>
67 #include <sys/dirent.h>
68 #include <sys/extattr.h>
69 #include <sys/jail.h>
70 #include <sys/sysctl.h>
71 
72 #include <machine/limits.h>
73 #include <machine/stdarg.h>
74 
75 #include <vm/vm.h>
76 #include <vm/vm_object.h>
77 #include <vm/vm_page.h>
78 #include <vm/uma.h>
79 
80 static int change_dir(struct nameidata *ndp, struct thread *td);
81 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
82 static int getutimes(const struct timeval *, struct timespec *);
83 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
84 static int setfmode(struct thread *td, struct vnode *, int);
85 static int setfflags(struct thread *td, struct vnode *, int);
86 static int setutimes(struct thread *td, struct vnode *,
87     const struct timespec *, int, int);
88 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
89     struct thread *td);
90 
91 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
92 int (*softdep_fsync_hook)(struct vnode *);
93 
94 /*
95  * Sync each mounted filesystem.
96  */
97 #ifndef _SYS_SYSPROTO_H_
98 struct sync_args {
99         int     dummy;
100 };
101 #endif
102 
103 #ifdef DEBUG
104 static int syncprt = 0;
105 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
106 #endif
107 
108 /* ARGSUSED */
109 int
110 sync(td, uap)
111 	struct thread *td;
112 	struct sync_args *uap;
113 {
114 	struct mount *mp, *nmp;
115 	int asyncflag;
116 
117 	mtx_lock(&mountlist_mtx);
118 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
119 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
120 			nmp = TAILQ_NEXT(mp, mnt_list);
121 			continue;
122 		}
123 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
124 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
125 			asyncflag = mp->mnt_flag & MNT_ASYNC;
126 			mp->mnt_flag &= ~MNT_ASYNC;
127 			vfs_msync(mp, MNT_NOWAIT);
128 			VFS_SYNC(mp, MNT_NOWAIT,
129 			    ((td != NULL) ? td->td_ucred : NOCRED), td);
130 			mp->mnt_flag |= asyncflag;
131 			vn_finished_write(mp);
132 		}
133 		mtx_lock(&mountlist_mtx);
134 		nmp = TAILQ_NEXT(mp, mnt_list);
135 		vfs_unbusy(mp, td);
136 	}
137 	mtx_unlock(&mountlist_mtx);
138 #if 0
139 /*
140  * XXX don't call vfs_bufstats() yet because that routine
141  * was not imported in the Lite2 merge.
142  */
143 #ifdef DIAGNOSTIC
144 	if (syncprt)
145 		vfs_bufstats();
146 #endif /* DIAGNOSTIC */
147 #endif
148 	return (0);
149 }
150 
151 /* XXX PRISON: could be per prison flag */
152 static int prison_quotas;
153 #if 0
154 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
155 #endif
156 
157 /*
158  * Change filesystem quotas.
159  */
160 #ifndef _SYS_SYSPROTO_H_
161 struct quotactl_args {
162 	char *path;
163 	int cmd;
164 	int uid;
165 	caddr_t arg;
166 };
167 #endif
168 /* ARGSUSED */
169 int
170 quotactl(td, uap)
171 	struct thread *td;
172 	register struct quotactl_args /* {
173 		syscallarg(char *) path;
174 		syscallarg(int) cmd;
175 		syscallarg(int) uid;
176 		syscallarg(caddr_t) arg;
177 	} */ *uap;
178 {
179 	struct mount *mp;
180 	int error;
181 	struct nameidata nd;
182 
183 	if (jailed(td->td_ucred) && !prison_quotas)
184 		return (EPERM);
185 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
186 	if ((error = namei(&nd)) != 0)
187 		return (error);
188 	NDFREE(&nd, NDF_ONLY_PNBUF);
189 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
190 	vrele(nd.ni_vp);
191 	if (error)
192 		return (error);
193 	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
194 	    SCARG(uap, arg), td);
195 	vn_finished_write(mp);
196 	return (error);
197 }
198 
199 /*
200  * Get filesystem statistics.
201  */
202 #ifndef _SYS_SYSPROTO_H_
203 struct statfs_args {
204 	char *path;
205 	struct statfs *buf;
206 };
207 #endif
208 /* ARGSUSED */
209 int
210 statfs(td, uap)
211 	struct thread *td;
212 	register struct statfs_args /* {
213 		syscallarg(char *) path;
214 		syscallarg(struct statfs *) buf;
215 	} */ *uap;
216 {
217 	register struct mount *mp;
218 	register struct statfs *sp;
219 	int error;
220 	struct nameidata nd;
221 	struct statfs sb;
222 
223 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
224 	if ((error = namei(&nd)) != 0)
225 		return (error);
226 	mp = nd.ni_vp->v_mount;
227 	sp = &mp->mnt_stat;
228 	NDFREE(&nd, NDF_ONLY_PNBUF);
229 	vrele(nd.ni_vp);
230 #ifdef MAC
231 	error = mac_check_mount_stat(td->td_ucred, mp);
232 	if (error)
233 		return (error);
234 #endif
235 	error = VFS_STATFS(mp, sp, td);
236 	if (error)
237 		return (error);
238 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
239 	if (suser(td)) {
240 		bcopy(sp, &sb, sizeof(sb));
241 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
242 		sp = &sb;
243 	}
244 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
245 }
246 
247 /*
248  * Get filesystem statistics.
249  */
250 #ifndef _SYS_SYSPROTO_H_
251 struct fstatfs_args {
252 	int fd;
253 	struct statfs *buf;
254 };
255 #endif
256 /* ARGSUSED */
257 int
258 fstatfs(td, uap)
259 	struct thread *td;
260 	register struct fstatfs_args /* {
261 		syscallarg(int) fd;
262 		syscallarg(struct statfs *) buf;
263 	} */ *uap;
264 {
265 	struct file *fp;
266 	struct mount *mp;
267 	register struct statfs *sp;
268 	int error;
269 	struct statfs sb;
270 
271 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
272 		return (error);
273 	mp = ((struct vnode *)fp->f_data)->v_mount;
274 	fdrop(fp, td);
275 	if (mp == NULL)
276 		return (EBADF);
277 #ifdef MAC
278 	error = mac_check_mount_stat(td->td_ucred, mp);
279 	if (error)
280 		return (error);
281 #endif
282 	sp = &mp->mnt_stat;
283 	error = VFS_STATFS(mp, sp, td);
284 	if (error)
285 		return (error);
286 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
287 	if (suser(td)) {
288 		bcopy(sp, &sb, sizeof(sb));
289 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
290 		sp = &sb;
291 	}
292 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
293 }
294 
295 /*
296  * Get statistics on all filesystems.
297  */
298 #ifndef _SYS_SYSPROTO_H_
299 struct getfsstat_args {
300 	struct statfs *buf;
301 	long bufsize;
302 	int flags;
303 };
304 #endif
305 int
306 getfsstat(td, uap)
307 	struct thread *td;
308 	register struct getfsstat_args /* {
309 		syscallarg(struct statfs *) buf;
310 		syscallarg(long) bufsize;
311 		syscallarg(int) flags;
312 	} */ *uap;
313 {
314 	register struct mount *mp, *nmp;
315 	register struct statfs *sp;
316 	caddr_t sfsp;
317 	long count, maxcount, error;
318 
319 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
320 	sfsp = (caddr_t)SCARG(uap, buf);
321 	count = 0;
322 	mtx_lock(&mountlist_mtx);
323 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
324 #ifdef MAC
325 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
326 			nmp = TAILQ_NEXT(mp, mnt_list);
327 			continue;
328 		}
329 #endif
330 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
331 			nmp = TAILQ_NEXT(mp, mnt_list);
332 			continue;
333 		}
334 		if (sfsp && count < maxcount) {
335 			sp = &mp->mnt_stat;
336 			/*
337 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
338 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
339 			 * overrides MNT_WAIT.
340 			 */
341 			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
342 			    (SCARG(uap, flags) & MNT_WAIT)) &&
343 			    (error = VFS_STATFS(mp, sp, td))) {
344 				mtx_lock(&mountlist_mtx);
345 				nmp = TAILQ_NEXT(mp, mnt_list);
346 				vfs_unbusy(mp, td);
347 				continue;
348 			}
349 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
350 			error = copyout(sp, sfsp, sizeof(*sp));
351 			if (error) {
352 				vfs_unbusy(mp, td);
353 				return (error);
354 			}
355 			sfsp += sizeof(*sp);
356 		}
357 		count++;
358 		mtx_lock(&mountlist_mtx);
359 		nmp = TAILQ_NEXT(mp, mnt_list);
360 		vfs_unbusy(mp, td);
361 	}
362 	mtx_unlock(&mountlist_mtx);
363 	if (sfsp && count > maxcount)
364 		td->td_retval[0] = maxcount;
365 	else
366 		td->td_retval[0] = count;
367 	return (0);
368 }
369 
370 /*
371  * Change current working directory to a given file descriptor.
372  */
373 #ifndef _SYS_SYSPROTO_H_
374 struct fchdir_args {
375 	int	fd;
376 };
377 #endif
378 /* ARGSUSED */
379 int
380 fchdir(td, uap)
381 	struct thread *td;
382 	struct fchdir_args /* {
383 		syscallarg(int) fd;
384 	} */ *uap;
385 {
386 	register struct filedesc *fdp = td->td_proc->p_fd;
387 	struct vnode *vp, *tdp, *vpold;
388 	struct mount *mp;
389 	struct file *fp;
390 	int error;
391 
392 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
393 		return (error);
394 	vp = (struct vnode *)fp->f_data;
395 	VREF(vp);
396 	fdrop(fp, td);
397 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
398 	if (vp->v_type != VDIR)
399 		error = ENOTDIR;
400 #ifdef MAC
401 	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
402 	}
403 #endif
404 	else
405 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
406 	while (!error && (mp = vp->v_mountedhere) != NULL) {
407 		if (vfs_busy(mp, 0, 0, td))
408 			continue;
409 		error = VFS_ROOT(mp, &tdp);
410 		vfs_unbusy(mp, td);
411 		if (error)
412 			break;
413 		vput(vp);
414 		vp = tdp;
415 	}
416 	if (error) {
417 		vput(vp);
418 		return (error);
419 	}
420 	VOP_UNLOCK(vp, 0, td);
421 	FILEDESC_LOCK(fdp);
422 	vpold = fdp->fd_cdir;
423 	fdp->fd_cdir = vp;
424 	FILEDESC_UNLOCK(fdp);
425 	vrele(vpold);
426 	return (0);
427 }
428 
429 /*
430  * Change current working directory (``.'').
431  */
432 #ifndef _SYS_SYSPROTO_H_
433 struct chdir_args {
434 	char	*path;
435 };
436 #endif
437 /* ARGSUSED */
438 int
439 chdir(td, uap)
440 	struct thread *td;
441 	struct chdir_args /* {
442 		syscallarg(char *) path;
443 	} */ *uap;
444 {
445 	register struct filedesc *fdp = td->td_proc->p_fd;
446 	int error;
447 	struct nameidata nd;
448 	struct vnode *vp;
449 
450 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
451 	    SCARG(uap, path), td);
452 	if ((error = change_dir(&nd, td)) != 0)
453 		return (error);
454 	NDFREE(&nd, NDF_ONLY_PNBUF);
455 	FILEDESC_LOCK(fdp);
456 	vp = fdp->fd_cdir;
457 	fdp->fd_cdir = nd.ni_vp;
458 	FILEDESC_UNLOCK(fdp);
459 	vrele(vp);
460 	return (0);
461 }
462 
463 /*
464  * Helper function for raised chroot(2) security function:  Refuse if
465  * any filedescriptors are open directories.
466  */
467 static int
468 chroot_refuse_vdir_fds(fdp)
469 	struct filedesc *fdp;
470 {
471 	struct vnode *vp;
472 	struct file *fp;
473 	int fd;
474 
475 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
476 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
477 		fp = fget_locked(fdp, fd);
478 		if (fp == NULL)
479 			continue;
480 		if (fp->f_type == DTYPE_VNODE) {
481 			vp = (struct vnode *)fp->f_data;
482 			if (vp->v_type == VDIR)
483 				return (EPERM);
484 		}
485 	}
486 	return (0);
487 }
488 
489 /*
490  * This sysctl determines if we will allow a process to chroot(2) if it
491  * has a directory open:
492  *	0: disallowed for all processes.
493  *	1: allowed for processes that were not already chroot(2)'ed.
494  *	2: allowed for all processes.
495  */
496 
497 static int chroot_allow_open_directories = 1;
498 
499 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
500      &chroot_allow_open_directories, 0, "");
501 
502 /*
503  * Change notion of root (``/'') directory.
504  */
505 #ifndef _SYS_SYSPROTO_H_
506 struct chroot_args {
507 	char	*path;
508 };
509 #endif
510 /* ARGSUSED */
511 int
512 chroot(td, uap)
513 	struct thread *td;
514 	struct chroot_args /* {
515 		syscallarg(char *) path;
516 	} */ *uap;
517 {
518 	register struct filedesc *fdp = td->td_proc->p_fd;
519 	int error;
520 	struct nameidata nd;
521 	struct vnode *vp;
522 
523 	error = suser_cred(td->td_ucred, PRISON_ROOT);
524 	if (error)
525 		return (error);
526 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
527 	    SCARG(uap, path), td);
528 	mtx_lock(&Giant);
529 	if ((error = change_dir(&nd, td)) != 0)
530 		goto error;
531 #ifdef MAC
532 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
533 		goto error;
534 #endif
535 	FILEDESC_LOCK(fdp);
536 	if (chroot_allow_open_directories == 0 ||
537 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
538 		error = chroot_refuse_vdir_fds(fdp);
539 		if (error)
540 			goto error_unlock;
541 	}
542 	vp = fdp->fd_rdir;
543 	fdp->fd_rdir = nd.ni_vp;
544 	if (!fdp->fd_jdir) {
545 		fdp->fd_jdir = nd.ni_vp;
546                 VREF(fdp->fd_jdir);
547 	}
548 	FILEDESC_UNLOCK(fdp);
549 	NDFREE(&nd, NDF_ONLY_PNBUF);
550 	vrele(vp);
551 	mtx_unlock(&Giant);
552 	return (0);
553 error_unlock:
554 	FILEDESC_UNLOCK(fdp);
555 error:
556 	mtx_unlock(&Giant);
557 	NDFREE(&nd, 0);
558 	return (error);
559 }
560 
561 /*
562  * Common routine for chroot and chdir.
563  */
564 static int
565 change_dir(ndp, td)
566 	register struct nameidata *ndp;
567 	struct thread *td;
568 {
569 	struct vnode *vp;
570 	int error;
571 
572 	error = namei(ndp);
573 	if (error)
574 		return (error);
575 	vp = ndp->ni_vp;
576 	if (vp->v_type != VDIR)
577 		error = ENOTDIR;
578 #ifdef MAC
579 	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
580 	}
581 #endif
582 	else
583 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
584 	if (error)
585 		vput(vp);
586 	else
587 		VOP_UNLOCK(vp, 0, td);
588 	return (error);
589 }
590 
591 /*
592  * Check permissions, allocate an open file structure,
593  * and call the device open routine if any.
594  */
595 #ifndef _SYS_SYSPROTO_H_
596 struct open_args {
597 	char	*path;
598 	int	flags;
599 	int	mode;
600 };
601 #endif
602 int
603 open(td, uap)
604 	struct thread *td;
605 	register struct open_args /* {
606 		syscallarg(char *) path;
607 		syscallarg(int) flags;
608 		syscallarg(int) mode;
609 	} */ *uap;
610 {
611 	struct proc *p = td->td_proc;
612 	struct filedesc *fdp = p->p_fd;
613 	struct file *fp;
614 	struct vnode *vp;
615 	struct vattr vat;
616 	struct mount *mp;
617 	int cmode, flags, oflags;
618 	struct file *nfp;
619 	int type, indx, error;
620 	struct flock lf;
621 	struct nameidata nd;
622 
623 	oflags = SCARG(uap, flags);
624 	if ((oflags & O_ACCMODE) == O_ACCMODE)
625 		return (EINVAL);
626 	flags = FFLAGS(oflags);
627 	error = falloc(td, &nfp, &indx);
628 	if (error)
629 		return (error);
630 	fp = nfp;
631 	FILEDESC_LOCK(fdp);
632 	cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
633 	FILEDESC_UNLOCK(fdp);
634 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
635 	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
636 	/*
637 	 * Bump the ref count to prevent another process from closing
638 	 * the descriptor while we are blocked in vn_open()
639 	 */
640 	fhold(fp);
641 	error = vn_open(&nd, &flags, cmode);
642 	if (error) {
643 		/*
644 		 * release our own reference
645 		 */
646 		fdrop(fp, td);
647 
648 		/*
649 		 * handle special fdopen() case.  bleh.  dupfdopen() is
650 		 * responsible for dropping the old contents of ofiles[indx]
651 		 * if it succeeds.
652 		 */
653 		if ((error == ENODEV || error == ENXIO) &&
654 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
655 		    (error =
656 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
657 			td->td_retval[0] = indx;
658 			return (0);
659 		}
660 		/*
661 		 * Clean up the descriptor, but only if another thread hadn't
662 		 * replaced or closed it.
663 		 */
664 		FILEDESC_LOCK(fdp);
665 		if (fdp->fd_ofiles[indx] == fp) {
666 			fdp->fd_ofiles[indx] = NULL;
667 			FILEDESC_UNLOCK(fdp);
668 			fdrop(fp, td);
669 		} else
670 			FILEDESC_UNLOCK(fdp);
671 
672 		if (error == ERESTART)
673 			error = EINTR;
674 		return (error);
675 	}
676 	td->td_dupfd = 0;
677 	NDFREE(&nd, NDF_ONLY_PNBUF);
678 	vp = nd.ni_vp;
679 
680 	/*
681 	 * There should be 2 references on the file, one from the descriptor
682 	 * table, and one for us.
683 	 *
684 	 * Handle the case where someone closed the file (via its file
685 	 * descriptor) while we were blocked.  The end result should look
686 	 * like opening the file succeeded but it was immediately closed.
687 	 */
688 	FILEDESC_LOCK(fdp);
689 	FILE_LOCK(fp);
690 	if (fp->f_count == 1) {
691 		KASSERT(fdp->fd_ofiles[indx] != fp,
692 		    ("Open file descriptor lost all refs"));
693 		FILEDESC_UNLOCK(fdp);
694 		FILE_UNLOCK(fp);
695 		VOP_UNLOCK(vp, 0, td);
696 		vn_close(vp, flags & FMASK, fp->f_cred, td);
697 		fdrop(fp, td);
698 		td->td_retval[0] = indx;
699 		return 0;
700 	}
701 
702 	/* assert that vn_open created a backing object if one is needed */
703 	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
704 		("open: vmio vnode has no backing object after vn_open"));
705 
706 	fp->f_data = vp;
707 	fp->f_flag = flags & FMASK;
708 	fp->f_ops = &vnops;
709 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
710 	FILEDESC_UNLOCK(fdp);
711 	FILE_UNLOCK(fp);
712 	VOP_UNLOCK(vp, 0, td);
713 	if (flags & (O_EXLOCK | O_SHLOCK)) {
714 		lf.l_whence = SEEK_SET;
715 		lf.l_start = 0;
716 		lf.l_len = 0;
717 		if (flags & O_EXLOCK)
718 			lf.l_type = F_WRLCK;
719 		else
720 			lf.l_type = F_RDLCK;
721 		type = F_FLOCK;
722 		if ((flags & FNONBLOCK) == 0)
723 			type |= F_WAIT;
724 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
725 			    type)) != 0)
726 			goto bad;
727 		fp->f_flag |= FHASLOCK;
728 	}
729 	if (flags & O_TRUNC) {
730 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
731 			goto bad;
732 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
733 		VATTR_NULL(&vat);
734 		vat.va_size = 0;
735 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
736 #ifdef MAC
737 		error = mac_check_vnode_op(td->td_ucred, vp,
738 		    MAC_OP_VNODE_WRITE);
739 		if (error == 0)
740 #endif
741 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
742 		VOP_UNLOCK(vp, 0, td);
743 		vn_finished_write(mp);
744 		if (error)
745 			goto bad;
746 	}
747 	/*
748 	 * Release our private reference, leaving the one associated with
749 	 * the descriptor table intact.
750 	 */
751 	fdrop(fp, td);
752 	td->td_retval[0] = indx;
753 	return (0);
754 bad:
755 	FILEDESC_LOCK(fdp);
756 	if (fdp->fd_ofiles[indx] == fp) {
757 		fdp->fd_ofiles[indx] = NULL;
758 		FILEDESC_UNLOCK(fdp);
759 		fdrop(fp, td);
760 	} else
761 		FILEDESC_UNLOCK(fdp);
762 	return (error);
763 }
764 
#ifdef COMPAT_43
/*
 * Old creat(2) entry point: create (or truncate) a file and open it for
 * writing.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
/*
 * Implemented as open() with O_WRONLY | O_CREAT | O_TRUNC; returns whatever
 * open() returns.
 */
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	struct open_args oa;

	SCARG(&oa, path) = SCARG(uap, path);
	SCARG(&oa, flags) = O_WRONLY | O_CREAT | O_TRUNC;
	SCARG(&oa, mode) = SCARG(uap, mode);
	return (open(td, &oa));
}
#endif /* COMPAT_43 */
795 
796 /*
797  * Create a special file.
798  */
799 #ifndef _SYS_SYSPROTO_H_
800 struct mknod_args {
801 	char	*path;
802 	int	mode;
803 	int	dev;
804 };
805 #endif
806 /* ARGSUSED */
807 int
808 mknod(td, uap)
809 	struct thread *td;
810 	register struct mknod_args /* {
811 		syscallarg(char *) path;
812 		syscallarg(int) mode;
813 		syscallarg(int) dev;
814 	} */ *uap;
815 {
816 	struct vnode *vp;
817 	struct mount *mp;
818 	struct vattr vattr;
819 	int error;
820 	int whiteout = 0;
821 	struct nameidata nd;
822 
823 	switch (SCARG(uap, mode) & S_IFMT) {
824 	case S_IFCHR:
825 	case S_IFBLK:
826 		error = suser(td);
827 		break;
828 	default:
829 		error = suser_cred(td->td_ucred, PRISON_ROOT);
830 		break;
831 	}
832 	if (error)
833 		return (error);
834 restart:
835 	bwillwrite();
836 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, UIO_USERSPACE,
837 	    SCARG(uap, path), td);
838 	if ((error = namei(&nd)) != 0)
839 		return (error);
840 	vp = nd.ni_vp;
841 	if (vp != NULL) {
842 		vrele(vp);
843 		error = EEXIST;
844 	} else {
845 		VATTR_NULL(&vattr);
846 		FILEDESC_LOCK(td->td_proc->p_fd);
847 		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
848 		FILEDESC_UNLOCK(td->td_proc->p_fd);
849 		vattr.va_rdev = SCARG(uap, dev);
850 		whiteout = 0;
851 
852 		switch (SCARG(uap, mode) & S_IFMT) {
853 		case S_IFMT:	/* used by badsect to flag bad sectors */
854 			vattr.va_type = VBAD;
855 			break;
856 		case S_IFCHR:
857 			vattr.va_type = VCHR;
858 			break;
859 		case S_IFBLK:
860 			vattr.va_type = VBLK;
861 			break;
862 		case S_IFWHT:
863 			whiteout = 1;
864 			break;
865 		default:
866 			error = EINVAL;
867 			break;
868 		}
869 	}
870 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
871 		NDFREE(&nd, NDF_ONLY_PNBUF);
872 		vput(nd.ni_dvp);
873 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
874 			return (error);
875 		goto restart;
876 	}
877 	if (!error) {
878 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
879 		if (whiteout)
880 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
881 		else {
882 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
883 						&nd.ni_cnd, &vattr);
884 			if (error == 0)
885 				vput(nd.ni_vp);
886 		}
887 	}
888 	NDFREE(&nd, NDF_ONLY_PNBUF);
889 	vput(nd.ni_dvp);
890 	vn_finished_write(mp);
891 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
892 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
893 	return (error);
894 }
895 
896 /*
897  * Create a named pipe.
898  */
899 #ifndef _SYS_SYSPROTO_H_
900 struct mkfifo_args {
901 	char	*path;
902 	int	mode;
903 };
904 #endif
905 /* ARGSUSED */
906 int
907 mkfifo(td, uap)
908 	struct thread *td;
909 	register struct mkfifo_args /* {
910 		syscallarg(char *) path;
911 		syscallarg(int) mode;
912 	} */ *uap;
913 {
914 	struct mount *mp;
915 	struct vattr vattr;
916 	int error;
917 	struct nameidata nd;
918 
919 restart:
920 	bwillwrite();
921 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, UIO_USERSPACE,
922 	    SCARG(uap, path), td);
923 	if ((error = namei(&nd)) != 0)
924 		return (error);
925 	if (nd.ni_vp != NULL) {
926 		NDFREE(&nd, NDF_ONLY_PNBUF);
927 		vrele(nd.ni_vp);
928 		vput(nd.ni_dvp);
929 		return (EEXIST);
930 	}
931 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
932 		NDFREE(&nd, NDF_ONLY_PNBUF);
933 		vput(nd.ni_dvp);
934 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
935 			return (error);
936 		goto restart;
937 	}
938 	VATTR_NULL(&vattr);
939 	vattr.va_type = VFIFO;
940 	FILEDESC_LOCK(td->td_proc->p_fd);
941 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
942 	FILEDESC_UNLOCK(td->td_proc->p_fd);
943 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
944 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
945 	if (error == 0)
946 		vput(nd.ni_vp);
947 	NDFREE(&nd, NDF_ONLY_PNBUF);
948 	vput(nd.ni_dvp);
949 	vn_finished_write(mp);
950 	return (error);
951 }
952 
953 /*
954  * Make a hard file link.
955  */
956 #ifndef _SYS_SYSPROTO_H_
957 struct link_args {
958 	char	*path;
959 	char	*link;
960 };
961 #endif
962 /* ARGSUSED */
963 int
964 link(td, uap)
965 	struct thread *td;
966 	register struct link_args /* {
967 		syscallarg(char *) path;
968 		syscallarg(char *) link;
969 	} */ *uap;
970 {
971 	struct vnode *vp;
972 	struct mount *mp;
973 	struct nameidata nd;
974 	int error;
975 
976 	bwillwrite();
977 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), td);
978 	if ((error = namei(&nd)) != 0)
979 		return (error);
980 	NDFREE(&nd, NDF_ONLY_PNBUF);
981 	vp = nd.ni_vp;
982 	if (vp->v_type == VDIR) {
983 		vrele(vp);
984 		return (EPERM);		/* POSIX */
985 	}
986 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
987 		vrele(vp);
988 		return (error);
989 	}
990 	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, UIO_USERSPACE,
991 	    SCARG(uap, link), td);
992 	if ((error = namei(&nd)) == 0) {
993 		if (nd.ni_vp != NULL) {
994 			vrele(nd.ni_vp);
995 			error = EEXIST;
996 		} else {
997 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
998 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
999 			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1000 		}
1001 		NDFREE(&nd, NDF_ONLY_PNBUF);
1002 		vput(nd.ni_dvp);
1003 	}
1004 	vrele(vp);
1005 	vn_finished_write(mp);
1006 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1007 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1008 	return (error);
1009 }
1010 
1011 /*
1012  * Make a symbolic link.
1013  */
1014 #ifndef _SYS_SYSPROTO_H_
1015 struct symlink_args {
1016 	char	*path;
1017 	char	*link;
1018 };
1019 #endif
1020 /* ARGSUSED */
1021 int
1022 symlink(td, uap)
1023 	struct thread *td;
1024 	register struct symlink_args /* {
1025 		syscallarg(char *) path;
1026 		syscallarg(char *) link;
1027 	} */ *uap;
1028 {
1029 	struct mount *mp;
1030 	struct vattr vattr;
1031 	char *path;
1032 	int error;
1033 	struct nameidata nd;
1034 
1035 	path = uma_zalloc(namei_zone, M_WAITOK);
1036 	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
1037 		goto out;
1038 restart:
1039 	bwillwrite();
1040 	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, UIO_USERSPACE,
1041 	    SCARG(uap, link), td);
1042 	if ((error = namei(&nd)) != 0)
1043 		goto out;
1044 	if (nd.ni_vp) {
1045 		NDFREE(&nd, NDF_ONLY_PNBUF);
1046 		vrele(nd.ni_vp);
1047 		vput(nd.ni_dvp);
1048 		error = EEXIST;
1049 		goto out;
1050 	}
1051 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1052 		NDFREE(&nd, NDF_ONLY_PNBUF);
1053 		vput(nd.ni_dvp);
1054 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1055 			return (error);
1056 		goto restart;
1057 	}
1058 	VATTR_NULL(&vattr);
1059 	FILEDESC_LOCK(td->td_proc->p_fd);
1060 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1061 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1062 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1063 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1064 	NDFREE(&nd, NDF_ONLY_PNBUF);
1065 	if (error == 0)
1066 		vput(nd.ni_vp);
1067 	vput(nd.ni_dvp);
1068 	vn_finished_write(mp);
1069 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1070 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1071 out:
1072 	uma_zfree(namei_zone, path);
1073 	return (error);
1074 }
1075 
1076 /*
1077  * Delete a whiteout from the filesystem.
1078  */
1079 /* ARGSUSED */
1080 int
1081 undelete(td, uap)
1082 	struct thread *td;
1083 	register struct undelete_args /* {
1084 		syscallarg(char *) path;
1085 	} */ *uap;
1086 {
1087 	int error;
1088 	struct mount *mp;
1089 	struct nameidata nd;
1090 
1091 restart:
1092 	bwillwrite();
1093 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1094 	    SCARG(uap, path), td);
1095 	error = namei(&nd);
1096 	if (error)
1097 		return (error);
1098 
1099 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1100 		NDFREE(&nd, NDF_ONLY_PNBUF);
1101 		if (nd.ni_vp)
1102 			vrele(nd.ni_vp);
1103 		vput(nd.ni_dvp);
1104 		return (EEXIST);
1105 	}
1106 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1107 		NDFREE(&nd, NDF_ONLY_PNBUF);
1108 		vput(nd.ni_dvp);
1109 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1110 			return (error);
1111 		goto restart;
1112 	}
1113 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1114 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1115 	NDFREE(&nd, NDF_ONLY_PNBUF);
1116 	vput(nd.ni_dvp);
1117 	vn_finished_write(mp);
1118 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1119 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1120 	return (error);
1121 }
1122 
1123 /*
1124  * Delete a name from the filesystem.
1125  */
1126 #ifndef _SYS_SYSPROTO_H_
1127 struct unlink_args {
1128 	char	*path;
1129 };
1130 #endif
1131 /* ARGSUSED */
1132 int
1133 unlink(td, uap)
1134 	struct thread *td;
1135 	struct unlink_args /* {
1136 		syscallarg(char *) path;
1137 	} */ *uap;
1138 {
1139 	struct mount *mp;
1140 	struct vnode *vp;
1141 	int error;
1142 	struct nameidata nd;
1143 
1144 restart:
1145 	bwillwrite();
1146 	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1147 	if ((error = namei(&nd)) != 0)
1148 		return (error);
1149 	vp = nd.ni_vp;
1150 	if (vp->v_type == VDIR)
1151 		error = EPERM;		/* POSIX */
1152 	else {
1153 		/*
1154 		 * The root of a mounted filesystem cannot be deleted.
1155 		 *
1156 		 * XXX: can this only be a VDIR case?
1157 		 */
1158 		if (vp->v_flag & VROOT)
1159 			error = EBUSY;
1160 	}
1161 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1162 		NDFREE(&nd, NDF_ONLY_PNBUF);
1163 		vrele(vp);
1164 		vput(nd.ni_dvp);
1165 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1166 			return (error);
1167 		goto restart;
1168 	}
1169 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1170 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1171 	if (!error) {
1172 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1173 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1174 	}
1175 	NDFREE(&nd, NDF_ONLY_PNBUF);
1176 	vput(nd.ni_dvp);
1177 	vput(vp);
1178 	vn_finished_write(mp);
1179 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1180 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1181 	return (error);
1182 }
1183 
1184 /*
1185  * Reposition read/write file offset.
1186  */
1187 #ifndef _SYS_SYSPROTO_H_
1188 struct lseek_args {
1189 	int	fd;
1190 	int	pad;
1191 	off_t	offset;
1192 	int	whence;
1193 };
1194 #endif
1195 int
1196 lseek(td, uap)
1197 	struct thread *td;
1198 	register struct lseek_args /* {
1199 		syscallarg(int) fd;
1200 		syscallarg(int) pad;
1201 		syscallarg(off_t) offset;
1202 		syscallarg(int) whence;
1203 	} */ *uap;
1204 {
1205 	struct ucred *cred = td->td_ucred;
1206 	struct file *fp;
1207 	struct vnode *vp;
1208 	struct vattr vattr;
1209 	off_t offset;
1210 	int error, noneg;
1211 
1212 	if ((error = fget(td, uap->fd, &fp)) != 0)
1213 		return (error);
1214 	if (fp->f_type != DTYPE_VNODE) {
1215 		fdrop(fp, td);
1216 		return (ESPIPE);
1217 	}
1218 	vp = (struct vnode *)fp->f_data;
1219 	noneg = (vp->v_type != VCHR);
1220 	offset = SCARG(uap, offset);
1221 	switch (SCARG(uap, whence)) {
1222 	case L_INCR:
1223 		if (noneg &&
1224 		    (fp->f_offset < 0 ||
1225 		     (offset > 0 && fp->f_offset > OFF_MAX - offset)))
1226 			return (EOVERFLOW);
1227 		offset += fp->f_offset;
1228 		break;
1229 	case L_XTND:
1230 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1231 		error = VOP_GETATTR(vp, &vattr, cred, td);
1232 		VOP_UNLOCK(vp, 0, td);
1233 		if (error)
1234 			return (error);
1235 		if (noneg &&
1236 		    (vattr.va_size > OFF_MAX ||
1237 		     (offset > 0 && vattr.va_size > OFF_MAX - offset)))
1238 			return (EOVERFLOW);
1239 		offset += vattr.va_size;
1240 		break;
1241 	case L_SET:
1242 		break;
1243 	default:
1244 		fdrop(fp, td);
1245 		return (EINVAL);
1246 	}
1247 	if (noneg && offset < 0)
1248 		return (EINVAL);
1249 	fp->f_offset = offset;
1250 	*(off_t *)(td->td_retval) = fp->f_offset;
1251 	fdrop(fp, td);
1252 	return (0);
1253 }
1254 
1255 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1256 /*
1257  * Reposition read/write file offset.
1258  */
1259 #ifndef _SYS_SYSPROTO_H_
1260 struct olseek_args {
1261 	int	fd;
1262 	long	offset;
1263 	int	whence;
1264 };
1265 #endif
1266 int
1267 olseek(td, uap)
1268 	struct thread *td;
1269 	register struct olseek_args /* {
1270 		syscallarg(int) fd;
1271 		syscallarg(long) offset;
1272 		syscallarg(int) whence;
1273 	} */ *uap;
1274 {
1275 	struct lseek_args /* {
1276 		syscallarg(int) fd;
1277 		syscallarg(int) pad;
1278 		syscallarg(off_t) offset;
1279 		syscallarg(int) whence;
1280 	} */ nuap;
1281 	int error;
1282 
1283 	SCARG(&nuap, fd) = SCARG(uap, fd);
1284 	SCARG(&nuap, offset) = SCARG(uap, offset);
1285 	SCARG(&nuap, whence) = SCARG(uap, whence);
1286 	error = lseek(td, &nuap);
1287 	return (error);
1288 }
1289 #endif /* COMPAT_43 */
1290 
1291 /*
1292  * Check access permissions using passed credentials.
1293  */
1294 static int
1295 vn_access(vp, user_flags, cred, td)
1296 	struct vnode	*vp;
1297 	int		user_flags;
1298 	struct ucred	*cred;
1299 	struct thread	*td;
1300 {
1301 	int error, flags;
1302 
1303 	/* Flags == 0 means only check for existence. */
1304 	error = 0;
1305 	if (user_flags) {
1306 		flags = 0;
1307 		if (user_flags & R_OK)
1308 			flags |= VREAD;
1309 		if (user_flags & W_OK)
1310 			flags |= VWRITE;
1311 		if (user_flags & X_OK)
1312 			flags |= VEXEC;
1313 #ifdef MAC
1314 		error = mac_check_vnode_access(cred, vp, flags);
1315 		if (error)
1316 			return (error);
1317 #endif
1318 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1319 			error = VOP_ACCESS(vp, flags, cred, td);
1320 	}
1321 	return (error);
1322 }
1323 
1324 /*
1325  * Check access permissions using "real" credentials.
1326  */
1327 #ifndef _SYS_SYSPROTO_H_
1328 struct access_args {
1329 	char	*path;
1330 	int	flags;
1331 };
1332 #endif
1333 int
1334 access(td, uap)
1335 	struct thread *td;
1336 	register struct access_args /* {
1337 		syscallarg(char *) path;
1338 		syscallarg(int) flags;
1339 	} */ *uap;
1340 {
1341 	struct ucred *cred, *tmpcred;
1342 	register struct vnode *vp;
1343 	int error;
1344 	struct nameidata nd;
1345 
1346 	/*
1347 	 * Create and modify a temporary credential instead of one that
1348 	 * is potentially shared.  This could also mess up socket
1349 	 * buffer accounting which can run in an interrupt context.
1350 	 *
1351 	 * XXX - Depending on how "threads" are finally implemented, it
1352 	 * may be better to explicitly pass the credential to namei()
1353 	 * rather than to modify the potentially shared process structure.
1354 	 */
1355 	cred = td->td_ucred;
1356 	tmpcred = crdup(cred);
1357 	tmpcred->cr_uid = cred->cr_ruid;
1358 	tmpcred->cr_groups[0] = cred->cr_rgid;
1359 	td->td_ucred = tmpcred;
1360 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1361 	    SCARG(uap, path), td);
1362 	if ((error = namei(&nd)) != 0)
1363 		goto out1;
1364 	vp = nd.ni_vp;
1365 
1366 	error = vn_access(vp, SCARG(uap, flags), tmpcred, td);
1367 	NDFREE(&nd, NDF_ONLY_PNBUF);
1368 	vput(vp);
1369 out1:
1370 	td->td_ucred = cred;
1371 	crfree(tmpcred);
1372 	return (error);
1373 }
1374 
1375 /*
1376  * Check access permissions using "effective" credentials.
1377  */
1378 #ifndef _SYS_SYSPROTO_H_
1379 struct eaccess_args {
1380 	char	*path;
1381 	int	flags;
1382 };
1383 #endif
1384 int
1385 eaccess(td, uap)
1386 	struct thread *td;
1387 	register struct eaccess_args /* {
1388 		syscallarg(char *) path;
1389 		syscallarg(int) flags;
1390 	} */ *uap;
1391 {
1392 	struct nameidata nd;
1393 	struct vnode *vp;
1394 	int error;
1395 
1396 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1397 	    SCARG(uap, path), td);
1398 	if ((error = namei(&nd)) != 0)
1399 		return (error);
1400 	vp = nd.ni_vp;
1401 
1402 	error = vn_access(vp, SCARG(uap, flags), td->td_ucred, td);
1403 	NDFREE(&nd, NDF_ONLY_PNBUF);
1404 	vput(vp);
1405 	return (error);
1406 }
1407 
1408 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1409 /*
1410  * Get file status; this version follows links.
1411  */
1412 #ifndef _SYS_SYSPROTO_H_
1413 struct ostat_args {
1414 	char	*path;
1415 	struct ostat *ub;
1416 };
1417 #endif
1418 /* ARGSUSED */
1419 int
1420 ostat(td, uap)
1421 	struct thread *td;
1422 	register struct ostat_args /* {
1423 		syscallarg(char *) path;
1424 		syscallarg(struct ostat *) ub;
1425 	} */ *uap;
1426 {
1427 	struct stat sb;
1428 	struct ostat osb;
1429 	int error;
1430 	struct nameidata nd;
1431 
1432 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1433 	    SCARG(uap, path), td);
1434 	if ((error = namei(&nd)) != 0)
1435 		return (error);
1436 	NDFREE(&nd, NDF_ONLY_PNBUF);
1437 	error = vn_stat(nd.ni_vp, &sb, td);
1438 	vput(nd.ni_vp);
1439 	if (error)
1440 		return (error);
1441 	cvtstat(&sb, &osb);
1442 	error = copyout(&osb, SCARG(uap, ub), sizeof (osb));
1443 	return (error);
1444 }
1445 
1446 /*
1447  * Get file status; this version does not follow links.
1448  */
1449 #ifndef _SYS_SYSPROTO_H_
1450 struct olstat_args {
1451 	char	*path;
1452 	struct ostat *ub;
1453 };
1454 #endif
1455 /* ARGSUSED */
1456 int
1457 olstat(td, uap)
1458 	struct thread *td;
1459 	register struct olstat_args /* {
1460 		syscallarg(char *) path;
1461 		syscallarg(struct ostat *) ub;
1462 	} */ *uap;
1463 {
1464 	struct vnode *vp;
1465 	struct stat sb;
1466 	struct ostat osb;
1467 	int error;
1468 	struct nameidata nd;
1469 
1470 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1471 	    SCARG(uap, path), td);
1472 	if ((error = namei(&nd)) != 0)
1473 		return (error);
1474 	vp = nd.ni_vp;
1475 	error = vn_stat(vp, &sb, td);
1476 	NDFREE(&nd, NDF_ONLY_PNBUF);
1477 	vput(vp);
1478 	if (error)
1479 		return (error);
1480 	cvtstat(&sb, &osb);
1481 	error = copyout(&osb, SCARG(uap, ub), sizeof (osb));
1482 	return (error);
1483 }
1484 
1485 /*
 * Convert from a new (struct stat) to an old (struct ostat) stat structure.
1487  */
1488 void
1489 cvtstat(st, ost)
1490 	struct stat *st;
1491 	struct ostat *ost;
1492 {
1493 
1494 	ost->st_dev = st->st_dev;
1495 	ost->st_ino = st->st_ino;
1496 	ost->st_mode = st->st_mode;
1497 	ost->st_nlink = st->st_nlink;
1498 	ost->st_uid = st->st_uid;
1499 	ost->st_gid = st->st_gid;
1500 	ost->st_rdev = st->st_rdev;
1501 	if (st->st_size < (quad_t)1 << 32)
1502 		ost->st_size = st->st_size;
1503 	else
1504 		ost->st_size = -2;
1505 	ost->st_atime = st->st_atime;
1506 	ost->st_mtime = st->st_mtime;
1507 	ost->st_ctime = st->st_ctime;
1508 	ost->st_blksize = st->st_blksize;
1509 	ost->st_blocks = st->st_blocks;
1510 	ost->st_flags = st->st_flags;
1511 	ost->st_gen = st->st_gen;
1512 }
1513 #endif /* COMPAT_43 || COMPAT_SUNOS */
1514 
1515 /*
1516  * Get file status; this version follows links.
1517  */
1518 #ifndef _SYS_SYSPROTO_H_
1519 struct stat_args {
1520 	char	*path;
1521 	struct stat *ub;
1522 };
1523 #endif
1524 /* ARGSUSED */
1525 int
1526 stat(td, uap)
1527 	struct thread *td;
1528 	register struct stat_args /* {
1529 		syscallarg(char *) path;
1530 		syscallarg(struct stat *) ub;
1531 	} */ *uap;
1532 {
1533 	struct stat sb;
1534 	int error;
1535 	struct nameidata nd;
1536 
1537 #ifdef LOOKUP_SHARED
1538 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
1539 	    UIO_USERSPACE, SCARG(uap, path), td);
1540 #else
1541 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1542 	    SCARG(uap, path), td);
1543 #endif
1544 	if ((error = namei(&nd)) != 0)
1545 		return (error);
1546 	error = vn_stat(nd.ni_vp, &sb, td);
1547 	NDFREE(&nd, NDF_ONLY_PNBUF);
1548 	vput(nd.ni_vp);
1549 	if (error)
1550 		return (error);
1551 	error = copyout(&sb, SCARG(uap, ub), sizeof (sb));
1552 	return (error);
1553 }
1554 
1555 /*
1556  * Get file status; this version does not follow links.
1557  */
1558 #ifndef _SYS_SYSPROTO_H_
1559 struct lstat_args {
1560 	char	*path;
1561 	struct stat *ub;
1562 };
1563 #endif
1564 /* ARGSUSED */
1565 int
1566 lstat(td, uap)
1567 	struct thread *td;
1568 	register struct lstat_args /* {
1569 		syscallarg(char *) path;
1570 		syscallarg(struct stat *) ub;
1571 	} */ *uap;
1572 {
1573 	int error;
1574 	struct vnode *vp;
1575 	struct stat sb;
1576 	struct nameidata nd;
1577 
1578 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1579 	    SCARG(uap, path), td);
1580 	if ((error = namei(&nd)) != 0)
1581 		return (error);
1582 	vp = nd.ni_vp;
1583 	error = vn_stat(vp, &sb, td);
1584 	NDFREE(&nd, NDF_ONLY_PNBUF);
1585 	vput(vp);
1586 	if (error)
1587 		return (error);
1588 	error = copyout(&sb, SCARG(uap, ub), sizeof (sb));
1589 	return (error);
1590 }
1591 
1592 /*
1593  * Implementation of the NetBSD stat() function.
1594  * XXX This should probably be collapsed with the FreeBSD version,
1595  * as the differences are only due to vn_stat() clearing spares at
1596  * the end of the structures.  vn_stat could be split to avoid this,
1597  * and thus collapse the following to close to zero code.
1598  */
1599 void
1600 cvtnstat(sb, nsb)
1601 	struct stat *sb;
1602 	struct nstat *nsb;
1603 {
1604 	bzero(nsb, sizeof *nsb);
1605 	nsb->st_dev = sb->st_dev;
1606 	nsb->st_ino = sb->st_ino;
1607 	nsb->st_mode = sb->st_mode;
1608 	nsb->st_nlink = sb->st_nlink;
1609 	nsb->st_uid = sb->st_uid;
1610 	nsb->st_gid = sb->st_gid;
1611 	nsb->st_rdev = sb->st_rdev;
1612 	nsb->st_atimespec = sb->st_atimespec;
1613 	nsb->st_mtimespec = sb->st_mtimespec;
1614 	nsb->st_ctimespec = sb->st_ctimespec;
1615 	nsb->st_size = sb->st_size;
1616 	nsb->st_blocks = sb->st_blocks;
1617 	nsb->st_blksize = sb->st_blksize;
1618 	nsb->st_flags = sb->st_flags;
1619 	nsb->st_gen = sb->st_gen;
1620 	nsb->st_birthtimespec = sb->st_birthtimespec;
1621 }
1622 
1623 #ifndef _SYS_SYSPROTO_H_
1624 struct nstat_args {
1625 	char	*path;
1626 	struct nstat *ub;
1627 };
1628 #endif
1629 /* ARGSUSED */
1630 int
1631 nstat(td, uap)
1632 	struct thread *td;
1633 	register struct nstat_args /* {
1634 		syscallarg(char *) path;
1635 		syscallarg(struct nstat *) ub;
1636 	} */ *uap;
1637 {
1638 	struct stat sb;
1639 	struct nstat nsb;
1640 	int error;
1641 	struct nameidata nd;
1642 
1643 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1644 	    SCARG(uap, path), td);
1645 	if ((error = namei(&nd)) != 0)
1646 		return (error);
1647 	NDFREE(&nd, NDF_ONLY_PNBUF);
1648 	error = vn_stat(nd.ni_vp, &sb, td);
1649 	vput(nd.ni_vp);
1650 	if (error)
1651 		return (error);
1652 	cvtnstat(&sb, &nsb);
1653 	error = copyout(&nsb, SCARG(uap, ub), sizeof (nsb));
1654 	return (error);
1655 }
1656 
1657 /*
1658  * NetBSD lstat.  Get file status; this version does not follow links.
1659  */
1660 #ifndef _SYS_SYSPROTO_H_
1661 struct lstat_args {
1662 	char	*path;
1663 	struct stat *ub;
1664 };
1665 #endif
1666 /* ARGSUSED */
1667 int
1668 nlstat(td, uap)
1669 	struct thread *td;
1670 	register struct nlstat_args /* {
1671 		syscallarg(char *) path;
1672 		syscallarg(struct nstat *) ub;
1673 	} */ *uap;
1674 {
1675 	int error;
1676 	struct vnode *vp;
1677 	struct stat sb;
1678 	struct nstat nsb;
1679 	struct nameidata nd;
1680 
1681 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1682 	    SCARG(uap, path), td);
1683 	if ((error = namei(&nd)) != 0)
1684 		return (error);
1685 	vp = nd.ni_vp;
1686 	NDFREE(&nd, NDF_ONLY_PNBUF);
1687 	error = vn_stat(vp, &sb, td);
1688 	vput(vp);
1689 	if (error)
1690 		return (error);
1691 	cvtnstat(&sb, &nsb);
1692 	error = copyout(&nsb, SCARG(uap, ub), sizeof (nsb));
1693 	return (error);
1694 }
1695 
1696 /*
1697  * Get configurable pathname variables.
1698  */
1699 #ifndef _SYS_SYSPROTO_H_
1700 struct pathconf_args {
1701 	char	*path;
1702 	int	name;
1703 };
1704 #endif
1705 /* ARGSUSED */
1706 int
1707 pathconf(td, uap)
1708 	struct thread *td;
1709 	register struct pathconf_args /* {
1710 		syscallarg(char *) path;
1711 		syscallarg(int) name;
1712 	} */ *uap;
1713 {
1714 	int error;
1715 	struct nameidata nd;
1716 
1717 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1718 	    SCARG(uap, path), td);
1719 	if ((error = namei(&nd)) != 0)
1720 		return (error);
1721 	NDFREE(&nd, NDF_ONLY_PNBUF);
1722 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), td->td_retval);
1723 	vput(nd.ni_vp);
1724 	return (error);
1725 }
1726 
1727 /*
1728  * Return target name of a symbolic link.
1729  */
1730 #ifndef _SYS_SYSPROTO_H_
1731 struct readlink_args {
1732 	char	*path;
1733 	char	*buf;
1734 	int	count;
1735 };
1736 #endif
1737 /* ARGSUSED */
1738 int
1739 readlink(td, uap)
1740 	struct thread *td;
1741 	register struct readlink_args /* {
1742 		syscallarg(char *) path;
1743 		syscallarg(char *) buf;
1744 		syscallarg(int) count;
1745 	} */ *uap;
1746 {
1747 	register struct vnode *vp;
1748 	struct iovec aiov;
1749 	struct uio auio;
1750 	int error;
1751 	struct nameidata nd;
1752 
1753 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1754 	    SCARG(uap, path), td);
1755 	if ((error = namei(&nd)) != 0)
1756 		return (error);
1757 	NDFREE(&nd, NDF_ONLY_PNBUF);
1758 	vp = nd.ni_vp;
1759 #ifdef MAC
1760 	error = mac_check_vnode_readlink(td->td_ucred, vp);
1761 	if (error) {
1762 		vput(vp);
1763 		return (error);
1764 	}
1765 #endif
1766 	if (vp->v_type != VLNK)
1767 		error = EINVAL;
1768 	else {
1769 		aiov.iov_base = SCARG(uap, buf);
1770 		aiov.iov_len = SCARG(uap, count);
1771 		auio.uio_iov = &aiov;
1772 		auio.uio_iovcnt = 1;
1773 		auio.uio_offset = 0;
1774 		auio.uio_rw = UIO_READ;
1775 		auio.uio_segflg = UIO_USERSPACE;
1776 		auio.uio_td = td;
1777 		auio.uio_resid = SCARG(uap, count);
1778 		error = VOP_READLINK(vp, &auio, td->td_ucred);
1779 	}
1780 	vput(vp);
1781 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
1782 	return (error);
1783 }
1784 
1785 /*
1786  * Common implementation code for chflags() and fchflags().
1787  */
1788 static int
1789 setfflags(td, vp, flags)
1790 	struct thread *td;
1791 	struct vnode *vp;
1792 	int flags;
1793 {
1794 	int error;
1795 	struct mount *mp;
1796 	struct vattr vattr;
1797 
1798 	/*
1799 	 * Prevent non-root users from setting flags on devices.  When
1800 	 * a device is reused, users can retain ownership of the device
1801 	 * if they are allowed to set flags and programs assume that
1802 	 * chown can't fail when done as root.
1803 	 */
1804 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
1805 		error = suser_cred(td->td_ucred, PRISON_ROOT);
1806 		if (error)
1807 			return (error);
1808 	}
1809 
1810 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1811 		return (error);
1812 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1813 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1814 #ifdef MAC
1815 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
1816 	if (error == 0) {
1817 #endif
1818 		VATTR_NULL(&vattr);
1819 		vattr.va_flags = flags;
1820 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
1821 #ifdef MAC
1822 	}
1823 #endif
1824 	VOP_UNLOCK(vp, 0, td);
1825 	vn_finished_write(mp);
1826 	return (error);
1827 }
1828 
1829 /*
1830  * Change flags of a file given a path name.
1831  */
1832 #ifndef _SYS_SYSPROTO_H_
1833 struct chflags_args {
1834 	char	*path;
1835 	int	flags;
1836 };
1837 #endif
1838 /* ARGSUSED */
1839 int
1840 chflags(td, uap)
1841 	struct thread *td;
1842 	register struct chflags_args /* {
1843 		syscallarg(char *) path;
1844 		syscallarg(int) flags;
1845 	} */ *uap;
1846 {
1847 	int error;
1848 	struct nameidata nd;
1849 
1850 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1851 	if ((error = namei(&nd)) != 0)
1852 		return (error);
1853 	NDFREE(&nd, NDF_ONLY_PNBUF);
1854 	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
1855 	vrele(nd.ni_vp);
1856 	return error;
1857 }
1858 
1859 /*
1860  * Same as chflags() but doesn't follow symlinks.
1861  */
1862 int
1863 lchflags(td, uap)
1864 	struct thread *td;
1865 	register struct lchflags_args /* {
1866 		syscallarg(char *) path;
1867 		syscallarg(int) flags;
1868 	} */ *uap;
1869 {
1870 	int error;
1871 	struct nameidata nd;
1872 
1873 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1874 	if ((error = namei(&nd)) != 0)
1875 		return (error);
1876 	NDFREE(&nd, NDF_ONLY_PNBUF);
1877 	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
1878 	vrele(nd.ni_vp);
1879 	return error;
1880 }
1881 
1882 /*
1883  * Change flags of a file given a file descriptor.
1884  */
1885 #ifndef _SYS_SYSPROTO_H_
1886 struct fchflags_args {
1887 	int	fd;
1888 	int	flags;
1889 };
1890 #endif
1891 /* ARGSUSED */
1892 int
1893 fchflags(td, uap)
1894 	struct thread *td;
1895 	register struct fchflags_args /* {
1896 		syscallarg(int) fd;
1897 		syscallarg(int) flags;
1898 	} */ *uap;
1899 {
1900 	struct file *fp;
1901 	int error;
1902 
1903 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
1904 		return (error);
1905 	error = setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags));
1906 	fdrop(fp, td);
1907 	return (error);
1908 }
1909 
1910 /*
1911  * Common implementation code for chmod(), lchmod() and fchmod().
1912  */
1913 static int
1914 setfmode(td, vp, mode)
1915 	struct thread *td;
1916 	struct vnode *vp;
1917 	int mode;
1918 {
1919 	int error;
1920 	struct mount *mp;
1921 	struct vattr vattr;
1922 
1923 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1924 		return (error);
1925 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1926 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1927 	VATTR_NULL(&vattr);
1928 	vattr.va_mode = mode & ALLPERMS;
1929 #ifdef MAC
1930 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
1931 	if (error == 0)
1932 #endif
1933 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
1934 	VOP_UNLOCK(vp, 0, td);
1935 	vn_finished_write(mp);
1936 	return error;
1937 }
1938 
1939 /*
1940  * Change mode of a file given path name.
1941  */
1942 #ifndef _SYS_SYSPROTO_H_
1943 struct chmod_args {
1944 	char	*path;
1945 	int	mode;
1946 };
1947 #endif
1948 /* ARGSUSED */
1949 int
1950 chmod(td, uap)
1951 	struct thread *td;
1952 	register struct chmod_args /* {
1953 		syscallarg(char *) path;
1954 		syscallarg(int) mode;
1955 	} */ *uap;
1956 {
1957 	int error;
1958 	struct nameidata nd;
1959 
1960 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1961 	if ((error = namei(&nd)) != 0)
1962 		return (error);
1963 	NDFREE(&nd, NDF_ONLY_PNBUF);
1964 	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
1965 	vrele(nd.ni_vp);
1966 	return error;
1967 }
1968 
1969 /*
1970  * Change mode of a file given path name (don't follow links.)
1971  */
1972 #ifndef _SYS_SYSPROTO_H_
1973 struct lchmod_args {
1974 	char	*path;
1975 	int	mode;
1976 };
1977 #endif
1978 /* ARGSUSED */
1979 int
1980 lchmod(td, uap)
1981 	struct thread *td;
1982 	register struct lchmod_args /* {
1983 		syscallarg(char *) path;
1984 		syscallarg(int) mode;
1985 	} */ *uap;
1986 {
1987 	int error;
1988 	struct nameidata nd;
1989 
1990 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1991 	if ((error = namei(&nd)) != 0)
1992 		return (error);
1993 	NDFREE(&nd, NDF_ONLY_PNBUF);
1994 	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
1995 	vrele(nd.ni_vp);
1996 	return error;
1997 }
1998 
1999 /*
2000  * Change mode of a file given a file descriptor.
2001  */
2002 #ifndef _SYS_SYSPROTO_H_
2003 struct fchmod_args {
2004 	int	fd;
2005 	int	mode;
2006 };
2007 #endif
2008 /* ARGSUSED */
2009 int
2010 fchmod(td, uap)
2011 	struct thread *td;
2012 	register struct fchmod_args /* {
2013 		syscallarg(int) fd;
2014 		syscallarg(int) mode;
2015 	} */ *uap;
2016 {
2017 	struct file *fp;
2018 	struct vnode *vp;
2019 	int error;
2020 
2021 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2022 		return (error);
2023 	vp = (struct vnode *)fp->f_data;
2024 	error = setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode));
2025 	fdrop(fp, td);
2026 	return (error);
2027 }
2028 
2029 /*
2030  * Common implementation for chown(), lchown(), and fchown()
2031  */
2032 static int
2033 setfown(td, vp, uid, gid)
2034 	struct thread *td;
2035 	struct vnode *vp;
2036 	uid_t uid;
2037 	gid_t gid;
2038 {
2039 	int error;
2040 	struct mount *mp;
2041 	struct vattr vattr;
2042 
2043 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2044 		return (error);
2045 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2046 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2047 	VATTR_NULL(&vattr);
2048 	vattr.va_uid = uid;
2049 	vattr.va_gid = gid;
2050 #ifdef MAC
2051 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2052 	    vattr.va_gid);
2053 	if (error == 0)
2054 #endif
2055 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2056 	VOP_UNLOCK(vp, 0, td);
2057 	vn_finished_write(mp);
2058 	return error;
2059 }
2060 
2061 /*
2062  * Set ownership given a path name.
2063  */
2064 #ifndef _SYS_SYSPROTO_H_
2065 struct chown_args {
2066 	char	*path;
2067 	int	uid;
2068 	int	gid;
2069 };
2070 #endif
2071 /* ARGSUSED */
2072 int
2073 chown(td, uap)
2074 	struct thread *td;
2075 	register struct chown_args /* {
2076 		syscallarg(char *) path;
2077 		syscallarg(int) uid;
2078 		syscallarg(int) gid;
2079 	} */ *uap;
2080 {
2081 	int error;
2082 	struct nameidata nd;
2083 
2084 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2085 	if ((error = namei(&nd)) != 0)
2086 		return (error);
2087 	NDFREE(&nd, NDF_ONLY_PNBUF);
2088 	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2089 	vrele(nd.ni_vp);
2090 	return (error);
2091 }
2092 
2093 /*
2094  * Set ownership given a path name, do not cross symlinks.
2095  */
2096 #ifndef _SYS_SYSPROTO_H_
2097 struct lchown_args {
2098 	char	*path;
2099 	int	uid;
2100 	int	gid;
2101 };
2102 #endif
2103 /* ARGSUSED */
2104 int
2105 lchown(td, uap)
2106 	struct thread *td;
2107 	register struct lchown_args /* {
2108 		syscallarg(char *) path;
2109 		syscallarg(int) uid;
2110 		syscallarg(int) gid;
2111 	} */ *uap;
2112 {
2113 	int error;
2114 	struct nameidata nd;
2115 
2116 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2117 	if ((error = namei(&nd)) != 0)
2118 		return (error);
2119 	NDFREE(&nd, NDF_ONLY_PNBUF);
2120 	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2121 	vrele(nd.ni_vp);
2122 	return (error);
2123 }
2124 
2125 /*
2126  * Set ownership given a file descriptor.
2127  */
2128 #ifndef _SYS_SYSPROTO_H_
2129 struct fchown_args {
2130 	int	fd;
2131 	int	uid;
2132 	int	gid;
2133 };
2134 #endif
2135 /* ARGSUSED */
2136 int
2137 fchown(td, uap)
2138 	struct thread *td;
2139 	register struct fchown_args /* {
2140 		syscallarg(int) fd;
2141 		syscallarg(int) uid;
2142 		syscallarg(int) gid;
2143 	} */ *uap;
2144 {
2145 	struct file *fp;
2146 	struct vnode *vp;
2147 	int error;
2148 
2149 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2150 		return (error);
2151 	vp = (struct vnode *)fp->f_data;
2152 	error = setfown(td, (struct vnode *)fp->f_data,
2153 		SCARG(uap, uid), SCARG(uap, gid));
2154 	fdrop(fp, td);
2155 	return (error);
2156 }
2157 
2158 /*
2159  * Common implementation code for utimes(), lutimes(), and futimes().
2160  */
2161 static int
2162 getutimes(usrtvp, tsp)
2163 	const struct timeval *usrtvp;
2164 	struct timespec *tsp;
2165 {
2166 	struct timeval tv[2];
2167 	int error;
2168 
2169 	if (usrtvp == NULL) {
2170 		microtime(&tv[0]);
2171 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2172 		tsp[1] = tsp[0];
2173 	} else {
2174 		if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
2175 			return (error);
2176 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2177 		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
2178 	}
2179 	return 0;
2180 }
2181 
2182 /*
2183  * Common implementation code for utimes(), lutimes(), and futimes().
2184  */
2185 static int
2186 setutimes(td, vp, ts, numtimes, nullflag)
2187 	struct thread *td;
2188 	struct vnode *vp;
2189 	const struct timespec *ts;
2190 	int numtimes;
2191 	int nullflag;
2192 {
2193 	int error, setbirthtime;
2194 	struct mount *mp;
2195 	struct vattr vattr;
2196 
2197 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2198 		return (error);
2199 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2200 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2201 	setbirthtime = 0;
2202 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2203 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2204 		setbirthtime = 1;
2205 	VATTR_NULL(&vattr);
2206 	vattr.va_atime = ts[0];
2207 	vattr.va_mtime = ts[1];
2208 	if (setbirthtime)
2209 		vattr.va_birthtime = ts[1];
2210 	if (numtimes > 2)
2211 		vattr.va_birthtime = ts[2];
2212 	if (nullflag)
2213 		vattr.va_vaflags |= VA_UTIMES_NULL;
2214 #ifdef MAC
2215 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2216 	    vattr.va_mtime);
2217 	if (error == 0)
2218 #endif
2219 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2220 	VOP_UNLOCK(vp, 0, td);
2221 	vn_finished_write(mp);
2222 	return error;
2223 }
2224 
2225 /*
2226  * Set the access and modification times of a file.
2227  */
2228 #ifndef _SYS_SYSPROTO_H_
2229 struct utimes_args {
2230 	char	*path;
2231 	struct	timeval *tptr;
2232 };
2233 #endif
2234 /* ARGSUSED */
2235 int
2236 utimes(td, uap)
2237 	struct thread *td;
2238 	register struct utimes_args /* {
2239 		syscallarg(char *) path;
2240 		syscallarg(struct timeval *) tptr;
2241 	} */ *uap;
2242 {
2243 	struct timespec ts[2];
2244 	struct timeval *usrtvp;
2245 	int error;
2246 	struct nameidata nd;
2247 
2248 	usrtvp = SCARG(uap, tptr);
2249 	if ((error = getutimes(usrtvp, ts)) != 0)
2250 		return (error);
2251 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2252 	if ((error = namei(&nd)) != 0)
2253 		return (error);
2254 	NDFREE(&nd, NDF_ONLY_PNBUF);
2255 	error = setutimes(td, nd.ni_vp, ts, 2, usrtvp == NULL);
2256 	vrele(nd.ni_vp);
2257 	return (error);
2258 }
2259 
2260 /*
 * Set the access and modification times of a file; a trailing
 * symbolic link is not followed.
2262  */
2263 #ifndef _SYS_SYSPROTO_H_
2264 struct lutimes_args {
2265 	char	*path;
2266 	struct	timeval *tptr;
2267 };
2268 #endif
2269 /* ARGSUSED */
2270 int
2271 lutimes(td, uap)
2272 	struct thread *td;
2273 	register struct lutimes_args /* {
2274 		syscallarg(char *) path;
2275 		syscallarg(struct timeval *) tptr;
2276 	} */ *uap;
2277 {
2278 	struct timespec ts[2];
2279 	struct timeval *usrtvp;
2280 	int error;
2281 	struct nameidata nd;
2282 
2283 	usrtvp = SCARG(uap, tptr);
2284 	if ((error = getutimes(usrtvp, ts)) != 0)
2285 		return (error);
2286 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2287 	if ((error = namei(&nd)) != 0)
2288 		return (error);
2289 	NDFREE(&nd, NDF_ONLY_PNBUF);
2290 	error = setutimes(td, nd.ni_vp, ts, 2, usrtvp == NULL);
2291 	vrele(nd.ni_vp);
2292 	return (error);
2293 }
2294 
2295 /*
 * Set the access and modification times of a file given a file descriptor.
2297  */
2298 #ifndef _SYS_SYSPROTO_H_
2299 struct futimes_args {
2300 	int	fd;
2301 	struct	timeval *tptr;
2302 };
2303 #endif
2304 /* ARGSUSED */
2305 int
2306 futimes(td, uap)
2307 	struct thread *td;
2308 	register struct futimes_args /* {
2309 		syscallarg(int ) fd;
2310 		syscallarg(struct timeval *) tptr;
2311 	} */ *uap;
2312 {
2313 	struct timespec ts[2];
2314 	struct file *fp;
2315 	struct timeval *usrtvp;
2316 	int error;
2317 
2318 	usrtvp = SCARG(uap, tptr);
2319 	if ((error = getutimes(usrtvp, ts)) != 0)
2320 		return (error);
2321 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2322 		return (error);
2323 	error = setutimes(td, (struct vnode *)fp->f_data, ts, 2, usrtvp==NULL);
2324 	fdrop(fp, td);
2325 	return (error);
2326 }
2327 
2328 /*
2329  * Truncate a file given its path name.
2330  */
2331 #ifndef _SYS_SYSPROTO_H_
2332 struct truncate_args {
2333 	char	*path;
2334 	int	pad;
2335 	off_t	length;
2336 };
2337 #endif
2338 /* ARGSUSED */
2339 int
2340 truncate(td, uap)
2341 	struct thread *td;
2342 	register struct truncate_args /* {
2343 		syscallarg(char *) path;
2344 		syscallarg(int) pad;
2345 		syscallarg(off_t) length;
2346 	} */ *uap;
2347 {
2348 	struct mount *mp;
2349 	struct vnode *vp;
2350 	struct vattr vattr;
2351 	int error;
2352 	struct nameidata nd;
2353 
2354 	if (uap->length < 0)
2355 		return(EINVAL);
2356 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2357 	if ((error = namei(&nd)) != 0)
2358 		return (error);
2359 	vp = nd.ni_vp;
2360 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2361 		vrele(vp);
2362 		return (error);
2363 	}
2364 	NDFREE(&nd, NDF_ONLY_PNBUF);
2365 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2366 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2367 	if (vp->v_type == VDIR)
2368 		error = EISDIR;
2369 #ifdef MAC
2370 	else if ((error = mac_check_vnode_op(td->td_ucred, vp,
2371 	    MAC_OP_VNODE_WRITE))) {}
2372 #endif
2373 	else if ((error = vn_writechk(vp)) == 0 &&
2374 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2375 		VATTR_NULL(&vattr);
2376 		vattr.va_size = SCARG(uap, length);
2377 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2378 	}
2379 	vput(vp);
2380 	vn_finished_write(mp);
2381 	return (error);
2382 }
2383 
2384 /*
2385  * Truncate a file given a file descriptor.
2386  */
2387 #ifndef _SYS_SYSPROTO_H_
2388 struct ftruncate_args {
2389 	int	fd;
2390 	int	pad;
2391 	off_t	length;
2392 };
2393 #endif
2394 /* ARGSUSED */
2395 int
2396 ftruncate(td, uap)
2397 	struct thread *td;
2398 	register struct ftruncate_args /* {
2399 		syscallarg(int) fd;
2400 		syscallarg(int) pad;
2401 		syscallarg(off_t) length;
2402 	} */ *uap;
2403 {
2404 	struct mount *mp;
2405 	struct vattr vattr;
2406 	struct vnode *vp;
2407 	struct file *fp;
2408 	int error;
2409 
2410 	if (uap->length < 0)
2411 		return(EINVAL);
2412 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2413 		return (error);
2414 	if ((fp->f_flag & FWRITE) == 0) {
2415 		fdrop(fp, td);
2416 		return (EINVAL);
2417 	}
2418 	vp = (struct vnode *)fp->f_data;
2419 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2420 		fdrop(fp, td);
2421 		return (error);
2422 	}
2423 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2424 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2425 	if (vp->v_type == VDIR)
2426 		error = EISDIR;
2427 #ifdef MAC
2428 	else if ((error = mac_check_vnode_op(td->td_ucred, vp,
2429 	    MAC_OP_VNODE_WRITE))) {}
2430 #endif
2431 	else if ((error = vn_writechk(vp)) == 0) {
2432 		VATTR_NULL(&vattr);
2433 		vattr.va_size = SCARG(uap, length);
2434 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2435 	}
2436 	VOP_UNLOCK(vp, 0, td);
2437 	vn_finished_write(mp);
2438 	fdrop(fp, td);
2439 	return (error);
2440 }
2441 
2442 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2443 /*
2444  * Truncate a file given its path name.
2445  */
2446 #ifndef _SYS_SYSPROTO_H_
2447 struct otruncate_args {
2448 	char	*path;
2449 	long	length;
2450 };
2451 #endif
2452 /* ARGSUSED */
2453 int
2454 otruncate(td, uap)
2455 	struct thread *td;
2456 	register struct otruncate_args /* {
2457 		syscallarg(char *) path;
2458 		syscallarg(long) length;
2459 	} */ *uap;
2460 {
2461 	struct truncate_args /* {
2462 		syscallarg(char *) path;
2463 		syscallarg(int) pad;
2464 		syscallarg(off_t) length;
2465 	} */ nuap;
2466 
2467 	SCARG(&nuap, path) = SCARG(uap, path);
2468 	SCARG(&nuap, length) = SCARG(uap, length);
2469 	return (truncate(td, &nuap));
2470 }
2471 
2472 /*
2473  * Truncate a file given a file descriptor.
2474  */
2475 #ifndef _SYS_SYSPROTO_H_
2476 struct oftruncate_args {
2477 	int	fd;
2478 	long	length;
2479 };
2480 #endif
2481 /* ARGSUSED */
2482 int
2483 oftruncate(td, uap)
2484 	struct thread *td;
2485 	register struct oftruncate_args /* {
2486 		syscallarg(int) fd;
2487 		syscallarg(long) length;
2488 	} */ *uap;
2489 {
2490 	struct ftruncate_args /* {
2491 		syscallarg(int) fd;
2492 		syscallarg(int) pad;
2493 		syscallarg(off_t) length;
2494 	} */ nuap;
2495 
2496 	SCARG(&nuap, fd) = SCARG(uap, fd);
2497 	SCARG(&nuap, length) = SCARG(uap, length);
2498 	return (ftruncate(td, &nuap));
2499 }
2500 #endif /* COMPAT_43 || COMPAT_SUNOS */
2501 
2502 /*
2503  * Sync an open file.
2504  */
2505 #ifndef _SYS_SYSPROTO_H_
2506 struct fsync_args {
2507 	int	fd;
2508 };
2509 #endif
2510 /* ARGSUSED */
2511 int
2512 fsync(td, uap)
2513 	struct thread *td;
2514 	struct fsync_args /* {
2515 		syscallarg(int) fd;
2516 	} */ *uap;
2517 {
2518 	struct vnode *vp;
2519 	struct mount *mp;
2520 	struct file *fp;
2521 	vm_object_t obj;
2522 	int error;
2523 
2524 	GIANT_REQUIRED;
2525 
2526 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2527 		return (error);
2528 	vp = (struct vnode *)fp->f_data;
2529 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2530 		fdrop(fp, td);
2531 		return (error);
2532 	}
2533 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2534 	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2535 		vm_object_page_clean(obj, 0, 0, 0);
2536 	}
2537 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2538 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)
2539 	    && softdep_fsync_hook != NULL)
2540 		error = (*softdep_fsync_hook)(vp);
2541 
2542 	VOP_UNLOCK(vp, 0, td);
2543 	vn_finished_write(mp);
2544 	fdrop(fp, td);
2545 	return (error);
2546 }
2547 
2548 /*
2549  * Rename files.  Source and destination must either both be directories,
2550  * or both not be directories.  If target is a directory, it must be empty.
2551  */
2552 #ifndef _SYS_SYSPROTO_H_
2553 struct rename_args {
2554 	char	*from;
2555 	char	*to;
2556 };
2557 #endif
2558 /* ARGSUSED */
2559 int
2560 rename(td, uap)
2561 	struct thread *td;
2562 	register struct rename_args /* {
2563 		syscallarg(char *) from;
2564 		syscallarg(char *) to;
2565 	} */ *uap;
2566 {
2567 	struct mount *mp;
2568 	struct vnode *tvp, *fvp, *tdvp;
2569 	struct nameidata fromnd, tond;
2570 	int error;
2571 
2572 	bwillwrite();
2573 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2574 	    SCARG(uap, from), td);
2575 	if ((error = namei(&fromnd)) != 0)
2576 		return (error);
2577 	fvp = fromnd.ni_vp;
2578 	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
2579 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2580 		vrele(fromnd.ni_dvp);
2581 		vrele(fvp);
2582 		goto out1;
2583 	}
2584 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
2585 	    UIO_USERSPACE, SCARG(uap, to), td);
2586 	if (fromnd.ni_vp->v_type == VDIR)
2587 		tond.ni_cnd.cn_flags |= WILLBEDIR;
2588 	if ((error = namei(&tond)) != 0) {
2589 		/* Translate error code for rename("dir1", "dir2/."). */
2590 		if (error == EISDIR && fvp->v_type == VDIR)
2591 			error = EINVAL;
2592 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2593 		vrele(fromnd.ni_dvp);
2594 		vrele(fvp);
2595 		goto out1;
2596 	}
2597 	tdvp = tond.ni_dvp;
2598 	tvp = tond.ni_vp;
2599 	if (tvp != NULL) {
2600 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2601 			error = ENOTDIR;
2602 			goto out;
2603 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2604 			error = EISDIR;
2605 			goto out;
2606 		}
2607 	}
2608 	if (fvp == tdvp)
2609 		error = EINVAL;
2610 	/*
2611 	 * If source is the same as the destination (that is the
2612 	 * same inode number with the same name in the same directory),
2613 	 * then there is nothing to do.
2614 	 */
2615 	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
2616 	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
2617 	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
2618 	      fromnd.ni_cnd.cn_namelen))
2619 		error = -1;
2620 out:
2621 	if (!error) {
2622 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
2623 		if (fromnd.ni_dvp != tdvp) {
2624 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2625 		}
2626 		if (tvp) {
2627 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
2628 		}
2629 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2630 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2631 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2632 		NDFREE(&tond, NDF_ONLY_PNBUF);
2633 	} else {
2634 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2635 		NDFREE(&tond, NDF_ONLY_PNBUF);
2636 		if (tdvp == tvp)
2637 			vrele(tdvp);
2638 		else
2639 			vput(tdvp);
2640 		if (tvp)
2641 			vput(tvp);
2642 		vrele(fromnd.ni_dvp);
2643 		vrele(fvp);
2644 	}
2645 	vrele(tond.ni_startdir);
2646 	vn_finished_write(mp);
2647 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2648 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2649 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2650 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2651 out1:
2652 	if (fromnd.ni_startdir)
2653 		vrele(fromnd.ni_startdir);
2654 	if (error == -1)
2655 		return (0);
2656 	return (error);
2657 }
2658 
2659 /*
2660  * Make a directory file.
2661  */
2662 #ifndef _SYS_SYSPROTO_H_
2663 struct mkdir_args {
2664 	char	*path;
2665 	int	mode;
2666 };
2667 #endif
2668 /* ARGSUSED */
2669 int
2670 mkdir(td, uap)
2671 	struct thread *td;
2672 	register struct mkdir_args /* {
2673 		syscallarg(char *) path;
2674 		syscallarg(int) mode;
2675 	} */ *uap;
2676 {
2677 
2678 	return vn_mkdir(uap->path, uap->mode, UIO_USERSPACE, td);
2679 }
2680 
2681 int
2682 vn_mkdir(path, mode, segflg, td)
2683 	char *path;
2684 	int mode;
2685 	enum uio_seg segflg;
2686 	struct thread *td;
2687 {
2688 	struct mount *mp;
2689 	struct vnode *vp;
2690 	struct vattr vattr;
2691 	int error;
2692 	struct nameidata nd;
2693 
2694 restart:
2695 	bwillwrite();
2696 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, path, td);
2697 	nd.ni_cnd.cn_flags |= WILLBEDIR;
2698 	if ((error = namei(&nd)) != 0)
2699 		return (error);
2700 	vp = nd.ni_vp;
2701 	if (vp != NULL) {
2702 		NDFREE(&nd, NDF_ONLY_PNBUF);
2703 		vrele(vp);
2704 		/*
2705 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
2706 		 * the strange behaviour of leaving the vnode unlocked
2707 		 * if the target is the same vnode as the parent.
2708 		 */
2709 		if (vp == nd.ni_dvp)
2710 			vrele(nd.ni_dvp);
2711 		else
2712 			vput(nd.ni_dvp);
2713 		return (EEXIST);
2714 	}
2715 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2716 		NDFREE(&nd, NDF_ONLY_PNBUF);
2717 		vput(nd.ni_dvp);
2718 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2719 			return (error);
2720 		goto restart;
2721 	}
2722 	VATTR_NULL(&vattr);
2723 	vattr.va_type = VDIR;
2724 	FILEDESC_LOCK(td->td_proc->p_fd);
2725 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
2726 	FILEDESC_UNLOCK(td->td_proc->p_fd);
2727 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2728 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2729 	NDFREE(&nd, NDF_ONLY_PNBUF);
2730 	vput(nd.ni_dvp);
2731 	if (!error)
2732 		vput(nd.ni_vp);
2733 	vn_finished_write(mp);
2734 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2735 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2736 	return (error);
2737 }
2738 
2739 /*
2740  * Remove a directory file.
2741  */
2742 #ifndef _SYS_SYSPROTO_H_
2743 struct rmdir_args {
2744 	char	*path;
2745 };
2746 #endif
2747 /* ARGSUSED */
2748 int
2749 rmdir(td, uap)
2750 	struct thread *td;
2751 	struct rmdir_args /* {
2752 		syscallarg(char *) path;
2753 	} */ *uap;
2754 {
2755 	struct mount *mp;
2756 	struct vnode *vp;
2757 	int error;
2758 	struct nameidata nd;
2759 
2760 restart:
2761 	bwillwrite();
2762 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
2763 	    SCARG(uap, path), td);
2764 	if ((error = namei(&nd)) != 0)
2765 		return (error);
2766 	vp = nd.ni_vp;
2767 	if (vp->v_type != VDIR) {
2768 		error = ENOTDIR;
2769 		goto out;
2770 	}
2771 	/*
2772 	 * No rmdir "." please.
2773 	 */
2774 	if (nd.ni_dvp == vp) {
2775 		error = EINVAL;
2776 		goto out;
2777 	}
2778 	/*
2779 	 * The root of a mounted filesystem cannot be deleted.
2780 	 */
2781 	if (vp->v_flag & VROOT) {
2782 		error = EBUSY;
2783 		goto out;
2784 	}
2785 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2786 		NDFREE(&nd, NDF_ONLY_PNBUF);
2787 		if (nd.ni_dvp == vp)
2788 			vrele(nd.ni_dvp);
2789 		else
2790 			vput(nd.ni_dvp);
2791 		vput(vp);
2792 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2793 			return (error);
2794 		goto restart;
2795 	}
2796 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2797 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2798 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2799 	vn_finished_write(mp);
2800 out:
2801 	NDFREE(&nd, NDF_ONLY_PNBUF);
2802 	if (nd.ni_dvp == vp)
2803 		vrele(nd.ni_dvp);
2804 	else
2805 		vput(nd.ni_dvp);
2806 	vput(vp);
2807 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
2808 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
2809 	return (error);
2810 }
2811 
2812 #ifdef COMPAT_43
2813 /*
2814  * Read a block of directory entries in a filesystem independent format.
2815  */
2816 #ifndef _SYS_SYSPROTO_H_
2817 struct ogetdirentries_args {
2818 	int	fd;
2819 	char	*buf;
2820 	u_int	count;
2821 	long	*basep;
2822 };
2823 #endif
2824 int
2825 ogetdirentries(td, uap)
2826 	struct thread *td;
2827 	register struct ogetdirentries_args /* {
2828 		syscallarg(int) fd;
2829 		syscallarg(char *) buf;
2830 		syscallarg(u_int) count;
2831 		syscallarg(long *) basep;
2832 	} */ *uap;
2833 {
2834 	struct vnode *vp;
2835 	struct file *fp;
2836 	struct uio auio, kuio;
2837 	struct iovec aiov, kiov;
2838 	struct dirent *dp, *edp;
2839 	caddr_t dirbuf;
2840 	int error, eofflag, readcnt;
2841 	long loff;
2842 
2843 	/* XXX arbitrary sanity limit on `count'. */
2844 	if (SCARG(uap, count) > 64 * 1024)
2845 		return (EINVAL);
2846 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2847 		return (error);
2848 	if ((fp->f_flag & FREAD) == 0) {
2849 		fdrop(fp, td);
2850 		return (EBADF);
2851 	}
2852 	vp = (struct vnode *)fp->f_data;
2853 unionread:
2854 	if (vp->v_type != VDIR) {
2855 		fdrop(fp, td);
2856 		return (EINVAL);
2857 	}
2858 	aiov.iov_base = SCARG(uap, buf);
2859 	aiov.iov_len = SCARG(uap, count);
2860 	auio.uio_iov = &aiov;
2861 	auio.uio_iovcnt = 1;
2862 	auio.uio_rw = UIO_READ;
2863 	auio.uio_segflg = UIO_USERSPACE;
2864 	auio.uio_td = td;
2865 	auio.uio_resid = SCARG(uap, count);
2866 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2867 	loff = auio.uio_offset = fp->f_offset;
2868 #	if (BYTE_ORDER != LITTLE_ENDIAN)
2869 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
2870 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
2871 			    NULL, NULL);
2872 			fp->f_offset = auio.uio_offset;
2873 		} else
2874 #	endif
2875 	{
2876 		kuio = auio;
2877 		kuio.uio_iov = &kiov;
2878 		kuio.uio_segflg = UIO_SYSSPACE;
2879 		kiov.iov_len = SCARG(uap, count);
2880 		MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
2881 		kiov.iov_base = dirbuf;
2882 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
2883 			    NULL, NULL);
2884 		fp->f_offset = kuio.uio_offset;
2885 		if (error == 0) {
2886 			readcnt = SCARG(uap, count) - kuio.uio_resid;
2887 			edp = (struct dirent *)&dirbuf[readcnt];
2888 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
2889 #				if (BYTE_ORDER == LITTLE_ENDIAN)
2890 					/*
2891 					 * The expected low byte of
2892 					 * dp->d_namlen is our dp->d_type.
2893 					 * The high MBZ byte of dp->d_namlen
2894 					 * is our dp->d_namlen.
2895 					 */
2896 					dp->d_type = dp->d_namlen;
2897 					dp->d_namlen = 0;
2898 #				else
2899 					/*
2900 					 * The dp->d_type is the high byte
2901 					 * of the expected dp->d_namlen,
2902 					 * so must be zero'ed.
2903 					 */
2904 					dp->d_type = 0;
2905 #				endif
2906 				if (dp->d_reclen > 0) {
2907 					dp = (struct dirent *)
2908 					    ((char *)dp + dp->d_reclen);
2909 				} else {
2910 					error = EIO;
2911 					break;
2912 				}
2913 			}
2914 			if (dp >= edp)
2915 				error = uiomove(dirbuf, readcnt, &auio);
2916 		}
2917 		FREE(dirbuf, M_TEMP);
2918 	}
2919 	VOP_UNLOCK(vp, 0, td);
2920 	if (error) {
2921 		fdrop(fp, td);
2922 		return (error);
2923 	}
2924 	if (SCARG(uap, count) == auio.uio_resid) {
2925 		if (union_dircheckp) {
2926 			error = union_dircheckp(td, &vp, fp);
2927 			if (error == -1)
2928 				goto unionread;
2929 			if (error) {
2930 				fdrop(fp, td);
2931 				return (error);
2932 			}
2933 		}
2934 		if ((vp->v_flag & VROOT) &&
2935 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
2936 			struct vnode *tvp = vp;
2937 			vp = vp->v_mount->mnt_vnodecovered;
2938 			VREF(vp);
2939 			fp->f_data = vp;
2940 			fp->f_offset = 0;
2941 			vrele(tvp);
2942 			goto unionread;
2943 		}
2944 	}
2945 	error = copyout(&loff, SCARG(uap, basep), sizeof(long));
2946 	fdrop(fp, td);
2947 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
2948 	return (error);
2949 }
2950 #endif /* COMPAT_43 */
2951 
2952 /*
2953  * Read a block of directory entries in a filesystem independent format.
2954  */
2955 #ifndef _SYS_SYSPROTO_H_
2956 struct getdirentries_args {
2957 	int	fd;
2958 	char	*buf;
2959 	u_int	count;
2960 	long	*basep;
2961 };
2962 #endif
2963 int
2964 getdirentries(td, uap)
2965 	struct thread *td;
2966 	register struct getdirentries_args /* {
2967 		syscallarg(int) fd;
2968 		syscallarg(char *) buf;
2969 		syscallarg(u_int) count;
2970 		syscallarg(long *) basep;
2971 	} */ *uap;
2972 {
2973 	struct vnode *vp;
2974 	struct file *fp;
2975 	struct uio auio;
2976 	struct iovec aiov;
2977 	long loff;
2978 	int error, eofflag;
2979 
2980 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2981 		return (error);
2982 	if ((fp->f_flag & FREAD) == 0) {
2983 		fdrop(fp, td);
2984 		return (EBADF);
2985 	}
2986 	vp = (struct vnode *)fp->f_data;
2987 unionread:
2988 	if (vp->v_type != VDIR) {
2989 		fdrop(fp, td);
2990 		return (EINVAL);
2991 	}
2992 	aiov.iov_base = SCARG(uap, buf);
2993 	aiov.iov_len = SCARG(uap, count);
2994 	auio.uio_iov = &aiov;
2995 	auio.uio_iovcnt = 1;
2996 	auio.uio_rw = UIO_READ;
2997 	auio.uio_segflg = UIO_USERSPACE;
2998 	auio.uio_td = td;
2999 	auio.uio_resid = SCARG(uap, count);
3000 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3001 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3002 	loff = auio.uio_offset = fp->f_offset;
3003 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
3004 	fp->f_offset = auio.uio_offset;
3005 	VOP_UNLOCK(vp, 0, td);
3006 	if (error) {
3007 		fdrop(fp, td);
3008 		return (error);
3009 	}
3010 	if (SCARG(uap, count) == auio.uio_resid) {
3011 		if (union_dircheckp) {
3012 			error = union_dircheckp(td, &vp, fp);
3013 			if (error == -1)
3014 				goto unionread;
3015 			if (error) {
3016 				fdrop(fp, td);
3017 				return (error);
3018 			}
3019 		}
3020 		if ((vp->v_flag & VROOT) &&
3021 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3022 			struct vnode *tvp = vp;
3023 			vp = vp->v_mount->mnt_vnodecovered;
3024 			VREF(vp);
3025 			fp->f_data = vp;
3026 			fp->f_offset = 0;
3027 			vrele(tvp);
3028 			goto unionread;
3029 		}
3030 	}
3031 	if (SCARG(uap, basep) != NULL) {
3032 		error = copyout(&loff, SCARG(uap, basep), sizeof(long));
3033 	}
3034 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
3035 	fdrop(fp, td);
3036 	return (error);
3037 }
3038 #ifndef _SYS_SYSPROTO_H_
3039 struct getdents_args {
3040 	int fd;
3041 	char *buf;
3042 	size_t count;
3043 };
3044 #endif
3045 int
3046 getdents(td, uap)
3047 	struct thread *td;
3048 	register struct getdents_args /* {
3049 		syscallarg(int) fd;
3050 		syscallarg(char *) buf;
3051 		syscallarg(u_int) count;
3052 	} */ *uap;
3053 {
3054 	struct getdirentries_args ap;
3055 	ap.fd = uap->fd;
3056 	ap.buf = uap->buf;
3057 	ap.count = uap->count;
3058 	ap.basep = NULL;
3059 	return getdirentries(td, &ap);
3060 }
3061 
3062 /*
3063  * Set the mode mask for creation of filesystem nodes.
3064  *
3065  * MP SAFE
3066  */
3067 #ifndef _SYS_SYSPROTO_H_
3068 struct umask_args {
3069 	int	newmask;
3070 };
3071 #endif
3072 int
3073 umask(td, uap)
3074 	struct thread *td;
3075 	struct umask_args /* {
3076 		syscallarg(int) newmask;
3077 	} */ *uap;
3078 {
3079 	register struct filedesc *fdp;
3080 
3081 	FILEDESC_LOCK(td->td_proc->p_fd);
3082 	fdp = td->td_proc->p_fd;
3083 	td->td_retval[0] = fdp->fd_cmask;
3084 	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
3085 	FILEDESC_UNLOCK(td->td_proc->p_fd);
3086 	return (0);
3087 }
3088 
3089 /*
3090  * Void all references to file by ripping underlying filesystem
3091  * away from vnode.
3092  */
3093 #ifndef _SYS_SYSPROTO_H_
3094 struct revoke_args {
3095 	char	*path;
3096 };
3097 #endif
3098 /* ARGSUSED */
3099 int
3100 revoke(td, uap)
3101 	struct thread *td;
3102 	register struct revoke_args /* {
3103 		syscallarg(char *) path;
3104 	} */ *uap;
3105 {
3106 	struct mount *mp;
3107 	struct vnode *vp;
3108 	struct vattr vattr;
3109 	int error;
3110 	struct nameidata nd;
3111 
3112 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path),
3113 	    td);
3114 	if ((error = namei(&nd)) != 0)
3115 		return (error);
3116 	vp = nd.ni_vp;
3117 	NDFREE(&nd, NDF_ONLY_PNBUF);
3118 	if (vp->v_type != VCHR) {
3119 		vput(vp);
3120 		return (EINVAL);
3121 	}
3122 #ifdef MAC
3123 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3124 	if (error) {
3125 		vput(vp);
3126 		return (error);
3127 	}
3128 #endif
3129 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3130 	if (error) {
3131 		vput(vp);
3132 		return (error);
3133 	}
3134 	VOP_UNLOCK(vp, 0, td);
3135 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3136 		error = suser_cred(td->td_ucred, PRISON_ROOT);
3137 		if (error)
3138 			goto out;
3139 	}
3140 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3141 		goto out;
3142 	if (vcount(vp) > 1)
3143 		VOP_REVOKE(vp, REVOKEALL);
3144 	vn_finished_write(mp);
3145 out:
3146 	vrele(vp);
3147 	return (error);
3148 }
3149 
3150 /*
3151  * Convert a user file descriptor to a kernel file entry.
3152  * The file entry is locked upon returning.
3153  */
3154 int
3155 getvnode(fdp, fd, fpp)
3156 	struct filedesc *fdp;
3157 	int fd;
3158 	struct file **fpp;
3159 {
3160 	int error;
3161 	struct file *fp;
3162 
3163 	fp = NULL;
3164 	if (fdp == NULL)
3165 		error = EBADF;
3166 	else {
3167 		FILEDESC_LOCK(fdp);
3168 		if ((u_int)fd >= fdp->fd_nfiles ||
3169 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3170 			error = EBADF;
3171 		else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
3172 			fp = NULL;
3173 			error = EINVAL;
3174 		} else {
3175 			fhold(fp);
3176 			error = 0;
3177 		}
3178 		FILEDESC_UNLOCK(fdp);
3179 	}
3180 	*fpp = fp;
3181 	return (error);
3182 }
3183 /*
3184  * Get (NFS) file handle
3185  */
3186 #ifndef _SYS_SYSPROTO_H_
3187 struct getfh_args {
3188 	char	*fname;
3189 	fhandle_t *fhp;
3190 };
3191 #endif
3192 int
3193 getfh(td, uap)
3194 	struct thread *td;
3195 	register struct getfh_args *uap;
3196 {
3197 	struct nameidata nd;
3198 	fhandle_t fh;
3199 	register struct vnode *vp;
3200 	int error;
3201 
3202 	/*
3203 	 * Must be super user
3204 	 */
3205 	error = suser(td);
3206 	if (error)
3207 		return (error);
3208 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3209 	error = namei(&nd);
3210 	if (error)
3211 		return (error);
3212 	NDFREE(&nd, NDF_ONLY_PNBUF);
3213 	vp = nd.ni_vp;
3214 	bzero(&fh, sizeof(fh));
3215 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3216 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3217 	vput(vp);
3218 	if (error)
3219 		return (error);
3220 	error = copyout(&fh, uap->fhp, sizeof (fh));
3221 	return (error);
3222 }
3223 
3224 /*
3225  * syscall for the rpc.lockd to use to translate a NFS file handle into
3226  * an open descriptor.
3227  *
3228  * warning: do not remove the suser() call or this becomes one giant
3229  * security hole.
3230  */
3231 #ifndef _SYS_SYSPROTO_H_
3232 struct fhopen_args {
3233 	const struct fhandle *u_fhp;
3234 	int flags;
3235 };
3236 #endif
3237 int
3238 fhopen(td, uap)
3239 	struct thread *td;
3240 	struct fhopen_args /* {
3241 		syscallarg(const struct fhandle *) u_fhp;
3242 		syscallarg(int) flags;
3243 	} */ *uap;
3244 {
3245 	struct proc *p = td->td_proc;
3246 	struct mount *mp;
3247 	struct vnode *vp;
3248 	struct fhandle fhp;
3249 	struct vattr vat;
3250 	struct vattr *vap = &vat;
3251 	struct flock lf;
3252 	struct file *fp;
3253 	register struct filedesc *fdp = p->p_fd;
3254 	int fmode, mode, error, type;
3255 	struct file *nfp;
3256 	int indx;
3257 
3258 	/*
3259 	 * Must be super user
3260 	 */
3261 	error = suser(td);
3262 	if (error)
3263 		return (error);
3264 
3265 	fmode = FFLAGS(SCARG(uap, flags));
3266 	/* why not allow a non-read/write open for our lockd? */
3267 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3268 		return (EINVAL);
3269 	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
3270 	if (error)
3271 		return(error);
3272 	/* find the mount point */
3273 	mp = vfs_getvfs(&fhp.fh_fsid);
3274 	if (mp == NULL)
3275 		return (ESTALE);
3276 	/* now give me my vnode, it gets returned to me locked */
3277 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3278 	if (error)
3279 		return (error);
3280  	/*
3281 	 * from now on we have to make sure not
3282 	 * to forget about the vnode
3283 	 * any error that causes an abort must vput(vp)
3284 	 * just set error = err and 'goto bad;'.
3285 	 */
3286 
3287 	/*
3288 	 * from vn_open
3289 	 */
3290 	if (vp->v_type == VLNK) {
3291 		error = EMLINK;
3292 		goto bad;
3293 	}
3294 	if (vp->v_type == VSOCK) {
3295 		error = EOPNOTSUPP;
3296 		goto bad;
3297 	}
3298 	mode = 0;
3299 	if (fmode & (FWRITE | O_TRUNC)) {
3300 		if (vp->v_type == VDIR) {
3301 			error = EISDIR;
3302 			goto bad;
3303 		}
3304 		error = vn_writechk(vp);
3305 		if (error)
3306 			goto bad;
3307 		mode |= VWRITE;
3308 	}
3309 	if (fmode & FREAD)
3310 		mode |= VREAD;
3311 	if (fmode & O_APPEND)
3312 		mode |= VAPPEND;
3313 #ifdef MAC
3314 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
3315 	if (error)
3316 		goto bad;
3317 #endif
3318 	if (mode) {
3319 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
3320 		if (error)
3321 			goto bad;
3322 	}
3323 	if (fmode & O_TRUNC) {
3324 		VOP_UNLOCK(vp, 0, td);				/* XXX */
3325 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3326 			vrele(vp);
3327 			return (error);
3328 		}
3329 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3330 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
3331 #ifdef MAC
3332 		error = mac_check_vnode_op(td->td_ucred, vp,
3333 		    MAC_OP_VNODE_WRITE);
3334 		if (error == 0) {
3335 #endif
3336 			VATTR_NULL(vap);
3337 			vap->va_size = 0;
3338 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
3339 #ifdef MAC
3340 		}
3341 #endif
3342 		vn_finished_write(mp);
3343 		if (error)
3344 			goto bad;
3345 	}
3346 	error = VOP_OPEN(vp, fmode, td->td_ucred, td);
3347 	if (error)
3348 		goto bad;
3349 	/*
3350 	 * Make sure that a VM object is created for VMIO support.
3351 	 */
3352 	if (vn_canvmio(vp) == TRUE) {
3353 		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
3354 			goto bad;
3355 	}
3356 	if (fmode & FWRITE)
3357 		vp->v_writecount++;
3358 
3359 	/*
3360 	 * end of vn_open code
3361 	 */
3362 
3363 	if ((error = falloc(td, &nfp, &indx)) != 0) {
3364 		if (fmode & FWRITE)
3365 			vp->v_writecount--;
3366 		goto bad;
3367 	}
3368 	fp = nfp;
3369 
3370 	/*
3371 	 * Hold an extra reference to avoid having fp ripped out
3372 	 * from under us while we block in the lock op
3373 	 */
3374 	fhold(fp);
3375 	nfp->f_data = vp;
3376 	nfp->f_flag = fmode & FMASK;
3377 	nfp->f_ops = &vnops;
3378 	nfp->f_type = DTYPE_VNODE;
3379 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
3380 		lf.l_whence = SEEK_SET;
3381 		lf.l_start = 0;
3382 		lf.l_len = 0;
3383 		if (fmode & O_EXLOCK)
3384 			lf.l_type = F_WRLCK;
3385 		else
3386 			lf.l_type = F_RDLCK;
3387 		type = F_FLOCK;
3388 		if ((fmode & FNONBLOCK) == 0)
3389 			type |= F_WAIT;
3390 		VOP_UNLOCK(vp, 0, td);
3391 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
3392 			    type)) != 0) {
3393 			/*
3394 			 * The lock request failed.  Normally close the
3395 			 * descriptor but handle the case where someone might
3396 			 * have dup()d or close()d it when we weren't looking.
3397 			 */
3398 			FILEDESC_LOCK(fdp);
3399 			if (fdp->fd_ofiles[indx] == fp) {
3400 				fdp->fd_ofiles[indx] = NULL;
3401 				FILEDESC_UNLOCK(fdp);
3402 				fdrop(fp, td);
3403 			} else
3404 				FILEDESC_UNLOCK(fdp);
3405 			/*
3406 			 * release our private reference
3407 			 */
3408 			fdrop(fp, td);
3409 			return(error);
3410 		}
3411 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3412 		fp->f_flag |= FHASLOCK;
3413 	}
3414 	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
3415 		vfs_object_create(vp, td, td->td_ucred);
3416 
3417 	VOP_UNLOCK(vp, 0, td);
3418 	fdrop(fp, td);
3419 	td->td_retval[0] = indx;
3420 	return (0);
3421 
3422 bad:
3423 	vput(vp);
3424 	return (error);
3425 }
3426 
3427 /*
3428  * Stat an (NFS) file handle.
3429  */
3430 #ifndef _SYS_SYSPROTO_H_
3431 struct fhstat_args {
3432 	struct fhandle *u_fhp;
3433 	struct stat *sb;
3434 };
3435 #endif
3436 int
3437 fhstat(td, uap)
3438 	struct thread *td;
3439 	register struct fhstat_args /* {
3440 		syscallarg(struct fhandle *) u_fhp;
3441 		syscallarg(struct stat *) sb;
3442 	} */ *uap;
3443 {
3444 	struct stat sb;
3445 	fhandle_t fh;
3446 	struct mount *mp;
3447 	struct vnode *vp;
3448 	int error;
3449 
3450 	/*
3451 	 * Must be super user
3452 	 */
3453 	error = suser(td);
3454 	if (error)
3455 		return (error);
3456 
3457 	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
3458 	if (error)
3459 		return (error);
3460 
3461 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3462 		return (ESTALE);
3463 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3464 		return (error);
3465 	error = vn_stat(vp, &sb, td);
3466 	vput(vp);
3467 	if (error)
3468 		return (error);
3469 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
3470 	return (error);
3471 }
3472 
3473 /*
3474  * Implement fstatfs() for (NFS) file handles.
3475  */
3476 #ifndef _SYS_SYSPROTO_H_
3477 struct fhstatfs_args {
3478 	struct fhandle *u_fhp;
3479 	struct statfs *buf;
3480 };
3481 #endif
3482 int
3483 fhstatfs(td, uap)
3484 	struct thread *td;
3485 	struct fhstatfs_args /* {
3486 		syscallarg(struct fhandle) *u_fhp;
3487 		syscallarg(struct statfs) *buf;
3488 	} */ *uap;
3489 {
3490 	struct statfs *sp;
3491 	struct mount *mp;
3492 	struct vnode *vp;
3493 	struct statfs sb;
3494 	fhandle_t fh;
3495 	int error;
3496 
3497 	/*
3498 	 * Must be super user
3499 	 */
3500 	error = suser(td);
3501 	if (error)
3502 		return (error);
3503 
3504 	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
3505 		return (error);
3506 
3507 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3508 		return (ESTALE);
3509 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3510 		return (error);
3511 	mp = vp->v_mount;
3512 	sp = &mp->mnt_stat;
3513 	vput(vp);
3514 #ifdef MAC
3515 	error = mac_check_mount_stat(td->td_ucred, mp);
3516 	if (error)
3517 		return (error);
3518 #endif
3519 	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3520 		return (error);
3521 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3522 	if (suser(td)) {
3523 		bcopy(sp, &sb, sizeof(sb));
3524 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3525 		sp = &sb;
3526 	}
3527 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
3528 }
3529 
3530 /*
3531  * Syscall to push extended attribute configuration information into the
3532  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3533  * a command (int cmd), and attribute name and misc data.  For now, the
3534  * attribute name is left in userspace for consumption by the VFS_op.
3535  * It will probably be changed to be copied into sysspace by the
3536  * syscall in the future, once issues with various consumers of the
3537  * attribute code have raised their hands.
3538  *
3539  * Currently this is used only by UFS Extended Attributes.
3540  */
3541 int
3542 extattrctl(td, uap)
3543 	struct thread *td;
3544 	struct extattrctl_args /* {
3545 		syscallarg(const char *) path;
3546 		syscallarg(int) cmd;
3547 		syscallarg(const char *) filename;
3548 		syscallarg(int) attrnamespace;
3549 		syscallarg(const char *) attrname;
3550 	} */ *uap;
3551 {
3552 	struct vnode *filename_vp;
3553 	struct nameidata nd;
3554 	struct mount *mp, *mp_writable;
3555 	char attrname[EXTATTR_MAXNAMELEN];
3556 	int error;
3557 
3558 	/*
3559 	 * uap->attrname is not always defined.  We check again later when we
3560 	 * invoke the VFS call so as to pass in NULL there if needed.
3561 	 */
3562 	if (uap->attrname != NULL) {
3563 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
3564 		    NULL);
3565 		if (error)
3566 			return (error);
3567 	}
3568 
3569 	/*
3570 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
3571 	 * which VFS_EXTATTRCTL() will later release.
3572 	 */
3573 	filename_vp = NULL;
3574 	if (uap->filename != NULL) {
3575 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3576 		    uap->filename, td);
3577 		if ((error = namei(&nd)) != 0)
3578 			return (error);
3579 		filename_vp = nd.ni_vp;
3580 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3581 	}
3582 
3583 	/* uap->path is always defined. */
3584 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3585 	if ((error = namei(&nd)) != 0) {
3586 		if (filename_vp != NULL)
3587 			vput(filename_vp);
3588 		return (error);
3589 	}
3590 	mp = nd.ni_vp->v_mount;
3591 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
3592 	NDFREE(&nd, 0);
3593 	if (error) {
3594 		if (filename_vp != NULL)
3595 			vput(filename_vp);
3596 		return (error);
3597 	}
3598 
3599 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
3600 	    uap->attrname != NULL ? attrname : NULL, td);
3601 
3602 	vn_finished_write(mp_writable);
3603 	/*
3604 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3605 	 * filename_vp, so vrele it if it is defined.
3606 	 */
3607 	if (filename_vp != NULL)
3608 		vrele(filename_vp);
3609 
3610 	return (error);
3611 }
3612 
3613 /*-
3614  * Set a named extended attribute on a file or directory
3615  *
3616  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3617  *            kernelspace string pointer "attrname", userspace buffer
3618  *            pointer "data", buffer length "nbytes", thread "td".
3619  * Returns: 0 on success, an error number otherwise
3620  * Locks: none
3621  * References: vp must be a valid reference for the duration of the call
3622  */
3623 static int
3624 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3625     void *data, size_t nbytes, struct thread *td)
3626 {
3627 	struct mount *mp;
3628 	struct uio auio;
3629 	struct iovec aiov;
3630 	ssize_t cnt;
3631 	int error;
3632 
3633 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3634 		return (error);
3635 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3636 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3637 
3638 	aiov.iov_base = data;
3639 	aiov.iov_len = nbytes;
3640 	auio.uio_iov = &aiov;
3641 	auio.uio_iovcnt = 1;
3642 	auio.uio_offset = 0;
3643 	if (nbytes > INT_MAX) {
3644 		error = EINVAL;
3645 		goto done;
3646 	}
3647 	auio.uio_resid = nbytes;
3648 	auio.uio_rw = UIO_WRITE;
3649 	auio.uio_segflg = UIO_USERSPACE;
3650 	auio.uio_td = td;
3651 	cnt = nbytes;
3652 
3653 #ifdef MAC
3654 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
3655 	    attrname, &auio);
3656 	if (error)
3657 		goto done;
3658 #endif
3659 
3660 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3661 	    td->td_ucred, td);
3662 	cnt -= auio.uio_resid;
3663 	td->td_retval[0] = cnt;
3664 
3665 done:
3666 	VOP_UNLOCK(vp, 0, td);
3667 	vn_finished_write(mp);
3668 	return (error);
3669 }
3670 
3671 int
3672 extattr_set_file(td, uap)
3673 	struct thread *td;
3674 	struct extattr_set_file_args /* {
3675 		syscallarg(const char *) path;
3676 		syscallarg(int) attrnamespace;
3677 		syscallarg(const char *) attrname;
3678 		syscallarg(void *) data;
3679 		syscallarg(size_t) nbytes;
3680 	} */ *uap;
3681 {
3682 	struct nameidata nd;
3683 	char attrname[EXTATTR_MAXNAMELEN];
3684 	int error;
3685 
3686 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3687 	if (error)
3688 		return (error);
3689 
3690 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3691 	if ((error = namei(&nd)) != 0)
3692 		return (error);
3693 	NDFREE(&nd, NDF_ONLY_PNBUF);
3694 
3695 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
3696 	    uap->data, uap->nbytes, td);
3697 
3698 	vrele(nd.ni_vp);
3699 	return (error);
3700 }
3701 
3702 int
3703 extattr_set_fd(td, uap)
3704 	struct thread *td;
3705 	struct extattr_set_fd_args /* {
3706 		syscallarg(int) fd;
3707 		syscallarg(int) attrnamespace;
3708 		syscallarg(const char *) attrname;
3709 		syscallarg(void *) data;
3710 		syscallarg(size_t) nbytes;
3711 	} */ *uap;
3712 {
3713 	struct file *fp;
3714 	char attrname[EXTATTR_MAXNAMELEN];
3715 	int error;
3716 
3717 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3718 	if (error)
3719 		return (error);
3720 
3721 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3722 		return (error);
3723 
3724 	error = extattr_set_vp((struct vnode *)fp->f_data, uap->attrnamespace,
3725 	    attrname, uap->data, uap->nbytes, td);
3726 	fdrop(fp, td);
3727 
3728 	return (error);
3729 }
3730 
3731 /*-
3732  * Get a named extended attribute on a file or directory
3733  *
3734  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3735  *            kernelspace string pointer "attrname", userspace buffer
3736  *            pointer "data", buffer length "nbytes", thread "td".
3737  * Returns: 0 on success, an error number otherwise
3738  * Locks: none
3739  * References: vp must be a valid reference for the duration of the call
3740  */
3741 static int
3742 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3743     void *data, size_t nbytes, struct thread *td)
3744 {
3745 	struct uio auio, *auiop;
3746 	struct iovec aiov;
3747 	ssize_t cnt;
3748 	size_t size, *sizep;
3749 	int error;
3750 
3751 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
3752 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3753 
3754 	/*
3755 	 * Slightly unusual semantics: if the user provides a NULL data
3756 	 * pointer, they don't want to receive the data, just the
3757 	 * maximum read length.
3758 	 */
3759 	auiop = NULL;
3760 	sizep = NULL;
3761 	cnt = 0;
3762 	if (data != NULL) {
3763 		aiov.iov_base = data;
3764 		aiov.iov_len = nbytes;
3765 		auio.uio_iov = &aiov;
3766 		auio.uio_offset = 0;
3767 		if (nbytes > INT_MAX) {
3768 			error = EINVAL;
3769 			goto done;
3770 		}
3771 		auio.uio_resid = nbytes;
3772 		auio.uio_rw = UIO_READ;
3773 		auio.uio_segflg = UIO_USERSPACE;
3774 		auio.uio_td = td;
3775 		auiop = &auio;
3776 		cnt = nbytes;
3777 	} else
3778 		sizep = &size;
3779 
3780 #ifdef MAC
3781 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
3782 	    attrname, &auio);
3783 	if (error)
3784 		goto done;
3785 #endif
3786 
3787 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
3788 	    td->td_ucred, td);
3789 
3790 	if (auiop != NULL) {
3791 		cnt -= auio.uio_resid;
3792 		td->td_retval[0] = cnt;
3793 	} else
3794 		td->td_retval[0] = size;
3795 
3796 done:
3797 	VOP_UNLOCK(vp, 0, td);
3798 	return (error);
3799 }
3800 
3801 int
3802 extattr_get_file(td, uap)
3803 	struct thread *td;
3804 	struct extattr_get_file_args /* {
3805 		syscallarg(const char *) path;
3806 		syscallarg(int) attrnamespace;
3807 		syscallarg(const char *) attrname;
3808 		syscallarg(void *) data;
3809 		syscallarg(size_t) nbytes;
3810 	} */ *uap;
3811 {
3812 	struct nameidata nd;
3813 	char attrname[EXTATTR_MAXNAMELEN];
3814 	int error;
3815 
3816 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3817 	if (error)
3818 		return (error);
3819 
3820 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3821 	if ((error = namei(&nd)) != 0)
3822 		return (error);
3823 	NDFREE(&nd, NDF_ONLY_PNBUF);
3824 
3825 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
3826 	    uap->data, uap->nbytes, td);
3827 
3828 	vrele(nd.ni_vp);
3829 	return (error);
3830 }
3831 
3832 int
3833 extattr_get_fd(td, uap)
3834 	struct thread *td;
3835 	struct extattr_get_fd_args /* {
3836 		syscallarg(int) fd;
3837 		syscallarg(int) attrnamespace;
3838 		syscallarg(const char *) attrname;
3839 		syscallarg(void *) data;
3840 		syscallarg(size_t) nbytes;
3841 	} */ *uap;
3842 {
3843 	struct file *fp;
3844 	char attrname[EXTATTR_MAXNAMELEN];
3845 	int error;
3846 
3847 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3848 	if (error)
3849 		return (error);
3850 
3851 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3852 		return (error);
3853 
3854 	error = extattr_get_vp((struct vnode *)fp->f_data, uap->attrnamespace,
3855 	    attrname, uap->data, uap->nbytes, td);
3856 
3857 	fdrop(fp, td);
3858 	return (error);
3859 }
3860 
3861 /*
3862  * extattr_delete_vp(): Delete a named extended attribute on a file or
3863  *                      directory
3864  *
3865  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3866  *            kernelspace string pointer "attrname", proc "p"
3867  * Returns: 0 on success, an error number otherwise
3868  * Locks: none
3869  * References: vp must be a valid reference for the duration of the call
3870  */
3871 static int
3872 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3873     struct thread *td)
3874 {
3875 	struct mount *mp;
3876 	int error;
3877 
3878 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3879 		return (error);
3880 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3881 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3882 
3883 #ifdef MAC
3884 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
3885 	    attrname, NULL);
3886 #endif
3887 
3888 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred,
3889 	    td);
3890 
3891 	VOP_UNLOCK(vp, 0, td);
3892 	vn_finished_write(mp);
3893 	return (error);
3894 }
3895 
3896 int
3897 extattr_delete_file(td, uap)
3898 	struct thread *td;
3899 	struct extattr_delete_file_args /* {
3900 		syscallarg(const char *) path;
3901 		syscallarg(int) attrnamespace;
3902 		syscallarg(const char *) attrname;
3903 	} */ *uap;
3904 {
3905 	struct nameidata nd;
3906 	char attrname[EXTATTR_MAXNAMELEN];
3907 	int error;
3908 
3909 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3910 	if (error)
3911 		return(error);
3912 
3913 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3914 	if ((error = namei(&nd)) != 0)
3915 		return(error);
3916 	NDFREE(&nd, NDF_ONLY_PNBUF);
3917 
3918 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
3919 
3920 	vrele(nd.ni_vp);
3921 	return(error);
3922 }
3923 
3924 int
3925 extattr_delete_fd(td, uap)
3926 	struct thread *td;
3927 	struct extattr_delete_fd_args /* {
3928 		syscallarg(int) fd;
3929 		syscallarg(int) attrnamespace;
3930 		syscallarg(const char *) attrname;
3931 	} */ *uap;
3932 {
3933 	struct file *fp;
3934 	struct vnode *vp;
3935 	char attrname[EXTATTR_MAXNAMELEN];
3936 	int error;
3937 
3938 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3939 	if (error)
3940 		return (error);
3941 
3942 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3943 		return (error);
3944 	vp = (struct vnode *)fp->f_data;
3945 
3946 	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
3947 
3948 	fdrop(fp, td);
3949 	return (error);
3950 }
3951