xref: /freebsd/sys/kern/vfs_extattr.c (revision dce6e6518b85561495cff38a3074a69d29d58a55)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  */
40 
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43 
44 /* For 4.3 integer FS ID compatibility */
45 #include "opt_compat.h"
46 #include "opt_mac.h"
47 
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/bio.h>
51 #include <sys/buf.h>
52 #include <sys/sysent.h>
53 #include <sys/mac.h>
54 #include <sys/malloc.h>
55 #include <sys/mount.h>
56 #include <sys/mutex.h>
57 #include <sys/sysproto.h>
58 #include <sys/namei.h>
59 #include <sys/filedesc.h>
60 #include <sys/kernel.h>
61 #include <sys/fcntl.h>
62 #include <sys/file.h>
63 #include <sys/limits.h>
64 #include <sys/linker.h>
65 #include <sys/stat.h>
66 #include <sys/sx.h>
67 #include <sys/unistd.h>
68 #include <sys/vnode.h>
69 #include <sys/proc.h>
70 #include <sys/dirent.h>
71 #include <sys/extattr.h>
72 #include <sys/jail.h>
73 #include <sys/syscallsubr.h>
74 #include <sys/sysctl.h>
75 
76 #include <machine/stdarg.h>
77 
78 #include <vm/vm.h>
79 #include <vm/vm_object.h>
80 #include <vm/vm_page.h>
81 #include <vm/uma.h>
82 
83 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
84 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
85 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
86 static int setfmode(struct thread *td, struct vnode *, int);
87 static int setfflags(struct thread *td, struct vnode *, int);
88 static int setutimes(struct thread *td, struct vnode *,
89     const struct timespec *, int, int);
90 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
91     struct thread *td);
92 
93 static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
94     size_t nbytes, struct thread *td);
95 
96 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
97 int (*softdep_fsync_hook)(struct vnode *);
98 
99 /*
100  * The module initialization routine for POSIX asynchronous I/O will
101  * set this to the version of AIO that it implements.  (Zero means
102  * that it is not implemented.)  This value is used here by pathconf()
103  * and in kern_descrip.c by fpathconf().
104  */
105 int async_io_version;
106 
107 /*
108  * Sync each mounted filesystem.
109  */
110 #ifndef _SYS_SYSPROTO_H_
111 struct sync_args {
112         int     dummy;
113 };
114 #endif
115 
116 #ifdef DEBUG
117 static int syncprt = 0;
118 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
119 #endif
120 
121 /* ARGSUSED */
122 int
123 sync(td, uap)
124 	struct thread *td;
125 	struct sync_args *uap;
126 {
127 	struct mount *mp, *nmp;
128 	int asyncflag;
129 
130 	mtx_lock(&mountlist_mtx);
131 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
132 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
133 			nmp = TAILQ_NEXT(mp, mnt_list);
134 			continue;
135 		}
136 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
137 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
138 			asyncflag = mp->mnt_flag & MNT_ASYNC;
139 			mp->mnt_flag &= ~MNT_ASYNC;
140 			vfs_msync(mp, MNT_NOWAIT);
141 			VFS_SYNC(mp, MNT_NOWAIT,
142 			    ((td != NULL) ? td->td_ucred : NOCRED), td);
143 			mp->mnt_flag |= asyncflag;
144 			vn_finished_write(mp);
145 		}
146 		mtx_lock(&mountlist_mtx);
147 		nmp = TAILQ_NEXT(mp, mnt_list);
148 		vfs_unbusy(mp, td);
149 	}
150 	mtx_unlock(&mountlist_mtx);
151 #if 0
152 /*
153  * XXX don't call vfs_bufstats() yet because that routine
154  * was not imported in the Lite2 merge.
155  */
156 #ifdef DIAGNOSTIC
157 	if (syncprt)
158 		vfs_bufstats();
159 #endif /* DIAGNOSTIC */
160 #endif
161 	return (0);
162 }
163 
164 /* XXX PRISON: could be per prison flag */
165 static int prison_quotas;
166 #if 0
167 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
168 #endif
169 
170 /*
171  * Change filesystem quotas.
172  */
173 #ifndef _SYS_SYSPROTO_H_
174 struct quotactl_args {
175 	char *path;
176 	int cmd;
177 	int uid;
178 	caddr_t arg;
179 };
180 #endif
181 /* ARGSUSED */
182 int
183 quotactl(td, uap)
184 	struct thread *td;
185 	register struct quotactl_args /* {
186 		char *path;
187 		int cmd;
188 		int uid;
189 		caddr_t arg;
190 	} */ *uap;
191 {
192 	struct mount *mp;
193 	int error;
194 	struct nameidata nd;
195 
196 	if (jailed(td->td_ucred) && !prison_quotas)
197 		return (EPERM);
198 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
199 	if ((error = namei(&nd)) != 0)
200 		return (error);
201 	NDFREE(&nd, NDF_ONLY_PNBUF);
202 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
203 	vrele(nd.ni_vp);
204 	if (error)
205 		return (error);
206 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
207 	vn_finished_write(mp);
208 	return (error);
209 }
210 
211 /*
212  * Get filesystem statistics.
213  */
214 #ifndef _SYS_SYSPROTO_H_
215 struct statfs_args {
216 	char *path;
217 	struct statfs *buf;
218 };
219 #endif
220 /* ARGSUSED */
221 int
222 statfs(td, uap)
223 	struct thread *td;
224 	register struct statfs_args /* {
225 		char *path;
226 		struct statfs *buf;
227 	} */ *uap;
228 {
229 	register struct mount *mp;
230 	register struct statfs *sp;
231 	int error;
232 	struct nameidata nd;
233 	struct statfs sb;
234 
235 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
236 	if ((error = namei(&nd)) != 0)
237 		return (error);
238 	mp = nd.ni_vp->v_mount;
239 	sp = &mp->mnt_stat;
240 	NDFREE(&nd, NDF_ONLY_PNBUF);
241 	vrele(nd.ni_vp);
242 #ifdef MAC
243 	error = mac_check_mount_stat(td->td_ucred, mp);
244 	if (error)
245 		return (error);
246 #endif
247 	error = VFS_STATFS(mp, sp, td);
248 	if (error)
249 		return (error);
250 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
251 	if (suser(td)) {
252 		bcopy(sp, &sb, sizeof(sb));
253 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
254 		sp = &sb;
255 	}
256 	return (copyout(sp, uap->buf, sizeof(*sp)));
257 }
258 
259 /*
260  * Get filesystem statistics.
261  */
262 #ifndef _SYS_SYSPROTO_H_
263 struct fstatfs_args {
264 	int fd;
265 	struct statfs *buf;
266 };
267 #endif
268 /* ARGSUSED */
269 int
270 fstatfs(td, uap)
271 	struct thread *td;
272 	register struct fstatfs_args /* {
273 		int fd;
274 		struct statfs *buf;
275 	} */ *uap;
276 {
277 	struct file *fp;
278 	struct mount *mp;
279 	register struct statfs *sp;
280 	int error;
281 	struct statfs sb;
282 
283 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
284 		return (error);
285 	mp = fp->f_vnode->v_mount;
286 	fdrop(fp, td);
287 	if (mp == NULL)
288 		return (EBADF);
289 #ifdef MAC
290 	error = mac_check_mount_stat(td->td_ucred, mp);
291 	if (error)
292 		return (error);
293 #endif
294 	sp = &mp->mnt_stat;
295 	error = VFS_STATFS(mp, sp, td);
296 	if (error)
297 		return (error);
298 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
299 	if (suser(td)) {
300 		bcopy(sp, &sb, sizeof(sb));
301 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
302 		sp = &sb;
303 	}
304 	return (copyout(sp, uap->buf, sizeof(*sp)));
305 }
306 
307 /*
308  * Get statistics on all filesystems.
309  */
310 #ifndef _SYS_SYSPROTO_H_
311 struct getfsstat_args {
312 	struct statfs *buf;
313 	long bufsize;
314 	int flags;
315 };
316 #endif
317 int
318 getfsstat(td, uap)
319 	struct thread *td;
320 	register struct getfsstat_args /* {
321 		struct statfs *buf;
322 		long bufsize;
323 		int flags;
324 	} */ *uap;
325 {
326 	register struct mount *mp, *nmp;
327 	register struct statfs *sp;
328 	caddr_t sfsp;
329 	long count, maxcount, error;
330 
331 	maxcount = uap->bufsize / sizeof(struct statfs);
332 	sfsp = (caddr_t)uap->buf;
333 	count = 0;
334 	mtx_lock(&mountlist_mtx);
335 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
336 #ifdef MAC
337 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
338 			nmp = TAILQ_NEXT(mp, mnt_list);
339 			continue;
340 		}
341 #endif
342 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
343 			nmp = TAILQ_NEXT(mp, mnt_list);
344 			continue;
345 		}
346 		if (sfsp && count < maxcount) {
347 			sp = &mp->mnt_stat;
348 			/*
349 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
350 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
351 			 * overrides MNT_WAIT.
352 			 */
353 			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
354 			    (uap->flags & MNT_WAIT)) &&
355 			    (error = VFS_STATFS(mp, sp, td))) {
356 				mtx_lock(&mountlist_mtx);
357 				nmp = TAILQ_NEXT(mp, mnt_list);
358 				vfs_unbusy(mp, td);
359 				continue;
360 			}
361 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
362 			error = copyout(sp, sfsp, sizeof(*sp));
363 			if (error) {
364 				vfs_unbusy(mp, td);
365 				return (error);
366 			}
367 			sfsp += sizeof(*sp);
368 		}
369 		count++;
370 		mtx_lock(&mountlist_mtx);
371 		nmp = TAILQ_NEXT(mp, mnt_list);
372 		vfs_unbusy(mp, td);
373 	}
374 	mtx_unlock(&mountlist_mtx);
375 	if (sfsp && count > maxcount)
376 		td->td_retval[0] = maxcount;
377 	else
378 		td->td_retval[0] = count;
379 	return (0);
380 }
381 
382 /*
383  * Change current working directory to a given file descriptor.
384  */
385 #ifndef _SYS_SYSPROTO_H_
386 struct fchdir_args {
387 	int	fd;
388 };
389 #endif
390 /* ARGSUSED */
391 int
392 fchdir(td, uap)
393 	struct thread *td;
394 	struct fchdir_args /* {
395 		int fd;
396 	} */ *uap;
397 {
398 	register struct filedesc *fdp = td->td_proc->p_fd;
399 	struct vnode *vp, *tdp, *vpold;
400 	struct mount *mp;
401 	struct file *fp;
402 	int error;
403 
404 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
405 		return (error);
406 	vp = fp->f_vnode;
407 	VREF(vp);
408 	fdrop(fp, td);
409 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
410 	if (vp->v_type != VDIR)
411 		error = ENOTDIR;
412 #ifdef MAC
413 	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
414 	}
415 #endif
416 	else
417 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
418 	while (!error && (mp = vp->v_mountedhere) != NULL) {
419 		if (vfs_busy(mp, 0, 0, td))
420 			continue;
421 		error = VFS_ROOT(mp, &tdp);
422 		vfs_unbusy(mp, td);
423 		if (error)
424 			break;
425 		vput(vp);
426 		vp = tdp;
427 	}
428 	if (error) {
429 		vput(vp);
430 		return (error);
431 	}
432 	VOP_UNLOCK(vp, 0, td);
433 	FILEDESC_LOCK(fdp);
434 	vpold = fdp->fd_cdir;
435 	fdp->fd_cdir = vp;
436 	FILEDESC_UNLOCK(fdp);
437 	vrele(vpold);
438 	return (0);
439 }
440 
441 /*
442  * Change current working directory (``.'').
443  */
444 #ifndef _SYS_SYSPROTO_H_
445 struct chdir_args {
446 	char	*path;
447 };
448 #endif
449 /* ARGSUSED */
450 int
451 chdir(td, uap)
452 	struct thread *td;
453 	struct chdir_args /* {
454 		char *path;
455 	} */ *uap;
456 {
457 
458 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
459 }
460 
461 int
462 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
463 {
464 	register struct filedesc *fdp = td->td_proc->p_fd;
465 	int error;
466 	struct nameidata nd;
467 	struct vnode *vp;
468 
469 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, pathseg, path, td);
470 	if ((error = namei(&nd)) != 0)
471 		return (error);
472 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
473 		vput(nd.ni_vp);
474 		NDFREE(&nd, NDF_ONLY_PNBUF);
475 		return (error);
476 	}
477 	VOP_UNLOCK(nd.ni_vp, 0, td);
478 	NDFREE(&nd, NDF_ONLY_PNBUF);
479 	FILEDESC_LOCK(fdp);
480 	vp = fdp->fd_cdir;
481 	fdp->fd_cdir = nd.ni_vp;
482 	FILEDESC_UNLOCK(fdp);
483 	vrele(vp);
484 	return (0);
485 }
486 
487 /*
488  * Helper function for raised chroot(2) security function:  Refuse if
489  * any filedescriptors are open directories.
490  */
491 static int
492 chroot_refuse_vdir_fds(fdp)
493 	struct filedesc *fdp;
494 {
495 	struct vnode *vp;
496 	struct file *fp;
497 	int fd;
498 
499 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
500 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
501 		fp = fget_locked(fdp, fd);
502 		if (fp == NULL)
503 			continue;
504 		if (fp->f_type == DTYPE_VNODE) {
505 			vp = fp->f_vnode;
506 			if (vp->v_type == VDIR)
507 				return (EPERM);
508 		}
509 	}
510 	return (0);
511 }
512 
513 /*
514  * This sysctl determines if we will allow a process to chroot(2) if it
515  * has a directory open:
516  *	0: disallowed for all processes.
517  *	1: allowed for processes that were not already chroot(2)'ed.
518  *	2: allowed for all processes.
519  */
520 
521 static int chroot_allow_open_directories = 1;
522 
523 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
524      &chroot_allow_open_directories, 0, "");
525 
526 /*
527  * Change notion of root (``/'') directory.
528  */
529 #ifndef _SYS_SYSPROTO_H_
530 struct chroot_args {
531 	char	*path;
532 };
533 #endif
534 /* ARGSUSED */
535 int
536 chroot(td, uap)
537 	struct thread *td;
538 	struct chroot_args /* {
539 		char *path;
540 	} */ *uap;
541 {
542 	int error;
543 	struct nameidata nd;
544 
545 	error = suser_cred(td->td_ucred, PRISON_ROOT);
546 	if (error)
547 		return (error);
548 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
549 	mtx_lock(&Giant);
550 	error = namei(&nd);
551 	if (error)
552 		goto error;
553 	if ((error = change_dir(nd.ni_vp, td)) != 0)
554 		goto e_vunlock;
555 #ifdef MAC
556 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
557 		goto e_vunlock;
558 #endif
559 	VOP_UNLOCK(nd.ni_vp, 0, td);
560 	error = change_root(nd.ni_vp, td);
561 	vrele(nd.ni_vp);
562 	NDFREE(&nd, NDF_ONLY_PNBUF);
563 	mtx_unlock(&Giant);
564 	return (error);
565 e_vunlock:
566 	vput(nd.ni_vp);
567 error:
568 	mtx_unlock(&Giant);
569 	NDFREE(&nd, NDF_ONLY_PNBUF);
570 	return (error);
571 }
572 
573 /*
574  * Common routine for chroot and chdir.  Callers must provide a locked vnode
575  * instance.
576  */
577 int
578 change_dir(vp, td)
579 	struct vnode *vp;
580 	struct thread *td;
581 {
582 	int error;
583 
584 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
585 	if (vp->v_type != VDIR)
586 		return (ENOTDIR);
587 #ifdef MAC
588 	error = mac_check_vnode_chdir(td->td_ucred, vp);
589 	if (error)
590 		return (error);
591 #endif
592 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
593 	return (error);
594 }
595 
596 /*
597  * Common routine for kern_chroot() and jail_attach().  The caller is
598  * responsible for invoking suser() and mac_check_chroot() to authorize this
599  * operation.
600  */
601 int
602 change_root(vp, td)
603 	struct vnode *vp;
604 	struct thread *td;
605 {
606 	struct filedesc *fdp;
607 	struct vnode *oldvp;
608 	int error;
609 
610 	mtx_assert(&Giant, MA_OWNED);
611 	fdp = td->td_proc->p_fd;
612 	FILEDESC_LOCK(fdp);
613 	if (chroot_allow_open_directories == 0 ||
614 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
615 		error = chroot_refuse_vdir_fds(fdp);
616 		if (error) {
617 			FILEDESC_UNLOCK(fdp);
618 			return (error);
619 		}
620 	}
621 	oldvp = fdp->fd_rdir;
622 	fdp->fd_rdir = vp;
623 	VREF(fdp->fd_rdir);
624 	if (!fdp->fd_jdir) {
625 		fdp->fd_jdir = vp;
626 		VREF(fdp->fd_jdir);
627 	}
628 	FILEDESC_UNLOCK(fdp);
629 	vrele(oldvp);
630 	return (0);
631 }
632 
633 /*
634  * Check permissions, allocate an open file structure,
635  * and call the device open routine if any.
636  */
637 #ifndef _SYS_SYSPROTO_H_
638 struct open_args {
639 	char	*path;
640 	int	flags;
641 	int	mode;
642 };
643 #endif
644 int
645 open(td, uap)
646 	struct thread *td;
647 	register struct open_args /* {
648 		char *path;
649 		int flags;
650 		int mode;
651 	} */ *uap;
652 {
653 
654 	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
655 }
656 
657 int
658 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
659     int mode)
660 {
661 	struct proc *p = td->td_proc;
662 	struct filedesc *fdp = p->p_fd;
663 	struct file *fp;
664 	struct vnode *vp;
665 	struct vattr vat;
666 	struct mount *mp;
667 	int cmode;
668 	struct file *nfp;
669 	int type, indx, error;
670 	struct flock lf;
671 	struct nameidata nd;
672 
673 	if ((flags & O_ACCMODE) == O_ACCMODE)
674 		return (EINVAL);
675 	flags = FFLAGS(flags);
676 	error = falloc(td, &nfp, &indx);
677 	if (error)
678 		return (error);
679 	fp = nfp;
680 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
681 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
682 	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
683 	/*
684 	 * Bump the ref count to prevent another process from closing
685 	 * the descriptor while we are blocked in vn_open()
686 	 */
687 	fhold(fp);
688 	error = vn_open(&nd, &flags, cmode);
689 	if (error) {
690 		/*
691 		 * release our own reference
692 		 */
693 		fdrop(fp, td);
694 
695 		/*
696 		 * handle special fdopen() case.  bleh.  dupfdopen() is
697 		 * responsible for dropping the old contents of ofiles[indx]
698 		 * if it succeeds.
699 		 */
700 		if ((error == ENODEV || error == ENXIO) &&
701 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
702 		    (error =
703 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
704 			td->td_retval[0] = indx;
705 			return (0);
706 		}
707 		/*
708 		 * Clean up the descriptor, but only if another thread hadn't
709 		 * replaced or closed it.
710 		 */
711 		FILEDESC_LOCK(fdp);
712 		if (fdp->fd_ofiles[indx] == fp) {
713 			fdp->fd_ofiles[indx] = NULL;
714 			FILEDESC_UNLOCK(fdp);
715 			fdrop(fp, td);
716 		} else
717 			FILEDESC_UNLOCK(fdp);
718 
719 		if (error == ERESTART)
720 			error = EINTR;
721 		return (error);
722 	}
723 	td->td_dupfd = 0;
724 	NDFREE(&nd, NDF_ONLY_PNBUF);
725 	vp = nd.ni_vp;
726 
727 	/*
728 	 * There should be 2 references on the file, one from the descriptor
729 	 * table, and one for us.
730 	 *
731 	 * Handle the case where someone closed the file (via its file
732 	 * descriptor) while we were blocked.  The end result should look
733 	 * like opening the file succeeded but it was immediately closed.
734 	 */
735 	FILEDESC_LOCK(fdp);
736 	FILE_LOCK(fp);
737 	if (fp->f_count == 1) {
738 		KASSERT(fdp->fd_ofiles[indx] != fp,
739 		    ("Open file descriptor lost all refs"));
740 		FILEDESC_UNLOCK(fdp);
741 		FILE_UNLOCK(fp);
742 		VOP_UNLOCK(vp, 0, td);
743 		vn_close(vp, flags & FMASK, fp->f_cred, td);
744 		fdrop(fp, td);
745 		td->td_retval[0] = indx;
746 		return 0;
747 	}
748 	fp->f_vnode = vp;
749 	fp->f_data = vp;
750 	fp->f_flag = flags & FMASK;
751 	fp->f_ops = &vnops;
752 	fp->f_seqcount = 1;
753 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
754 	FILEDESC_UNLOCK(fdp);
755 	FILE_UNLOCK(fp);
756 
757 	/* assert that vn_open created a backing object if one is needed */
758 	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
759 		("open: vmio vnode has no backing object after vn_open"));
760 
761 	VOP_UNLOCK(vp, 0, td);
762 	if (flags & (O_EXLOCK | O_SHLOCK)) {
763 		lf.l_whence = SEEK_SET;
764 		lf.l_start = 0;
765 		lf.l_len = 0;
766 		if (flags & O_EXLOCK)
767 			lf.l_type = F_WRLCK;
768 		else
769 			lf.l_type = F_RDLCK;
770 		type = F_FLOCK;
771 		if ((flags & FNONBLOCK) == 0)
772 			type |= F_WAIT;
773 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
774 			    type)) != 0)
775 			goto bad;
776 		fp->f_flag |= FHASLOCK;
777 	}
778 	if (flags & O_TRUNC) {
779 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
780 			goto bad;
781 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
782 		VATTR_NULL(&vat);
783 		vat.va_size = 0;
784 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
785 #ifdef MAC
786 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
787 		if (error == 0)
788 #endif
789 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
790 		VOP_UNLOCK(vp, 0, td);
791 		vn_finished_write(mp);
792 		if (error)
793 			goto bad;
794 	}
795 	/*
796 	 * Release our private reference, leaving the one associated with
797 	 * the descriptor table intact.
798 	 */
799 	fdrop(fp, td);
800 	td->td_retval[0] = indx;
801 	return (0);
802 bad:
803 	FILEDESC_LOCK(fdp);
804 	if (fdp->fd_ofiles[indx] == fp) {
805 		fdp->fd_ofiles[indx] = NULL;
806 		FILEDESC_UNLOCK(fdp);
807 		fdrop(fp, td);
808 	} else
809 		FILEDESC_UNLOCK(fdp);
810 	fdrop(fp, td);
811 	return (error);
812 }
813 
#ifdef COMPAT_43
/*
 * Old creat(2) compatibility call: create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
/*
 * Implemented as open() with the flags fixed to
 * O_WRONLY | O_CREAT | O_TRUNC.
 */
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	struct open_args oa;

	oa.path = uap->path;
	oa.flags = O_WRONLY | O_CREAT | O_TRUNC;
	oa.mode = uap->mode;
	return (open(td, &oa));
}
#endif /* COMPAT_43 */
844 
845 /*
846  * Create a special file.
847  */
848 #ifndef _SYS_SYSPROTO_H_
849 struct mknod_args {
850 	char	*path;
851 	int	mode;
852 	int	dev;
853 };
854 #endif
855 /* ARGSUSED */
856 int
857 mknod(td, uap)
858 	struct thread *td;
859 	register struct mknod_args /* {
860 		char *path;
861 		int mode;
862 		int dev;
863 	} */ *uap;
864 {
865 
866 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
867 }
868 
869 int
870 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
871     int dev)
872 {
873 	struct vnode *vp;
874 	struct mount *mp;
875 	struct vattr vattr;
876 	int error;
877 	int whiteout = 0;
878 	struct nameidata nd;
879 
880 	switch (mode & S_IFMT) {
881 	case S_IFCHR:
882 	case S_IFBLK:
883 		error = suser(td);
884 		break;
885 	default:
886 		error = suser_cred(td->td_ucred, PRISON_ROOT);
887 		break;
888 	}
889 	if (error)
890 		return (error);
891 restart:
892 	bwillwrite();
893 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
894 	if ((error = namei(&nd)) != 0)
895 		return (error);
896 	vp = nd.ni_vp;
897 	if (vp != NULL) {
898 		vrele(vp);
899 		error = EEXIST;
900 	} else {
901 		VATTR_NULL(&vattr);
902 		FILEDESC_LOCK(td->td_proc->p_fd);
903 		vattr.va_mode = (mode & ALLPERMS) &
904 		    ~td->td_proc->p_fd->fd_cmask;
905 		FILEDESC_UNLOCK(td->td_proc->p_fd);
906 		vattr.va_rdev = dev;
907 		whiteout = 0;
908 
909 		switch (mode & S_IFMT) {
910 		case S_IFMT:	/* used by badsect to flag bad sectors */
911 			vattr.va_type = VBAD;
912 			break;
913 		case S_IFCHR:
914 			vattr.va_type = VCHR;
915 			break;
916 		case S_IFBLK:
917 			vattr.va_type = VBLK;
918 			break;
919 		case S_IFWHT:
920 			whiteout = 1;
921 			break;
922 		default:
923 			error = EINVAL;
924 			break;
925 		}
926 	}
927 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
928 		NDFREE(&nd, NDF_ONLY_PNBUF);
929 		vput(nd.ni_dvp);
930 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
931 			return (error);
932 		goto restart;
933 	}
934 #ifdef MAC
935 	if (error == 0 && !whiteout)
936 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
937 		    &nd.ni_cnd, &vattr);
938 #endif
939 	if (!error) {
940 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
941 		if (whiteout)
942 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
943 		else {
944 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
945 						&nd.ni_cnd, &vattr);
946 			if (error == 0)
947 				vput(nd.ni_vp);
948 		}
949 	}
950 	NDFREE(&nd, NDF_ONLY_PNBUF);
951 	vput(nd.ni_dvp);
952 	vn_finished_write(mp);
953 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
954 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
955 	return (error);
956 }
957 
958 /*
959  * Create a named pipe.
960  */
961 #ifndef _SYS_SYSPROTO_H_
962 struct mkfifo_args {
963 	char	*path;
964 	int	mode;
965 };
966 #endif
967 /* ARGSUSED */
968 int
969 mkfifo(td, uap)
970 	struct thread *td;
971 	register struct mkfifo_args /* {
972 		char *path;
973 		int mode;
974 	} */ *uap;
975 {
976 
977 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
978 }
979 
980 int
981 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
982 {
983 	struct mount *mp;
984 	struct vattr vattr;
985 	int error;
986 	struct nameidata nd;
987 
988 restart:
989 	bwillwrite();
990 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
991 	if ((error = namei(&nd)) != 0)
992 		return (error);
993 	if (nd.ni_vp != NULL) {
994 		NDFREE(&nd, NDF_ONLY_PNBUF);
995 		vrele(nd.ni_vp);
996 		vput(nd.ni_dvp);
997 		return (EEXIST);
998 	}
999 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1000 		NDFREE(&nd, NDF_ONLY_PNBUF);
1001 		vput(nd.ni_dvp);
1002 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1003 			return (error);
1004 		goto restart;
1005 	}
1006 	VATTR_NULL(&vattr);
1007 	vattr.va_type = VFIFO;
1008 	FILEDESC_LOCK(td->td_proc->p_fd);
1009 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1010 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1011 #ifdef MAC
1012 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1013 	    &vattr);
1014 	if (error)
1015 		goto out;
1016 #endif
1017 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1018 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1019 	if (error == 0)
1020 		vput(nd.ni_vp);
1021 #ifdef MAC
1022 out:
1023 #endif
1024 	NDFREE(&nd, NDF_ONLY_PNBUF);
1025 	vput(nd.ni_dvp);
1026 	vn_finished_write(mp);
1027 	return (error);
1028 }
1029 
1030 /*
1031  * Make a hard file link.
1032  */
1033 #ifndef _SYS_SYSPROTO_H_
1034 struct link_args {
1035 	char	*path;
1036 	char	*link;
1037 };
1038 #endif
1039 /* ARGSUSED */
1040 int
1041 link(td, uap)
1042 	struct thread *td;
1043 	register struct link_args /* {
1044 		char *path;
1045 		char *link;
1046 	} */ *uap;
1047 {
1048 
1049 	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1050 }
1051 
1052 int
1053 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1054 {
1055 	struct vnode *vp;
1056 	struct mount *mp;
1057 	struct nameidata nd;
1058 	int error;
1059 
1060 	bwillwrite();
1061 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, segflg, path, td);
1062 	if ((error = namei(&nd)) != 0)
1063 		return (error);
1064 	NDFREE(&nd, NDF_ONLY_PNBUF);
1065 	vp = nd.ni_vp;
1066 	if (vp->v_type == VDIR) {
1067 		vrele(vp);
1068 		return (EPERM);		/* POSIX */
1069 	}
1070 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1071 		vrele(vp);
1072 		return (error);
1073 	}
1074 	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1075 	if ((error = namei(&nd)) == 0) {
1076 		if (nd.ni_vp != NULL) {
1077 			vrele(nd.ni_vp);
1078 			error = EEXIST;
1079 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1080 		    == 0) {
1081 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1082 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1083 #ifdef MAC
1084 			error = mac_check_vnode_link(td->td_ucred, nd.ni_dvp,
1085 			    vp, &nd.ni_cnd);
1086 			if (error == 0)
1087 #endif
1088 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1089 			VOP_UNLOCK(vp, 0, td);
1090 		}
1091 		NDFREE(&nd, NDF_ONLY_PNBUF);
1092 		vput(nd.ni_dvp);
1093 	}
1094 	vrele(vp);
1095 	vn_finished_write(mp);
1096 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1097 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1098 	return (error);
1099 }
1100 
1101 /*
1102  * Make a symbolic link.
1103  */
1104 #ifndef _SYS_SYSPROTO_H_
1105 struct symlink_args {
1106 	char	*path;
1107 	char	*link;
1108 };
1109 #endif
1110 /* ARGSUSED */
1111 int
1112 symlink(td, uap)
1113 	struct thread *td;
1114 	register struct symlink_args /* {
1115 		char *path;
1116 		char *link;
1117 	} */ *uap;
1118 {
1119 
1120 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1121 }
1122 
1123 int
1124 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1125 {
1126 	struct mount *mp;
1127 	struct vattr vattr;
1128 	char *syspath;
1129 	int error;
1130 	struct nameidata nd;
1131 
1132 	if (segflg == UIO_SYSSPACE) {
1133 		syspath = path;
1134 	} else {
1135 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1136 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1137 			goto out;
1138 	}
1139 restart:
1140 	bwillwrite();
1141 	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1142 	if ((error = namei(&nd)) != 0)
1143 		goto out;
1144 	if (nd.ni_vp) {
1145 		NDFREE(&nd, NDF_ONLY_PNBUF);
1146 		vrele(nd.ni_vp);
1147 		vput(nd.ni_dvp);
1148 		error = EEXIST;
1149 		goto out;
1150 	}
1151 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1152 		NDFREE(&nd, NDF_ONLY_PNBUF);
1153 		vput(nd.ni_dvp);
1154 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1155 			return (error);
1156 		goto restart;
1157 	}
1158 	VATTR_NULL(&vattr);
1159 	FILEDESC_LOCK(td->td_proc->p_fd);
1160 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1161 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1162 #ifdef MAC
1163 	vattr.va_type = VLNK;
1164 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1165 	    &vattr);
1166 	if (error)
1167 		goto out2;
1168 #endif
1169 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1170 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1171 	if (error == 0)
1172 		vput(nd.ni_vp);
1173 #ifdef MAC
1174 out2:
1175 #endif
1176 	NDFREE(&nd, NDF_ONLY_PNBUF);
1177 	vput(nd.ni_dvp);
1178 	vn_finished_write(mp);
1179 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1180 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1181 out:
1182 	if (segflg != UIO_SYSSPACE)
1183 		uma_zfree(namei_zone, syspath);
1184 	return (error);
1185 }
1186 
1187 /*
1188  * Delete a whiteout from the filesystem.
1189  */
1190 /* ARGSUSED */
1191 int
1192 undelete(td, uap)
1193 	struct thread *td;
1194 	register struct undelete_args /* {
1195 		char *path;
1196 	} */ *uap;
1197 {
1198 	int error;
1199 	struct mount *mp;
1200 	struct nameidata nd;
1201 
1202 restart:
1203 	bwillwrite();
1204 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1205 	    uap->path, td);
1206 	error = namei(&nd);
1207 	if (error)
1208 		return (error);
1209 
1210 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1211 		NDFREE(&nd, NDF_ONLY_PNBUF);
1212 		if (nd.ni_vp)
1213 			vrele(nd.ni_vp);
1214 		vput(nd.ni_dvp);
1215 		return (EEXIST);
1216 	}
1217 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1218 		NDFREE(&nd, NDF_ONLY_PNBUF);
1219 		vput(nd.ni_dvp);
1220 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1221 			return (error);
1222 		goto restart;
1223 	}
1224 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1225 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1226 	NDFREE(&nd, NDF_ONLY_PNBUF);
1227 	vput(nd.ni_dvp);
1228 	vn_finished_write(mp);
1229 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1230 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1231 	return (error);
1232 }
1233 
1234 /*
1235  * Delete a name from the filesystem.
1236  */
1237 #ifndef _SYS_SYSPROTO_H_
1238 struct unlink_args {
1239 	char	*path;
1240 };
1241 #endif
1242 /* ARGSUSED */
1243 int
1244 unlink(td, uap)
1245 	struct thread *td;
1246 	struct unlink_args /* {
1247 		char *path;
1248 	} */ *uap;
1249 {
1250 
1251 	return (kern_unlink(td, uap->path, UIO_USERSPACE));
1252 }
1253 
1254 int
1255 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1256 {
1257 	struct mount *mp;
1258 	struct vnode *vp;
1259 	int error;
1260 	struct nameidata nd;
1261 
1262 restart:
1263 	bwillwrite();
1264 	NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, pathseg, path, td);
1265 	if ((error = namei(&nd)) != 0)
1266 		return (error);
1267 	vp = nd.ni_vp;
1268 	if (vp->v_type == VDIR)
1269 		error = EPERM;		/* POSIX */
1270 	else {
1271 		/*
1272 		 * The root of a mounted filesystem cannot be deleted.
1273 		 *
1274 		 * XXX: can this only be a VDIR case?
1275 		 */
1276 		if (vp->v_vflag & VV_ROOT)
1277 			error = EBUSY;
1278 	}
1279 	if (error == 0) {
1280 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1281 			NDFREE(&nd, NDF_ONLY_PNBUF);
1282 			if (vp == nd.ni_dvp)
1283 				vrele(vp);
1284 			else
1285 				vput(vp);
1286 			vput(nd.ni_dvp);
1287 			if ((error = vn_start_write(NULL, &mp,
1288 			    V_XSLEEP | PCATCH)) != 0)
1289 				return (error);
1290 			goto restart;
1291 		}
1292 #ifdef MAC
1293 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1294 		    &nd.ni_cnd);
1295 		if (error)
1296 			goto out;
1297 #endif
1298 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1299 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1300 #ifdef MAC
1301 out:
1302 #endif
1303 		vn_finished_write(mp);
1304 	}
1305 	NDFREE(&nd, NDF_ONLY_PNBUF);
1306 	if (vp == nd.ni_dvp)
1307 		vrele(vp);
1308 	else
1309 		vput(vp);
1310 	vput(nd.ni_dvp);
1311 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1312 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1313 	return (error);
1314 }
1315 
1316 /*
1317  * Reposition read/write file offset.
1318  */
1319 #ifndef _SYS_SYSPROTO_H_
1320 struct lseek_args {
1321 	int	fd;
1322 	int	pad;
1323 	off_t	offset;
1324 	int	whence;
1325 };
1326 #endif
1327 int
1328 lseek(td, uap)
1329 	struct thread *td;
1330 	register struct lseek_args /* {
1331 		int fd;
1332 		int pad;
1333 		off_t offset;
1334 		int whence;
1335 	} */ *uap;
1336 {
1337 	struct ucred *cred = td->td_ucred;
1338 	struct file *fp;
1339 	struct vnode *vp;
1340 	struct vattr vattr;
1341 	off_t offset;
1342 	int error, noneg;
1343 
1344 	if ((error = fget(td, uap->fd, &fp)) != 0)
1345 		return (error);
1346 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1347 		fdrop(fp, td);
1348 		return (ESPIPE);
1349 	}
1350 	vp = fp->f_vnode;
1351 	noneg = (vp->v_type != VCHR);
1352 	offset = uap->offset;
1353 	switch (uap->whence) {
1354 	case L_INCR:
1355 		if (noneg &&
1356 		    (fp->f_offset < 0 ||
1357 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1358 			error = EOVERFLOW;
1359 			break;
1360 		}
1361 		offset += fp->f_offset;
1362 		break;
1363 	case L_XTND:
1364 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1365 		error = VOP_GETATTR(vp, &vattr, cred, td);
1366 		VOP_UNLOCK(vp, 0, td);
1367 		if (error)
1368 			break;
1369 		if (noneg &&
1370 		    (vattr.va_size > OFF_MAX ||
1371 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1372 			error = EOVERFLOW;
1373 			break;
1374 		}
1375 		offset += vattr.va_size;
1376 		break;
1377 	case L_SET:
1378 		break;
1379 	default:
1380 		error = EINVAL;
1381 	}
1382 	if (error == 0 && noneg && offset < 0)
1383 		error = EINVAL;
1384 	if (error != 0) {
1385 		fdrop(fp, td);
1386 		return (error);
1387 	}
1388 	fp->f_offset = offset;
1389 	*(off_t *)(td->td_retval) = fp->f_offset;
1390 	fdrop(fp, td);
1391 	return (0);
1392 }
1393 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Compatibility lseek taking a `long' offset.
 *
 * Repackages the arguments as a struct lseek_args and calls lseek().
 */
#ifndef _SYS_SYSPROTO_H_
struct olseek_args {
	int	fd;
	long	offset;
	int	whence;
};
#endif
int
olseek(struct thread *td, struct olseek_args *uap)
{
	struct lseek_args nuap;

	nuap.fd = uap->fd;
	nuap.offset = uap->offset;
	nuap.whence = uap->whence;
	return (lseek(td, &nuap));
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
1429 
1430 /*
1431  * Check access permissions using passed credentials.
1432  */
1433 static int
1434 vn_access(vp, user_flags, cred, td)
1435 	struct vnode	*vp;
1436 	int		user_flags;
1437 	struct ucred	*cred;
1438 	struct thread	*td;
1439 {
1440 	int error, flags;
1441 
1442 	/* Flags == 0 means only check for existence. */
1443 	error = 0;
1444 	if (user_flags) {
1445 		flags = 0;
1446 		if (user_flags & R_OK)
1447 			flags |= VREAD;
1448 		if (user_flags & W_OK)
1449 			flags |= VWRITE;
1450 		if (user_flags & X_OK)
1451 			flags |= VEXEC;
1452 #ifdef MAC
1453 		error = mac_check_vnode_access(cred, vp, flags);
1454 		if (error)
1455 			return (error);
1456 #endif
1457 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1458 			error = VOP_ACCESS(vp, flags, cred, td);
1459 	}
1460 	return (error);
1461 }
1462 
1463 /*
1464  * Check access permissions using "real" credentials.
1465  */
1466 #ifndef _SYS_SYSPROTO_H_
1467 struct access_args {
1468 	char	*path;
1469 	int	flags;
1470 };
1471 #endif
1472 int
1473 access(td, uap)
1474 	struct thread *td;
1475 	register struct access_args /* {
1476 		char *path;
1477 		int flags;
1478 	} */ *uap;
1479 {
1480 
1481 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1482 }
1483 
1484 int
1485 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1486 {
1487 	struct ucred *cred, *tmpcred;
1488 	register struct vnode *vp;
1489 	int error;
1490 	struct nameidata nd;
1491 
1492 	/*
1493 	 * Create and modify a temporary credential instead of one that
1494 	 * is potentially shared.  This could also mess up socket
1495 	 * buffer accounting which can run in an interrupt context.
1496 	 *
1497 	 * XXX - Depending on how "threads" are finally implemented, it
1498 	 * may be better to explicitly pass the credential to namei()
1499 	 * rather than to modify the potentially shared process structure.
1500 	 */
1501 	cred = td->td_ucred;
1502 	tmpcred = crdup(cred);
1503 	tmpcred->cr_uid = cred->cr_ruid;
1504 	tmpcred->cr_groups[0] = cred->cr_rgid;
1505 	td->td_ucred = tmpcred;
1506 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1507 	if ((error = namei(&nd)) != 0)
1508 		goto out1;
1509 	vp = nd.ni_vp;
1510 
1511 	error = vn_access(vp, flags, tmpcred, td);
1512 	NDFREE(&nd, NDF_ONLY_PNBUF);
1513 	vput(vp);
1514 out1:
1515 	td->td_ucred = cred;
1516 	crfree(tmpcred);
1517 	return (error);
1518 }
1519 
1520 /*
1521  * Check access permissions using "effective" credentials.
1522  */
1523 #ifndef _SYS_SYSPROTO_H_
1524 struct eaccess_args {
1525 	char	*path;
1526 	int	flags;
1527 };
1528 #endif
1529 int
1530 eaccess(td, uap)
1531 	struct thread *td;
1532 	register struct eaccess_args /* {
1533 		char *path;
1534 		int flags;
1535 	} */ *uap;
1536 {
1537 	struct nameidata nd;
1538 	struct vnode *vp;
1539 	int error;
1540 
1541 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1542 	    uap->path, td);
1543 	if ((error = namei(&nd)) != 0)
1544 		return (error);
1545 	vp = nd.ni_vp;
1546 
1547 	error = vn_access(vp, uap->flags, td->td_ucred, td);
1548 	NDFREE(&nd, NDF_ONLY_PNBUF);
1549 	vput(vp);
1550 	return (error);
1551 }
1552 
1553 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1554 /*
1555  * Get file status; this version follows links.
1556  */
1557 #ifndef _SYS_SYSPROTO_H_
1558 struct ostat_args {
1559 	char	*path;
1560 	struct ostat *ub;
1561 };
1562 #endif
1563 /* ARGSUSED */
1564 int
1565 ostat(td, uap)
1566 	struct thread *td;
1567 	register struct ostat_args /* {
1568 		char *path;
1569 		struct ostat *ub;
1570 	} */ *uap;
1571 {
1572 	struct stat sb;
1573 	struct ostat osb;
1574 	int error;
1575 	struct nameidata nd;
1576 
1577 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1578 	    uap->path, td);
1579 	if ((error = namei(&nd)) != 0)
1580 		return (error);
1581 	NDFREE(&nd, NDF_ONLY_PNBUF);
1582 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1583 	vput(nd.ni_vp);
1584 	if (error)
1585 		return (error);
1586 	cvtstat(&sb, &osb);
1587 	error = copyout(&osb, uap->ub, sizeof (osb));
1588 	return (error);
1589 }
1590 
1591 /*
1592  * Get file status; this version does not follow links.
1593  */
1594 #ifndef _SYS_SYSPROTO_H_
1595 struct olstat_args {
1596 	char	*path;
1597 	struct ostat *ub;
1598 };
1599 #endif
1600 /* ARGSUSED */
1601 int
1602 olstat(td, uap)
1603 	struct thread *td;
1604 	register struct olstat_args /* {
1605 		char *path;
1606 		struct ostat *ub;
1607 	} */ *uap;
1608 {
1609 	struct vnode *vp;
1610 	struct stat sb;
1611 	struct ostat osb;
1612 	int error;
1613 	struct nameidata nd;
1614 
1615 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1616 	    uap->path, td);
1617 	if ((error = namei(&nd)) != 0)
1618 		return (error);
1619 	vp = nd.ni_vp;
1620 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1621 	NDFREE(&nd, NDF_ONLY_PNBUF);
1622 	vput(vp);
1623 	if (error)
1624 		return (error);
1625 	cvtstat(&sb, &osb);
1626 	error = copyout(&osb, uap->ub, sizeof (osb));
1627 	return (error);
1628 }
1629 
1630 /*
1631  * Convert from an old to a new stat structure.
1632  */
1633 void
1634 cvtstat(st, ost)
1635 	struct stat *st;
1636 	struct ostat *ost;
1637 {
1638 
1639 	ost->st_dev = st->st_dev;
1640 	ost->st_ino = st->st_ino;
1641 	ost->st_mode = st->st_mode;
1642 	ost->st_nlink = st->st_nlink;
1643 	ost->st_uid = st->st_uid;
1644 	ost->st_gid = st->st_gid;
1645 	ost->st_rdev = st->st_rdev;
1646 	if (st->st_size < (quad_t)1 << 32)
1647 		ost->st_size = st->st_size;
1648 	else
1649 		ost->st_size = -2;
1650 	ost->st_atime = st->st_atime;
1651 	ost->st_mtime = st->st_mtime;
1652 	ost->st_ctime = st->st_ctime;
1653 	ost->st_blksize = st->st_blksize;
1654 	ost->st_blocks = st->st_blocks;
1655 	ost->st_flags = st->st_flags;
1656 	ost->st_gen = st->st_gen;
1657 }
1658 #endif /* COMPAT_43 || COMPAT_SUNOS */
1659 
1660 /*
1661  * Get file status; this version follows links.
1662  */
1663 #ifndef _SYS_SYSPROTO_H_
1664 struct stat_args {
1665 	char	*path;
1666 	struct stat *ub;
1667 };
1668 #endif
1669 /* ARGSUSED */
1670 int
1671 stat(td, uap)
1672 	struct thread *td;
1673 	register struct stat_args /* {
1674 		char *path;
1675 		struct stat *ub;
1676 	} */ *uap;
1677 {
1678 	struct stat sb;
1679 	int error;
1680 	struct nameidata nd;
1681 
1682 #ifdef LOOKUP_SHARED
1683 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
1684 	    UIO_USERSPACE, uap->path, td);
1685 #else
1686 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1687 	    uap->path, td);
1688 #endif
1689 	if ((error = namei(&nd)) != 0)
1690 		return (error);
1691 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1692 	NDFREE(&nd, NDF_ONLY_PNBUF);
1693 	vput(nd.ni_vp);
1694 	if (error)
1695 		return (error);
1696 	error = copyout(&sb, uap->ub, sizeof (sb));
1697 	return (error);
1698 }
1699 
1700 /*
1701  * Get file status; this version does not follow links.
1702  */
1703 #ifndef _SYS_SYSPROTO_H_
1704 struct lstat_args {
1705 	char	*path;
1706 	struct stat *ub;
1707 };
1708 #endif
1709 /* ARGSUSED */
1710 int
1711 lstat(td, uap)
1712 	struct thread *td;
1713 	register struct lstat_args /* {
1714 		char *path;
1715 		struct stat *ub;
1716 	} */ *uap;
1717 {
1718 	int error;
1719 	struct vnode *vp;
1720 	struct stat sb;
1721 	struct nameidata nd;
1722 
1723 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1724 	    uap->path, td);
1725 	if ((error = namei(&nd)) != 0)
1726 		return (error);
1727 	vp = nd.ni_vp;
1728 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1729 	NDFREE(&nd, NDF_ONLY_PNBUF);
1730 	vput(vp);
1731 	if (error)
1732 		return (error);
1733 	error = copyout(&sb, uap->ub, sizeof (sb));
1734 	return (error);
1735 }
1736 
1737 /*
1738  * Implementation of the NetBSD stat() function.
1739  * XXX This should probably be collapsed with the FreeBSD version,
1740  * as the differences are only due to vn_stat() clearing spares at
1741  * the end of the structures.  vn_stat could be split to avoid this,
1742  * and thus collapse the following to close to zero code.
1743  */
1744 void
1745 cvtnstat(sb, nsb)
1746 	struct stat *sb;
1747 	struct nstat *nsb;
1748 {
1749 	bzero(nsb, sizeof *nsb);
1750 	nsb->st_dev = sb->st_dev;
1751 	nsb->st_ino = sb->st_ino;
1752 	nsb->st_mode = sb->st_mode;
1753 	nsb->st_nlink = sb->st_nlink;
1754 	nsb->st_uid = sb->st_uid;
1755 	nsb->st_gid = sb->st_gid;
1756 	nsb->st_rdev = sb->st_rdev;
1757 	nsb->st_atimespec = sb->st_atimespec;
1758 	nsb->st_mtimespec = sb->st_mtimespec;
1759 	nsb->st_ctimespec = sb->st_ctimespec;
1760 	nsb->st_size = sb->st_size;
1761 	nsb->st_blocks = sb->st_blocks;
1762 	nsb->st_blksize = sb->st_blksize;
1763 	nsb->st_flags = sb->st_flags;
1764 	nsb->st_gen = sb->st_gen;
1765 	nsb->st_birthtimespec = sb->st_birthtimespec;
1766 }
1767 
1768 #ifndef _SYS_SYSPROTO_H_
1769 struct nstat_args {
1770 	char	*path;
1771 	struct nstat *ub;
1772 };
1773 #endif
1774 /* ARGSUSED */
1775 int
1776 nstat(td, uap)
1777 	struct thread *td;
1778 	register struct nstat_args /* {
1779 		char *path;
1780 		struct nstat *ub;
1781 	} */ *uap;
1782 {
1783 	struct stat sb;
1784 	struct nstat nsb;
1785 	int error;
1786 	struct nameidata nd;
1787 
1788 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1789 	    uap->path, td);
1790 	if ((error = namei(&nd)) != 0)
1791 		return (error);
1792 	NDFREE(&nd, NDF_ONLY_PNBUF);
1793 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1794 	vput(nd.ni_vp);
1795 	if (error)
1796 		return (error);
1797 	cvtnstat(&sb, &nsb);
1798 	error = copyout(&nsb, uap->ub, sizeof (nsb));
1799 	return (error);
1800 }
1801 
1802 /*
1803  * NetBSD lstat.  Get file status; this version does not follow links.
1804  */
1805 #ifndef _SYS_SYSPROTO_H_
1806 struct lstat_args {
1807 	char	*path;
1808 	struct stat *ub;
1809 };
1810 #endif
1811 /* ARGSUSED */
1812 int
1813 nlstat(td, uap)
1814 	struct thread *td;
1815 	register struct nlstat_args /* {
1816 		char *path;
1817 		struct nstat *ub;
1818 	} */ *uap;
1819 {
1820 	int error;
1821 	struct vnode *vp;
1822 	struct stat sb;
1823 	struct nstat nsb;
1824 	struct nameidata nd;
1825 
1826 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1827 	    uap->path, td);
1828 	if ((error = namei(&nd)) != 0)
1829 		return (error);
1830 	vp = nd.ni_vp;
1831 	NDFREE(&nd, NDF_ONLY_PNBUF);
1832 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1833 	vput(vp);
1834 	if (error)
1835 		return (error);
1836 	cvtnstat(&sb, &nsb);
1837 	error = copyout(&nsb, uap->ub, sizeof (nsb));
1838 	return (error);
1839 }
1840 
1841 /*
1842  * Get configurable pathname variables.
1843  */
1844 #ifndef _SYS_SYSPROTO_H_
1845 struct pathconf_args {
1846 	char	*path;
1847 	int	name;
1848 };
1849 #endif
1850 /* ARGSUSED */
1851 int
1852 pathconf(td, uap)
1853 	struct thread *td;
1854 	register struct pathconf_args /* {
1855 		char *path;
1856 		int name;
1857 	} */ *uap;
1858 {
1859 	int error;
1860 	struct nameidata nd;
1861 
1862 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1863 	    uap->path, td);
1864 	if ((error = namei(&nd)) != 0)
1865 		return (error);
1866 	NDFREE(&nd, NDF_ONLY_PNBUF);
1867 
1868 	/* If asynchronous I/O is available, it works for all files. */
1869 	if (uap->name == _PC_ASYNC_IO)
1870 		td->td_retval[0] = async_io_version;
1871 	else
1872 		error = VOP_PATHCONF(nd.ni_vp, uap->name, td->td_retval);
1873 	vput(nd.ni_vp);
1874 	return (error);
1875 }
1876 
1877 /*
1878  * Return target name of a symbolic link.
1879  */
1880 #ifndef _SYS_SYSPROTO_H_
1881 struct readlink_args {
1882 	char	*path;
1883 	char	*buf;
1884 	int	count;
1885 };
1886 #endif
1887 /* ARGSUSED */
1888 int
1889 readlink(td, uap)
1890 	struct thread *td;
1891 	register struct readlink_args /* {
1892 		char *path;
1893 		char *buf;
1894 		int count;
1895 	} */ *uap;
1896 {
1897 
1898 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
1899 	    UIO_USERSPACE, uap->count));
1900 }
1901 
1902 int
1903 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
1904     enum uio_seg bufseg, int count)
1905 {
1906 	register struct vnode *vp;
1907 	struct iovec aiov;
1908 	struct uio auio;
1909 	int error;
1910 	struct nameidata nd;
1911 
1912 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1913 	if ((error = namei(&nd)) != 0)
1914 		return (error);
1915 	NDFREE(&nd, NDF_ONLY_PNBUF);
1916 	vp = nd.ni_vp;
1917 #ifdef MAC
1918 	error = mac_check_vnode_readlink(td->td_ucred, vp);
1919 	if (error) {
1920 		vput(vp);
1921 		return (error);
1922 	}
1923 #endif
1924 	if (vp->v_type != VLNK)
1925 		error = EINVAL;
1926 	else {
1927 		aiov.iov_base = buf;
1928 		aiov.iov_len = count;
1929 		auio.uio_iov = &aiov;
1930 		auio.uio_iovcnt = 1;
1931 		auio.uio_offset = 0;
1932 		auio.uio_rw = UIO_READ;
1933 		auio.uio_segflg = bufseg;
1934 		auio.uio_td = td;
1935 		auio.uio_resid = count;
1936 		error = VOP_READLINK(vp, &auio, td->td_ucred);
1937 	}
1938 	vput(vp);
1939 	td->td_retval[0] = count - auio.uio_resid;
1940 	return (error);
1941 }
1942 
1943 /*
1944  * Common implementation code for chflags() and fchflags().
1945  */
1946 static int
1947 setfflags(td, vp, flags)
1948 	struct thread *td;
1949 	struct vnode *vp;
1950 	int flags;
1951 {
1952 	int error;
1953 	struct mount *mp;
1954 	struct vattr vattr;
1955 
1956 	/*
1957 	 * Prevent non-root users from setting flags on devices.  When
1958 	 * a device is reused, users can retain ownership of the device
1959 	 * if they are allowed to set flags and programs assume that
1960 	 * chown can't fail when done as root.
1961 	 */
1962 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
1963 		error = suser_cred(td->td_ucred, PRISON_ROOT);
1964 		if (error)
1965 			return (error);
1966 	}
1967 
1968 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1969 		return (error);
1970 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1971 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1972 	VATTR_NULL(&vattr);
1973 	vattr.va_flags = flags;
1974 #ifdef MAC
1975 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
1976 	if (error == 0)
1977 #endif
1978 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
1979 	VOP_UNLOCK(vp, 0, td);
1980 	vn_finished_write(mp);
1981 	return (error);
1982 }
1983 
1984 /*
1985  * Change flags of a file given a path name.
1986  */
1987 #ifndef _SYS_SYSPROTO_H_
1988 struct chflags_args {
1989 	char	*path;
1990 	int	flags;
1991 };
1992 #endif
1993 /* ARGSUSED */
1994 int
1995 chflags(td, uap)
1996 	struct thread *td;
1997 	register struct chflags_args /* {
1998 		char *path;
1999 		int flags;
2000 	} */ *uap;
2001 {
2002 	int error;
2003 	struct nameidata nd;
2004 
2005 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
2006 	if ((error = namei(&nd)) != 0)
2007 		return (error);
2008 	NDFREE(&nd, NDF_ONLY_PNBUF);
2009 	error = setfflags(td, nd.ni_vp, uap->flags);
2010 	vrele(nd.ni_vp);
2011 	return error;
2012 }
2013 
2014 /*
2015  * Same as chflags() but doesn't follow symlinks.
2016  */
2017 int
2018 lchflags(td, uap)
2019 	struct thread *td;
2020 	register struct lchflags_args /* {
2021 		char *path;
2022 		int flags;
2023 	} */ *uap;
2024 {
2025 	int error;
2026 	struct nameidata nd;
2027 
2028 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2029 	if ((error = namei(&nd)) != 0)
2030 		return (error);
2031 	NDFREE(&nd, NDF_ONLY_PNBUF);
2032 	error = setfflags(td, nd.ni_vp, uap->flags);
2033 	vrele(nd.ni_vp);
2034 	return error;
2035 }
2036 
2037 /*
2038  * Change flags of a file given a file descriptor.
2039  */
2040 #ifndef _SYS_SYSPROTO_H_
2041 struct fchflags_args {
2042 	int	fd;
2043 	int	flags;
2044 };
2045 #endif
2046 /* ARGSUSED */
2047 int
2048 fchflags(td, uap)
2049 	struct thread *td;
2050 	register struct fchflags_args /* {
2051 		int fd;
2052 		int flags;
2053 	} */ *uap;
2054 {
2055 	struct file *fp;
2056 	int error;
2057 
2058 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2059 		return (error);
2060 	error = setfflags(td, fp->f_vnode, uap->flags);
2061 	fdrop(fp, td);
2062 	return (error);
2063 }
2064 
2065 /*
2066  * Common implementation code for chmod(), lchmod() and fchmod().
2067  */
2068 static int
2069 setfmode(td, vp, mode)
2070 	struct thread *td;
2071 	struct vnode *vp;
2072 	int mode;
2073 {
2074 	int error;
2075 	struct mount *mp;
2076 	struct vattr vattr;
2077 
2078 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2079 		return (error);
2080 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2081 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2082 	VATTR_NULL(&vattr);
2083 	vattr.va_mode = mode & ALLPERMS;
2084 #ifdef MAC
2085 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2086 	if (error == 0)
2087 #endif
2088 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2089 	VOP_UNLOCK(vp, 0, td);
2090 	vn_finished_write(mp);
2091 	return error;
2092 }
2093 
2094 /*
2095  * Change mode of a file given path name.
2096  */
2097 #ifndef _SYS_SYSPROTO_H_
2098 struct chmod_args {
2099 	char	*path;
2100 	int	mode;
2101 };
2102 #endif
2103 /* ARGSUSED */
2104 int
2105 chmod(td, uap)
2106 	struct thread *td;
2107 	register struct chmod_args /* {
2108 		char *path;
2109 		int mode;
2110 	} */ *uap;
2111 {
2112 
2113 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2114 }
2115 
2116 int
2117 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2118 {
2119 	int error;
2120 	struct nameidata nd;
2121 
2122 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2123 	if ((error = namei(&nd)) != 0)
2124 		return (error);
2125 	NDFREE(&nd, NDF_ONLY_PNBUF);
2126 	error = setfmode(td, nd.ni_vp, mode);
2127 	vrele(nd.ni_vp);
2128 	return error;
2129 }
2130 
2131 /*
2132  * Change mode of a file given path name (don't follow links.)
2133  */
2134 #ifndef _SYS_SYSPROTO_H_
2135 struct lchmod_args {
2136 	char	*path;
2137 	int	mode;
2138 };
2139 #endif
2140 /* ARGSUSED */
2141 int
2142 lchmod(td, uap)
2143 	struct thread *td;
2144 	register struct lchmod_args /* {
2145 		char *path;
2146 		int mode;
2147 	} */ *uap;
2148 {
2149 	int error;
2150 	struct nameidata nd;
2151 
2152 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2153 	if ((error = namei(&nd)) != 0)
2154 		return (error);
2155 	NDFREE(&nd, NDF_ONLY_PNBUF);
2156 	error = setfmode(td, nd.ni_vp, uap->mode);
2157 	vrele(nd.ni_vp);
2158 	return error;
2159 }
2160 
2161 /*
2162  * Change mode of a file given a file descriptor.
2163  */
2164 #ifndef _SYS_SYSPROTO_H_
2165 struct fchmod_args {
2166 	int	fd;
2167 	int	mode;
2168 };
2169 #endif
2170 /* ARGSUSED */
2171 int
2172 fchmod(td, uap)
2173 	struct thread *td;
2174 	register struct fchmod_args /* {
2175 		int fd;
2176 		int mode;
2177 	} */ *uap;
2178 {
2179 	struct file *fp;
2180 	int error;
2181 
2182 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2183 		return (error);
2184 	error = setfmode(td, fp->f_vnode, uap->mode);
2185 	fdrop(fp, td);
2186 	return (error);
2187 }
2188 
2189 /*
2190  * Common implementation for chown(), lchown(), and fchown()
2191  */
2192 static int
2193 setfown(td, vp, uid, gid)
2194 	struct thread *td;
2195 	struct vnode *vp;
2196 	uid_t uid;
2197 	gid_t gid;
2198 {
2199 	int error;
2200 	struct mount *mp;
2201 	struct vattr vattr;
2202 
2203 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2204 		return (error);
2205 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2206 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2207 	VATTR_NULL(&vattr);
2208 	vattr.va_uid = uid;
2209 	vattr.va_gid = gid;
2210 #ifdef MAC
2211 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2212 	    vattr.va_gid);
2213 	if (error == 0)
2214 #endif
2215 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2216 	VOP_UNLOCK(vp, 0, td);
2217 	vn_finished_write(mp);
2218 	return error;
2219 }
2220 
2221 /*
2222  * Set ownership given a path name.
2223  */
2224 #ifndef _SYS_SYSPROTO_H_
2225 struct chown_args {
2226 	char	*path;
2227 	int	uid;
2228 	int	gid;
2229 };
2230 #endif
2231 /* ARGSUSED */
2232 int
2233 chown(td, uap)
2234 	struct thread *td;
2235 	register struct chown_args /* {
2236 		char *path;
2237 		int uid;
2238 		int gid;
2239 	} */ *uap;
2240 {
2241 
2242 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2243 }
2244 
2245 int
2246 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2247     int gid)
2248 {
2249 	int error;
2250 	struct nameidata nd;
2251 
2252 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2253 	if ((error = namei(&nd)) != 0)
2254 		return (error);
2255 	NDFREE(&nd, NDF_ONLY_PNBUF);
2256 	error = setfown(td, nd.ni_vp, uid, gid);
2257 	vrele(nd.ni_vp);
2258 	return (error);
2259 }
2260 
2261 /*
2262  * Set ownership given a path name, do not cross symlinks.
2263  */
2264 #ifndef _SYS_SYSPROTO_H_
2265 struct lchown_args {
2266 	char	*path;
2267 	int	uid;
2268 	int	gid;
2269 };
2270 #endif
2271 /* ARGSUSED */
2272 int
2273 lchown(td, uap)
2274 	struct thread *td;
2275 	register struct lchown_args /* {
2276 		char *path;
2277 		int uid;
2278 		int gid;
2279 	} */ *uap;
2280 {
2281 
2282 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2283 }
2284 
2285 int
2286 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2287     int gid)
2288 {
2289 	int error;
2290 	struct nameidata nd;
2291 
2292 	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2293 	if ((error = namei(&nd)) != 0)
2294 		return (error);
2295 	NDFREE(&nd, NDF_ONLY_PNBUF);
2296 	error = setfown(td, nd.ni_vp, uid, gid);
2297 	vrele(nd.ni_vp);
2298 	return (error);
2299 }
2300 
2301 /*
2302  * Set ownership given a file descriptor.
2303  */
2304 #ifndef _SYS_SYSPROTO_H_
2305 struct fchown_args {
2306 	int	fd;
2307 	int	uid;
2308 	int	gid;
2309 };
2310 #endif
2311 /* ARGSUSED */
2312 int
2313 fchown(td, uap)
2314 	struct thread *td;
2315 	register struct fchown_args /* {
2316 		int fd;
2317 		int uid;
2318 		int gid;
2319 	} */ *uap;
2320 {
2321 	struct file *fp;
2322 	int error;
2323 
2324 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2325 		return (error);
2326 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2327 	fdrop(fp, td);
2328 	return (error);
2329 }
2330 
2331 /*
2332  * Common implementation code for utimes(), lutimes(), and futimes().
2333  */
2334 static int
2335 getutimes(usrtvp, tvpseg, tsp)
2336 	const struct timeval *usrtvp;
2337 	enum uio_seg tvpseg;
2338 	struct timespec *tsp;
2339 {
2340 	struct timeval tv[2];
2341 	const struct timeval *tvp;
2342 	int error;
2343 
2344 	if (usrtvp == NULL) {
2345 		microtime(&tv[0]);
2346 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2347 		tsp[1] = tsp[0];
2348 	} else {
2349 		if (tvpseg == UIO_SYSSPACE) {
2350 			tvp = usrtvp;
2351 		} else {
2352 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2353 				return (error);
2354 			tvp = tv;
2355 		}
2356 
2357 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2358 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2359 	}
2360 	return 0;
2361 }
2362 
2363 /*
2364  * Common implementation code for utimes(), lutimes(), and futimes().
2365  */
2366 static int
2367 setutimes(td, vp, ts, numtimes, nullflag)
2368 	struct thread *td;
2369 	struct vnode *vp;
2370 	const struct timespec *ts;
2371 	int numtimes;
2372 	int nullflag;
2373 {
2374 	int error, setbirthtime;
2375 	struct mount *mp;
2376 	struct vattr vattr;
2377 
2378 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2379 		return (error);
2380 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2381 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2382 	setbirthtime = 0;
2383 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2384 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2385 		setbirthtime = 1;
2386 	VATTR_NULL(&vattr);
2387 	vattr.va_atime = ts[0];
2388 	vattr.va_mtime = ts[1];
2389 	if (setbirthtime)
2390 		vattr.va_birthtime = ts[1];
2391 	if (numtimes > 2)
2392 		vattr.va_birthtime = ts[2];
2393 	if (nullflag)
2394 		vattr.va_vaflags |= VA_UTIMES_NULL;
2395 #ifdef MAC
2396 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2397 	    vattr.va_mtime);
2398 #endif
2399 	if (error == 0)
2400 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2401 	VOP_UNLOCK(vp, 0, td);
2402 	vn_finished_write(mp);
2403 	return error;
2404 }
2405 
2406 /*
2407  * Set the access and modification times of a file.
2408  */
2409 #ifndef _SYS_SYSPROTO_H_
2410 struct utimes_args {
2411 	char	*path;
2412 	struct	timeval *tptr;
2413 };
2414 #endif
2415 /* ARGSUSED */
2416 int
2417 utimes(td, uap)
2418 	struct thread *td;
2419 	register struct utimes_args /* {
2420 		char *path;
2421 		struct timeval *tptr;
2422 	} */ *uap;
2423 {
2424 
2425 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2426 	    UIO_USERSPACE));
2427 }
2428 
2429 int
2430 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2431     struct timeval *tptr, enum uio_seg tptrseg)
2432 {
2433 	struct timespec ts[2];
2434 	int error;
2435 	struct nameidata nd;
2436 
2437 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2438 		return (error);
2439 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2440 	if ((error = namei(&nd)) != 0)
2441 		return (error);
2442 	NDFREE(&nd, NDF_ONLY_PNBUF);
2443 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2444 	vrele(nd.ni_vp);
2445 	return (error);
2446 }
2447 
2448 /*
2449  * Set the access and modification times of a file.
2450  */
2451 #ifndef _SYS_SYSPROTO_H_
2452 struct lutimes_args {
2453 	char	*path;
2454 	struct	timeval *tptr;
2455 };
2456 #endif
2457 /* ARGSUSED */
2458 int
2459 lutimes(td, uap)
2460 	struct thread *td;
2461 	register struct lutimes_args /* {
2462 		char *path;
2463 		struct timeval *tptr;
2464 	} */ *uap;
2465 {
2466 
2467 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2468 	    UIO_USERSPACE));
2469 }
2470 
2471 int
2472 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2473     struct timeval *tptr, enum uio_seg tptrseg)
2474 {
2475 	struct timespec ts[2];
2476 	int error;
2477 	struct nameidata nd;
2478 
2479 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2480 		return (error);
2481 	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2482 	if ((error = namei(&nd)) != 0)
2483 		return (error);
2484 	NDFREE(&nd, NDF_ONLY_PNBUF);
2485 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2486 	vrele(nd.ni_vp);
2487 	return (error);
2488 }
2489 
2490 /*
2491  * Set the access and modification times of a file.
2492  */
2493 #ifndef _SYS_SYSPROTO_H_
2494 struct futimes_args {
2495 	int	fd;
2496 	struct	timeval *tptr;
2497 };
2498 #endif
2499 /* ARGSUSED */
2500 int
2501 futimes(td, uap)
2502 	struct thread *td;
2503 	register struct futimes_args /* {
2504 		int  fd;
2505 		struct timeval *tptr;
2506 	} */ *uap;
2507 {
2508 
2509 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2510 }
2511 
2512 int
2513 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2514     enum uio_seg tptrseg)
2515 {
2516 	struct timespec ts[2];
2517 	struct file *fp;
2518 	int error;
2519 
2520 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2521 		return (error);
2522 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2523 		return (error);
2524 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2525 	fdrop(fp, td);
2526 	return (error);
2527 }
2528 
2529 /*
2530  * Truncate a file given its path name.
2531  */
2532 #ifndef _SYS_SYSPROTO_H_
2533 struct truncate_args {
2534 	char	*path;
2535 	int	pad;
2536 	off_t	length;
2537 };
2538 #endif
2539 /* ARGSUSED */
2540 int
2541 truncate(td, uap)
2542 	struct thread *td;
2543 	register struct truncate_args /* {
2544 		char *path;
2545 		int pad;
2546 		off_t length;
2547 	} */ *uap;
2548 {
2549 
2550 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2551 }
2552 
2553 int
2554 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2555 {
2556 	struct mount *mp;
2557 	struct vnode *vp;
2558 	struct vattr vattr;
2559 	int error;
2560 	struct nameidata nd;
2561 
2562 	if (length < 0)
2563 		return(EINVAL);
2564 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2565 	if ((error = namei(&nd)) != 0)
2566 		return (error);
2567 	vp = nd.ni_vp;
2568 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2569 		vrele(vp);
2570 		return (error);
2571 	}
2572 	NDFREE(&nd, NDF_ONLY_PNBUF);
2573 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2574 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2575 	if (vp->v_type == VDIR)
2576 		error = EISDIR;
2577 #ifdef MAC
2578 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2579 	}
2580 #endif
2581 	else if ((error = vn_writechk(vp)) == 0 &&
2582 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2583 		VATTR_NULL(&vattr);
2584 		vattr.va_size = length;
2585 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2586 	}
2587 	vput(vp);
2588 	vn_finished_write(mp);
2589 	return (error);
2590 }
2591 
2592 /*
2593  * Truncate a file given a file descriptor.
2594  */
2595 #ifndef _SYS_SYSPROTO_H_
2596 struct ftruncate_args {
2597 	int	fd;
2598 	int	pad;
2599 	off_t	length;
2600 };
2601 #endif
2602 /* ARGSUSED */
2603 int
2604 ftruncate(td, uap)
2605 	struct thread *td;
2606 	register struct ftruncate_args /* {
2607 		int fd;
2608 		int pad;
2609 		off_t length;
2610 	} */ *uap;
2611 {
2612 	struct mount *mp;
2613 	struct vattr vattr;
2614 	struct vnode *vp;
2615 	struct file *fp;
2616 	int error;
2617 
2618 	if (uap->length < 0)
2619 		return(EINVAL);
2620 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2621 		return (error);
2622 	if ((fp->f_flag & FWRITE) == 0) {
2623 		fdrop(fp, td);
2624 		return (EINVAL);
2625 	}
2626 	vp = fp->f_vnode;
2627 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2628 		fdrop(fp, td);
2629 		return (error);
2630 	}
2631 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2632 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2633 	if (vp->v_type == VDIR)
2634 		error = EISDIR;
2635 #ifdef MAC
2636 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
2637 	    vp))) {
2638 	}
2639 #endif
2640 	else if ((error = vn_writechk(vp)) == 0) {
2641 		VATTR_NULL(&vattr);
2642 		vattr.va_size = uap->length;
2643 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2644 	}
2645 	VOP_UNLOCK(vp, 0, td);
2646 	vn_finished_write(mp);
2647 	fdrop(fp, td);
2648 	return (error);
2649 }
2650 
2651 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2652 /*
2653  * Truncate a file given its path name.
2654  */
2655 #ifndef _SYS_SYSPROTO_H_
2656 struct otruncate_args {
2657 	char	*path;
2658 	long	length;
2659 };
2660 #endif
2661 /* ARGSUSED */
2662 int
2663 otruncate(td, uap)
2664 	struct thread *td;
2665 	register struct otruncate_args /* {
2666 		char *path;
2667 		long length;
2668 	} */ *uap;
2669 {
2670 	struct truncate_args /* {
2671 		char *path;
2672 		int pad;
2673 		off_t length;
2674 	} */ nuap;
2675 
2676 	nuap.path = uap->path;
2677 	nuap.length = uap->length;
2678 	return (truncate(td, &nuap));
2679 }
2680 
2681 /*
2682  * Truncate a file given a file descriptor.
2683  */
2684 #ifndef _SYS_SYSPROTO_H_
2685 struct oftruncate_args {
2686 	int	fd;
2687 	long	length;
2688 };
2689 #endif
2690 /* ARGSUSED */
2691 int
2692 oftruncate(td, uap)
2693 	struct thread *td;
2694 	register struct oftruncate_args /* {
2695 		int fd;
2696 		long length;
2697 	} */ *uap;
2698 {
2699 	struct ftruncate_args /* {
2700 		int fd;
2701 		int pad;
2702 		off_t length;
2703 	} */ nuap;
2704 
2705 	nuap.fd = uap->fd;
2706 	nuap.length = uap->length;
2707 	return (ftruncate(td, &nuap));
2708 }
2709 #endif /* COMPAT_43 || COMPAT_SUNOS */
2710 
2711 /*
2712  * Sync an open file.
2713  */
2714 #ifndef _SYS_SYSPROTO_H_
2715 struct fsync_args {
2716 	int	fd;
2717 };
2718 #endif
2719 /* ARGSUSED */
2720 int
2721 fsync(td, uap)
2722 	struct thread *td;
2723 	struct fsync_args /* {
2724 		int fd;
2725 	} */ *uap;
2726 {
2727 	struct vnode *vp;
2728 	struct mount *mp;
2729 	struct file *fp;
2730 	vm_object_t obj;
2731 	int error;
2732 
2733 	GIANT_REQUIRED;
2734 
2735 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2736 		return (error);
2737 	vp = fp->f_vnode;
2738 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2739 		fdrop(fp, td);
2740 		return (error);
2741 	}
2742 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2743 	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2744 		VM_OBJECT_LOCK(obj);
2745 		vm_object_page_clean(obj, 0, 0, 0);
2746 		VM_OBJECT_UNLOCK(obj);
2747 	}
2748 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2749 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)
2750 	    && softdep_fsync_hook != NULL)
2751 		error = (*softdep_fsync_hook)(vp);
2752 
2753 	VOP_UNLOCK(vp, 0, td);
2754 	vn_finished_write(mp);
2755 	fdrop(fp, td);
2756 	return (error);
2757 }
2758 
2759 /*
2760  * Rename files.  Source and destination must either both be directories,
2761  * or both not be directories.  If target is a directory, it must be empty.
2762  */
2763 #ifndef _SYS_SYSPROTO_H_
2764 struct rename_args {
2765 	char	*from;
2766 	char	*to;
2767 };
2768 #endif
2769 /* ARGSUSED */
2770 int
2771 rename(td, uap)
2772 	struct thread *td;
2773 	register struct rename_args /* {
2774 		char *from;
2775 		char *to;
2776 	} */ *uap;
2777 {
2778 
2779 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
2780 }
2781 
2782 int
2783 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
2784 {
2785 	struct mount *mp = NULL;
2786 	struct vnode *tvp, *fvp, *tdvp;
2787 	struct nameidata fromnd, tond;
2788 	int error;
2789 
2790 	bwillwrite();
2791 #ifdef MAC
2792 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART, pathseg,
2793 	    from, td);
2794 #else
2795 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, pathseg, from, td);
2796 #endif
2797 	if ((error = namei(&fromnd)) != 0)
2798 		return (error);
2799 #ifdef MAC
2800 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
2801 	    fromnd.ni_vp, &fromnd.ni_cnd);
2802 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
2803 	VOP_UNLOCK(fromnd.ni_vp, 0, td);
2804 #endif
2805 	fvp = fromnd.ni_vp;
2806 	if (error == 0)
2807 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
2808 	if (error != 0) {
2809 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2810 		vrele(fromnd.ni_dvp);
2811 		vrele(fvp);
2812 		goto out1;
2813 	}
2814 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
2815 	    NOOBJ, pathseg, to, td);
2816 	if (fromnd.ni_vp->v_type == VDIR)
2817 		tond.ni_cnd.cn_flags |= WILLBEDIR;
2818 	if ((error = namei(&tond)) != 0) {
2819 		/* Translate error code for rename("dir1", "dir2/."). */
2820 		if (error == EISDIR && fvp->v_type == VDIR)
2821 			error = EINVAL;
2822 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2823 		vrele(fromnd.ni_dvp);
2824 		vrele(fvp);
2825 		goto out1;
2826 	}
2827 	tdvp = tond.ni_dvp;
2828 	tvp = tond.ni_vp;
2829 	if (tvp != NULL) {
2830 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2831 			error = ENOTDIR;
2832 			goto out;
2833 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2834 			error = EISDIR;
2835 			goto out;
2836 		}
2837 	}
2838 	if (fvp == tdvp)
2839 		error = EINVAL;
2840 	/*
2841 	 * If the source is the same as the destination (that is, if they
2842 	 * are links to the same vnode), then there is nothing to do.
2843 	 */
2844 	if (fvp == tvp)
2845 		error = -1;
2846 #ifdef MAC
2847 	else
2848 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
2849 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
2850 #endif
2851 out:
2852 	if (!error) {
2853 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
2854 		if (fromnd.ni_dvp != tdvp) {
2855 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2856 		}
2857 		if (tvp) {
2858 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
2859 		}
2860 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2861 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2862 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2863 		NDFREE(&tond, NDF_ONLY_PNBUF);
2864 	} else {
2865 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2866 		NDFREE(&tond, NDF_ONLY_PNBUF);
2867 		if (tdvp == tvp)
2868 			vrele(tdvp);
2869 		else
2870 			vput(tdvp);
2871 		if (tvp)
2872 			vput(tvp);
2873 		vrele(fromnd.ni_dvp);
2874 		vrele(fvp);
2875 	}
2876 	vrele(tond.ni_startdir);
2877 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2878 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2879 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2880 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2881 out1:
2882 	vn_finished_write(mp);
2883 	if (fromnd.ni_startdir)
2884 		vrele(fromnd.ni_startdir);
2885 	if (error == -1)
2886 		return (0);
2887 	return (error);
2888 }
2889 
2890 /*
2891  * Make a directory file.
2892  */
2893 #ifndef _SYS_SYSPROTO_H_
2894 struct mkdir_args {
2895 	char	*path;
2896 	int	mode;
2897 };
2898 #endif
2899 /* ARGSUSED */
2900 int
2901 mkdir(td, uap)
2902 	struct thread *td;
2903 	register struct mkdir_args /* {
2904 		char *path;
2905 		int mode;
2906 	} */ *uap;
2907 {
2908 
2909 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
2910 }
2911 
2912 int
2913 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
2914 {
2915 	struct mount *mp;
2916 	struct vnode *vp;
2917 	struct vattr vattr;
2918 	int error;
2919 	struct nameidata nd;
2920 
2921 restart:
2922 	bwillwrite();
2923 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, path, td);
2924 	nd.ni_cnd.cn_flags |= WILLBEDIR;
2925 	if ((error = namei(&nd)) != 0)
2926 		return (error);
2927 	vp = nd.ni_vp;
2928 	if (vp != NULL) {
2929 		NDFREE(&nd, NDF_ONLY_PNBUF);
2930 		vrele(vp);
2931 		/*
2932 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
2933 		 * the strange behaviour of leaving the vnode unlocked
2934 		 * if the target is the same vnode as the parent.
2935 		 */
2936 		if (vp == nd.ni_dvp)
2937 			vrele(nd.ni_dvp);
2938 		else
2939 			vput(nd.ni_dvp);
2940 		return (EEXIST);
2941 	}
2942 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2943 		NDFREE(&nd, NDF_ONLY_PNBUF);
2944 		vput(nd.ni_dvp);
2945 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2946 			return (error);
2947 		goto restart;
2948 	}
2949 	VATTR_NULL(&vattr);
2950 	vattr.va_type = VDIR;
2951 	FILEDESC_LOCK(td->td_proc->p_fd);
2952 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
2953 	FILEDESC_UNLOCK(td->td_proc->p_fd);
2954 #ifdef MAC
2955 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
2956 	    &vattr);
2957 	if (error)
2958 		goto out;
2959 #endif
2960 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2961 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2962 #ifdef MAC
2963 out:
2964 #endif
2965 	NDFREE(&nd, NDF_ONLY_PNBUF);
2966 	vput(nd.ni_dvp);
2967 	if (!error)
2968 		vput(nd.ni_vp);
2969 	vn_finished_write(mp);
2970 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2971 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2972 	return (error);
2973 }
2974 
2975 /*
2976  * Remove a directory file.
2977  */
2978 #ifndef _SYS_SYSPROTO_H_
2979 struct rmdir_args {
2980 	char	*path;
2981 };
2982 #endif
2983 /* ARGSUSED */
2984 int
2985 rmdir(td, uap)
2986 	struct thread *td;
2987 	struct rmdir_args /* {
2988 		char *path;
2989 	} */ *uap;
2990 {
2991 
2992 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
2993 }
2994 
2995 int
2996 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
2997 {
2998 	struct mount *mp;
2999 	struct vnode *vp;
3000 	int error;
3001 	struct nameidata nd;
3002 
3003 restart:
3004 	bwillwrite();
3005 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, pathseg, path, td);
3006 	if ((error = namei(&nd)) != 0)
3007 		return (error);
3008 	vp = nd.ni_vp;
3009 	if (vp->v_type != VDIR) {
3010 		error = ENOTDIR;
3011 		goto out;
3012 	}
3013 	/*
3014 	 * No rmdir "." please.
3015 	 */
3016 	if (nd.ni_dvp == vp) {
3017 		error = EINVAL;
3018 		goto out;
3019 	}
3020 	/*
3021 	 * The root of a mounted filesystem cannot be deleted.
3022 	 */
3023 	if (vp->v_vflag & VV_ROOT) {
3024 		error = EBUSY;
3025 		goto out;
3026 	}
3027 #ifdef MAC
3028 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3029 	    &nd.ni_cnd);
3030 	if (error)
3031 		goto out;
3032 #endif
3033 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3034 		NDFREE(&nd, NDF_ONLY_PNBUF);
3035 		if (nd.ni_dvp == vp)
3036 			vrele(nd.ni_dvp);
3037 		else
3038 			vput(nd.ni_dvp);
3039 		vput(vp);
3040 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3041 			return (error);
3042 		goto restart;
3043 	}
3044 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3045 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3046 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3047 	vn_finished_write(mp);
3048 out:
3049 	NDFREE(&nd, NDF_ONLY_PNBUF);
3050 	if (nd.ni_dvp == vp)
3051 		vrele(nd.ni_dvp);
3052 	else
3053 		vput(nd.ni_dvp);
3054 	vput(vp);
3055 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3056 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3057 	return (error);
3058 }
3059 
3060 #ifdef COMPAT_43
3061 /*
3062  * Read a block of directory entries in a filesystem independent format.
3063  */
3064 #ifndef _SYS_SYSPROTO_H_
3065 struct ogetdirentries_args {
3066 	int	fd;
3067 	char	*buf;
3068 	u_int	count;
3069 	long	*basep;
3070 };
3071 #endif
3072 int
3073 ogetdirentries(td, uap)
3074 	struct thread *td;
3075 	register struct ogetdirentries_args /* {
3076 		int fd;
3077 		char *buf;
3078 		u_int count;
3079 		long *basep;
3080 	} */ *uap;
3081 {
3082 	struct vnode *vp;
3083 	struct file *fp;
3084 	struct uio auio, kuio;
3085 	struct iovec aiov, kiov;
3086 	struct dirent *dp, *edp;
3087 	caddr_t dirbuf;
3088 	int error, eofflag, readcnt;
3089 	long loff;
3090 
3091 	/* XXX arbitrary sanity limit on `count'. */
3092 	if (uap->count > 64 * 1024)
3093 		return (EINVAL);
3094 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3095 		return (error);
3096 	if ((fp->f_flag & FREAD) == 0) {
3097 		fdrop(fp, td);
3098 		return (EBADF);
3099 	}
3100 	vp = fp->f_vnode;
3101 unionread:
3102 	if (vp->v_type != VDIR) {
3103 		fdrop(fp, td);
3104 		return (EINVAL);
3105 	}
3106 	aiov.iov_base = uap->buf;
3107 	aiov.iov_len = uap->count;
3108 	auio.uio_iov = &aiov;
3109 	auio.uio_iovcnt = 1;
3110 	auio.uio_rw = UIO_READ;
3111 	auio.uio_segflg = UIO_USERSPACE;
3112 	auio.uio_td = td;
3113 	auio.uio_resid = uap->count;
3114 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3115 	loff = auio.uio_offset = fp->f_offset;
3116 #ifdef MAC
3117 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3118 	if (error) {
3119 		VOP_UNLOCK(vp, 0, td);
3120 		fdrop(fp, td);
3121 		return (error);
3122 	}
3123 #endif
3124 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3125 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3126 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3127 			    NULL, NULL);
3128 			fp->f_offset = auio.uio_offset;
3129 		} else
3130 #	endif
3131 	{
3132 		kuio = auio;
3133 		kuio.uio_iov = &kiov;
3134 		kuio.uio_segflg = UIO_SYSSPACE;
3135 		kiov.iov_len = uap->count;
3136 		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3137 		kiov.iov_base = dirbuf;
3138 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3139 			    NULL, NULL);
3140 		fp->f_offset = kuio.uio_offset;
3141 		if (error == 0) {
3142 			readcnt = uap->count - kuio.uio_resid;
3143 			edp = (struct dirent *)&dirbuf[readcnt];
3144 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3145 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3146 					/*
3147 					 * The expected low byte of
3148 					 * dp->d_namlen is our dp->d_type.
3149 					 * The high MBZ byte of dp->d_namlen
3150 					 * is our dp->d_namlen.
3151 					 */
3152 					dp->d_type = dp->d_namlen;
3153 					dp->d_namlen = 0;
3154 #				else
3155 					/*
3156 					 * The dp->d_type is the high byte
3157 					 * of the expected dp->d_namlen,
3158 					 * so must be zero'ed.
3159 					 */
3160 					dp->d_type = 0;
3161 #				endif
3162 				if (dp->d_reclen > 0) {
3163 					dp = (struct dirent *)
3164 					    ((char *)dp + dp->d_reclen);
3165 				} else {
3166 					error = EIO;
3167 					break;
3168 				}
3169 			}
3170 			if (dp >= edp)
3171 				error = uiomove(dirbuf, readcnt, &auio);
3172 		}
3173 		FREE(dirbuf, M_TEMP);
3174 	}
3175 	VOP_UNLOCK(vp, 0, td);
3176 	if (error) {
3177 		fdrop(fp, td);
3178 		return (error);
3179 	}
3180 	if (uap->count == auio.uio_resid) {
3181 		if (union_dircheckp) {
3182 			error = union_dircheckp(td, &vp, fp);
3183 			if (error == -1)
3184 				goto unionread;
3185 			if (error) {
3186 				fdrop(fp, td);
3187 				return (error);
3188 			}
3189 		}
3190 		/*
3191 		 * XXX We could delay dropping the lock above but
3192 		 * union_dircheckp complicates things.
3193 		 */
3194 		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
3195 		if ((vp->v_vflag & VV_ROOT) &&
3196 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3197 			struct vnode *tvp = vp;
3198 			vp = vp->v_mount->mnt_vnodecovered;
3199 			VREF(vp);
3200 			fp->f_vnode = vp;
3201 			fp->f_data = vp;
3202 			fp->f_offset = 0;
3203 			vput(tvp);
3204 			goto unionread;
3205 		}
3206 		VOP_UNLOCK(vp, 0, td);
3207 	}
3208 	error = copyout(&loff, uap->basep, sizeof(long));
3209 	fdrop(fp, td);
3210 	td->td_retval[0] = uap->count - auio.uio_resid;
3211 	return (error);
3212 }
3213 #endif /* COMPAT_43 */
3214 
3215 /*
3216  * Read a block of directory entries in a filesystem independent format.
3217  */
3218 #ifndef _SYS_SYSPROTO_H_
3219 struct getdirentries_args {
3220 	int	fd;
3221 	char	*buf;
3222 	u_int	count;
3223 	long	*basep;
3224 };
3225 #endif
3226 int
3227 getdirentries(td, uap)
3228 	struct thread *td;
3229 	register struct getdirentries_args /* {
3230 		int fd;
3231 		char *buf;
3232 		u_int count;
3233 		long *basep;
3234 	} */ *uap;
3235 {
3236 	struct vnode *vp;
3237 	struct file *fp;
3238 	struct uio auio;
3239 	struct iovec aiov;
3240 	long loff;
3241 	int error, eofflag;
3242 
3243 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3244 		return (error);
3245 	if ((fp->f_flag & FREAD) == 0) {
3246 		fdrop(fp, td);
3247 		return (EBADF);
3248 	}
3249 	vp = fp->f_vnode;
3250 unionread:
3251 	if (vp->v_type != VDIR) {
3252 		fdrop(fp, td);
3253 		return (EINVAL);
3254 	}
3255 	aiov.iov_base = uap->buf;
3256 	aiov.iov_len = uap->count;
3257 	auio.uio_iov = &aiov;
3258 	auio.uio_iovcnt = 1;
3259 	auio.uio_rw = UIO_READ;
3260 	auio.uio_segflg = UIO_USERSPACE;
3261 	auio.uio_td = td;
3262 	auio.uio_resid = uap->count;
3263 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3264 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3265 	loff = auio.uio_offset = fp->f_offset;
3266 #ifdef MAC
3267 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3268 	if (error == 0)
3269 #endif
3270 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3271 		    NULL);
3272 	fp->f_offset = auio.uio_offset;
3273 	VOP_UNLOCK(vp, 0, td);
3274 	if (error) {
3275 		fdrop(fp, td);
3276 		return (error);
3277 	}
3278 	if (uap->count == auio.uio_resid) {
3279 		if (union_dircheckp) {
3280 			error = union_dircheckp(td, &vp, fp);
3281 			if (error == -1)
3282 				goto unionread;
3283 			if (error) {
3284 				fdrop(fp, td);
3285 				return (error);
3286 			}
3287 		}
3288 		/*
3289 		 * XXX We could delay dropping the lock above but
3290 		 * union_dircheckp complicates things.
3291 		 */
3292 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3293 		if ((vp->v_vflag & VV_ROOT) &&
3294 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3295 			struct vnode *tvp = vp;
3296 			vp = vp->v_mount->mnt_vnodecovered;
3297 			VREF(vp);
3298 			fp->f_vnode = vp;
3299 			fp->f_data = vp;
3300 			fp->f_offset = 0;
3301 			vput(tvp);
3302 			goto unionread;
3303 		}
3304 		VOP_UNLOCK(vp, 0, td);
3305 	}
3306 	if (uap->basep != NULL) {
3307 		error = copyout(&loff, uap->basep, sizeof(long));
3308 	}
3309 	td->td_retval[0] = uap->count - auio.uio_resid;
3310 	fdrop(fp, td);
3311 	return (error);
3312 }
3313 #ifndef _SYS_SYSPROTO_H_
3314 struct getdents_args {
3315 	int fd;
3316 	char *buf;
3317 	size_t count;
3318 };
3319 #endif
3320 int
3321 getdents(td, uap)
3322 	struct thread *td;
3323 	register struct getdents_args /* {
3324 		int fd;
3325 		char *buf;
3326 		u_int count;
3327 	} */ *uap;
3328 {
3329 	struct getdirentries_args ap;
3330 	ap.fd = uap->fd;
3331 	ap.buf = uap->buf;
3332 	ap.count = uap->count;
3333 	ap.basep = NULL;
3334 	return getdirentries(td, &ap);
3335 }
3336 
3337 /*
3338  * Set the mode mask for creation of filesystem nodes.
3339  *
3340  * MP SAFE
3341  */
3342 #ifndef _SYS_SYSPROTO_H_
3343 struct umask_args {
3344 	int	newmask;
3345 };
3346 #endif
3347 int
3348 umask(td, uap)
3349 	struct thread *td;
3350 	struct umask_args /* {
3351 		int newmask;
3352 	} */ *uap;
3353 {
3354 	register struct filedesc *fdp;
3355 
3356 	FILEDESC_LOCK(td->td_proc->p_fd);
3357 	fdp = td->td_proc->p_fd;
3358 	td->td_retval[0] = fdp->fd_cmask;
3359 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3360 	FILEDESC_UNLOCK(td->td_proc->p_fd);
3361 	return (0);
3362 }
3363 
3364 /*
3365  * Void all references to file by ripping underlying filesystem
3366  * away from vnode.
3367  */
3368 #ifndef _SYS_SYSPROTO_H_
3369 struct revoke_args {
3370 	char	*path;
3371 };
3372 #endif
3373 /* ARGSUSED */
3374 int
3375 revoke(td, uap)
3376 	struct thread *td;
3377 	register struct revoke_args /* {
3378 		char *path;
3379 	} */ *uap;
3380 {
3381 	struct mount *mp;
3382 	struct vnode *vp;
3383 	struct vattr vattr;
3384 	int error;
3385 	struct nameidata nd;
3386 
3387 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
3388 	if ((error = namei(&nd)) != 0)
3389 		return (error);
3390 	vp = nd.ni_vp;
3391 	NDFREE(&nd, NDF_ONLY_PNBUF);
3392 	if (vp->v_type != VCHR) {
3393 		vput(vp);
3394 		return (EINVAL);
3395 	}
3396 #ifdef MAC
3397 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3398 	if (error) {
3399 		vput(vp);
3400 		return (error);
3401 	}
3402 #endif
3403 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3404 	if (error) {
3405 		vput(vp);
3406 		return (error);
3407 	}
3408 	VOP_UNLOCK(vp, 0, td);
3409 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3410 		error = suser_cred(td->td_ucred, PRISON_ROOT);
3411 		if (error)
3412 			goto out;
3413 	}
3414 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3415 		goto out;
3416 	if (vcount(vp) > 1)
3417 		VOP_REVOKE(vp, REVOKEALL);
3418 	vn_finished_write(mp);
3419 out:
3420 	vrele(vp);
3421 	return (error);
3422 }
3423 
3424 /*
3425  * Convert a user file descriptor to a kernel file entry.
3426  * The file entry is locked upon returning.
3427  */
3428 int
3429 getvnode(fdp, fd, fpp)
3430 	struct filedesc *fdp;
3431 	int fd;
3432 	struct file **fpp;
3433 {
3434 	int error;
3435 	struct file *fp;
3436 
3437 	fp = NULL;
3438 	if (fdp == NULL)
3439 		error = EBADF;
3440 	else {
3441 		FILEDESC_LOCK(fdp);
3442 		if ((u_int)fd >= fdp->fd_nfiles ||
3443 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3444 			error = EBADF;
3445 		else if (fp->f_vnode == NULL) {
3446 			fp = NULL;
3447 			error = EINVAL;
3448 		} else {
3449 			fhold(fp);
3450 			error = 0;
3451 		}
3452 		FILEDESC_UNLOCK(fdp);
3453 	}
3454 	*fpp = fp;
3455 	return (error);
3456 }
3457 
3458 /*
3459  * Get (NFS) file handle
3460  */
3461 #ifndef _SYS_SYSPROTO_H_
3462 struct getfh_args {
3463 	char	*fname;
3464 	fhandle_t *fhp;
3465 };
3466 #endif
3467 int
3468 getfh(td, uap)
3469 	struct thread *td;
3470 	register struct getfh_args *uap;
3471 {
3472 	struct nameidata nd;
3473 	fhandle_t fh;
3474 	register struct vnode *vp;
3475 	int error;
3476 
3477 	/*
3478 	 * Must be super user
3479 	 */
3480 	error = suser(td);
3481 	if (error)
3482 		return (error);
3483 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3484 	error = namei(&nd);
3485 	if (error)
3486 		return (error);
3487 	NDFREE(&nd, NDF_ONLY_PNBUF);
3488 	vp = nd.ni_vp;
3489 	bzero(&fh, sizeof(fh));
3490 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3491 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3492 	vput(vp);
3493 	if (error)
3494 		return (error);
3495 	error = copyout(&fh, uap->fhp, sizeof (fh));
3496 	return (error);
3497 }
3498 
3499 /*
3500  * syscall for the rpc.lockd to use to translate a NFS file handle into
3501  * an open descriptor.
3502  *
3503  * warning: do not remove the suser() call or this becomes one giant
3504  * security hole.
3505  */
3506 #ifndef _SYS_SYSPROTO_H_
3507 struct fhopen_args {
3508 	const struct fhandle *u_fhp;
3509 	int flags;
3510 };
3511 #endif
3512 int
3513 fhopen(td, uap)
3514 	struct thread *td;
3515 	struct fhopen_args /* {
3516 		const struct fhandle *u_fhp;
3517 		int flags;
3518 	} */ *uap;
3519 {
3520 	struct proc *p = td->td_proc;
3521 	struct mount *mp;
3522 	struct vnode *vp;
3523 	struct fhandle fhp;
3524 	struct vattr vat;
3525 	struct vattr *vap = &vat;
3526 	struct flock lf;
3527 	struct file *fp;
3528 	register struct filedesc *fdp = p->p_fd;
3529 	int fmode, mode, error, type;
3530 	struct file *nfp;
3531 	int indx;
3532 
3533 	/*
3534 	 * Must be super user
3535 	 */
3536 	error = suser(td);
3537 	if (error)
3538 		return (error);
3539 
3540 	fmode = FFLAGS(uap->flags);
3541 	/* why not allow a non-read/write open for our lockd? */
3542 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3543 		return (EINVAL);
3544 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
3545 	if (error)
3546 		return(error);
3547 	/* find the mount point */
3548 	mp = vfs_getvfs(&fhp.fh_fsid);
3549 	if (mp == NULL)
3550 		return (ESTALE);
3551 	/* now give me my vnode, it gets returned to me locked */
3552 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3553 	if (error)
3554 		return (error);
3555  	/*
3556 	 * from now on we have to make sure not
3557 	 * to forget about the vnode
3558 	 * any error that causes an abort must vput(vp)
3559 	 * just set error = err and 'goto bad;'.
3560 	 */
3561 
3562 	/*
3563 	 * from vn_open
3564 	 */
3565 	if (vp->v_type == VLNK) {
3566 		error = EMLINK;
3567 		goto bad;
3568 	}
3569 	if (vp->v_type == VSOCK) {
3570 		error = EOPNOTSUPP;
3571 		goto bad;
3572 	}
3573 	mode = 0;
3574 	if (fmode & (FWRITE | O_TRUNC)) {
3575 		if (vp->v_type == VDIR) {
3576 			error = EISDIR;
3577 			goto bad;
3578 		}
3579 		error = vn_writechk(vp);
3580 		if (error)
3581 			goto bad;
3582 		mode |= VWRITE;
3583 	}
3584 	if (fmode & FREAD)
3585 		mode |= VREAD;
3586 	if (fmode & O_APPEND)
3587 		mode |= VAPPEND;
3588 #ifdef MAC
3589 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
3590 	if (error)
3591 		goto bad;
3592 #endif
3593 	if (mode) {
3594 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
3595 		if (error)
3596 			goto bad;
3597 	}
3598 	if (fmode & O_TRUNC) {
3599 		VOP_UNLOCK(vp, 0, td);				/* XXX */
3600 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3601 			vrele(vp);
3602 			return (error);
3603 		}
3604 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3605 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
3606 #ifdef MAC
3607 		/*
3608 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
3609 		 * should be right.
3610 		 */
3611 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
3612 		if (error == 0) {
3613 #endif
3614 			VATTR_NULL(vap);
3615 			vap->va_size = 0;
3616 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
3617 #ifdef MAC
3618 		}
3619 #endif
3620 		vn_finished_write(mp);
3621 		if (error)
3622 			goto bad;
3623 	}
3624 	error = VOP_OPEN(vp, fmode, td->td_ucred, td);
3625 	if (error)
3626 		goto bad;
3627 	/*
3628 	 * Make sure that a VM object is created for VMIO support.
3629 	 */
3630 	if (vn_canvmio(vp) == TRUE) {
3631 		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
3632 			goto bad;
3633 	}
3634 	if (fmode & FWRITE)
3635 		vp->v_writecount++;
3636 
3637 	/*
3638 	 * end of vn_open code
3639 	 */
3640 
3641 	if ((error = falloc(td, &nfp, &indx)) != 0) {
3642 		if (fmode & FWRITE)
3643 			vp->v_writecount--;
3644 		goto bad;
3645 	}
3646 	fp = nfp;
3647 
3648 	/*
3649 	 * Hold an extra reference to avoid having fp ripped out
3650 	 * from under us while we block in the lock op
3651 	 */
3652 	fhold(fp);
3653 	nfp->f_vnode = vp;
3654 	nfp->f_data = vp;
3655 	nfp->f_flag = fmode & FMASK;
3656 	nfp->f_ops = &vnops;
3657 	nfp->f_type = DTYPE_VNODE;
3658 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
3659 		lf.l_whence = SEEK_SET;
3660 		lf.l_start = 0;
3661 		lf.l_len = 0;
3662 		if (fmode & O_EXLOCK)
3663 			lf.l_type = F_WRLCK;
3664 		else
3665 			lf.l_type = F_RDLCK;
3666 		type = F_FLOCK;
3667 		if ((fmode & FNONBLOCK) == 0)
3668 			type |= F_WAIT;
3669 		VOP_UNLOCK(vp, 0, td);
3670 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
3671 			    type)) != 0) {
3672 			/*
3673 			 * The lock request failed.  Normally close the
3674 			 * descriptor but handle the case where someone might
3675 			 * have dup()d or close()d it when we weren't looking.
3676 			 */
3677 			FILEDESC_LOCK(fdp);
3678 			if (fdp->fd_ofiles[indx] == fp) {
3679 				fdp->fd_ofiles[indx] = NULL;
3680 				FILEDESC_UNLOCK(fdp);
3681 				fdrop(fp, td);
3682 			} else
3683 				FILEDESC_UNLOCK(fdp);
3684 			/*
3685 			 * release our private reference
3686 			 */
3687 			fdrop(fp, td);
3688 			return(error);
3689 		}
3690 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3691 		fp->f_flag |= FHASLOCK;
3692 	}
3693 	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
3694 		vfs_object_create(vp, td, td->td_ucred);
3695 
3696 	VOP_UNLOCK(vp, 0, td);
3697 	fdrop(fp, td);
3698 	td->td_retval[0] = indx;
3699 	return (0);
3700 
3701 bad:
3702 	vput(vp);
3703 	return (error);
3704 }
3705 
3706 /*
3707  * Stat an (NFS) file handle.
3708  */
3709 #ifndef _SYS_SYSPROTO_H_
3710 struct fhstat_args {
3711 	struct fhandle *u_fhp;
3712 	struct stat *sb;
3713 };
3714 #endif
3715 int
3716 fhstat(td, uap)
3717 	struct thread *td;
3718 	register struct fhstat_args /* {
3719 		struct fhandle *u_fhp;
3720 		struct stat *sb;
3721 	} */ *uap;
3722 {
3723 	struct stat sb;
3724 	fhandle_t fh;
3725 	struct mount *mp;
3726 	struct vnode *vp;
3727 	int error;
3728 
3729 	/*
3730 	 * Must be super user
3731 	 */
3732 	error = suser(td);
3733 	if (error)
3734 		return (error);
3735 
3736 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
3737 	if (error)
3738 		return (error);
3739 
3740 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3741 		return (ESTALE);
3742 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3743 		return (error);
3744 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
3745 	vput(vp);
3746 	if (error)
3747 		return (error);
3748 	error = copyout(&sb, uap->sb, sizeof(sb));
3749 	return (error);
3750 }
3751 
3752 /*
3753  * Implement fstatfs() for (NFS) file handles.
3754  */
3755 #ifndef _SYS_SYSPROTO_H_
3756 struct fhstatfs_args {
3757 	struct fhandle *u_fhp;
3758 	struct statfs *buf;
3759 };
3760 #endif
3761 int
3762 fhstatfs(td, uap)
3763 	struct thread *td;
3764 	struct fhstatfs_args /* {
3765 		struct fhandle *u_fhp;
3766 		struct statfs *buf;
3767 	} */ *uap;
3768 {
3769 	struct statfs *sp;
3770 	struct mount *mp;
3771 	struct vnode *vp;
3772 	struct statfs sb;
3773 	fhandle_t fh;
3774 	int error;
3775 
3776 	/*
3777 	 * Must be super user
3778 	 */
3779 	error = suser(td);
3780 	if (error)
3781 		return (error);
3782 
3783 	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
3784 		return (error);
3785 
3786 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3787 		return (ESTALE);
3788 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3789 		return (error);
3790 	mp = vp->v_mount;
3791 	sp = &mp->mnt_stat;
3792 	vput(vp);
3793 #ifdef MAC
3794 	error = mac_check_mount_stat(td->td_ucred, mp);
3795 	if (error)
3796 		return (error);
3797 #endif
3798 	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3799 		return (error);
3800 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3801 	if (suser(td)) {
3802 		bcopy(sp, &sb, sizeof(sb));
3803 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3804 		sp = &sb;
3805 	}
3806 	return (copyout(sp, uap->buf, sizeof(*sp)));
3807 }
3808 
3809 /*
3810  * Syscall to push extended attribute configuration information into the
3811  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3812  * a command (int cmd), and attribute name and misc data.  For now, the
3813  * attribute name is left in userspace for consumption by the VFS_op.
3814  * It will probably be changed to be copied into sysspace by the
3815  * syscall in the future, once issues with various consumers of the
3816  * attribute code have raised their hands.
3817  *
3818  * Currently this is used only by UFS Extended Attributes.
3819  */
3820 int
3821 extattrctl(td, uap)
3822 	struct thread *td;
3823 	struct extattrctl_args /* {
3824 		const char *path;
3825 		int cmd;
3826 		const char *filename;
3827 		int attrnamespace;
3828 		const char *attrname;
3829 	} */ *uap;
3830 {
3831 	struct vnode *filename_vp;
3832 	struct nameidata nd;
3833 	struct mount *mp, *mp_writable;
3834 	char attrname[EXTATTR_MAXNAMELEN];
3835 	int error;
3836 
3837 	/*
3838 	 * uap->attrname is not always defined.  We check again later when we
3839 	 * invoke the VFS call so as to pass in NULL there if needed.
3840 	 */
3841 	if (uap->attrname != NULL) {
3842 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
3843 		    NULL);
3844 		if (error)
3845 			return (error);
3846 	}
3847 
3848 	/*
3849 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
3850 	 * which VFS_EXTATTRCTL() will later release.
3851 	 */
3852 	filename_vp = NULL;
3853 	if (uap->filename != NULL) {
3854 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3855 		    uap->filename, td);
3856 		error = namei(&nd);
3857 		if (error)
3858 			return (error);
3859 		filename_vp = nd.ni_vp;
3860 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3861 	}
3862 
3863 	/* uap->path is always defined. */
3864 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3865 	error = namei(&nd);
3866 	if (error) {
3867 		if (filename_vp != NULL)
3868 			vput(filename_vp);
3869 		return (error);
3870 	}
3871 	mp = nd.ni_vp->v_mount;
3872 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
3873 	NDFREE(&nd, 0);
3874 	if (error) {
3875 		if (filename_vp != NULL)
3876 			vput(filename_vp);
3877 		return (error);
3878 	}
3879 
3880 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
3881 	    uap->attrname != NULL ? attrname : NULL, td);
3882 
3883 	vn_finished_write(mp_writable);
3884 	/*
3885 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3886 	 * filename_vp, so vrele it if it is defined.
3887 	 */
3888 	if (filename_vp != NULL)
3889 		vrele(filename_vp);
3890 	return (error);
3891 }
3892 
3893 /*-
3894  * Set a named extended attribute on a file or directory
3895  *
3896  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3897  *            kernelspace string pointer "attrname", userspace buffer
3898  *            pointer "data", buffer length "nbytes", thread "td".
3899  * Returns: 0 on success, an error number otherwise
3900  * Locks: none
3901  * References: vp must be a valid reference for the duration of the call
3902  */
3903 static int
3904 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3905     void *data, size_t nbytes, struct thread *td)
3906 {
3907 	struct mount *mp;
3908 	struct uio auio;
3909 	struct iovec aiov;
3910 	ssize_t cnt;
3911 	int error;
3912 
3913 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
3914 	if (error)
3915 		return (error);
3916 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3917 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3918 
3919 	aiov.iov_base = data;
3920 	aiov.iov_len = nbytes;
3921 	auio.uio_iov = &aiov;
3922 	auio.uio_iovcnt = 1;
3923 	auio.uio_offset = 0;
3924 	if (nbytes > INT_MAX) {
3925 		error = EINVAL;
3926 		goto done;
3927 	}
3928 	auio.uio_resid = nbytes;
3929 	auio.uio_rw = UIO_WRITE;
3930 	auio.uio_segflg = UIO_USERSPACE;
3931 	auio.uio_td = td;
3932 	cnt = nbytes;
3933 
3934 #ifdef MAC
3935 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
3936 	    attrname, &auio);
3937 	if (error)
3938 		goto done;
3939 #endif
3940 
3941 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3942 	    td->td_ucred, td);
3943 	cnt -= auio.uio_resid;
3944 	td->td_retval[0] = cnt;
3945 
3946 done:
3947 	VOP_UNLOCK(vp, 0, td);
3948 	vn_finished_write(mp);
3949 	return (error);
3950 }
3951 
3952 int
3953 extattr_set_fd(td, uap)
3954 	struct thread *td;
3955 	struct extattr_set_fd_args /* {
3956 		int fd;
3957 		int attrnamespace;
3958 		const char *attrname;
3959 		void *data;
3960 		size_t nbytes;
3961 	} */ *uap;
3962 {
3963 	struct file *fp;
3964 	char attrname[EXTATTR_MAXNAMELEN];
3965 	int error;
3966 
3967 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3968 	if (error)
3969 		return (error);
3970 
3971 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
3972 	if (error)
3973 		return (error);
3974 
3975 	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
3976 	    attrname, uap->data, uap->nbytes, td);
3977 	fdrop(fp, td);
3978 
3979 	return (error);
3980 }
3981 
3982 int
3983 extattr_set_file(td, uap)
3984 	struct thread *td;
3985 	struct extattr_set_file_args /* {
3986 		const char *path;
3987 		int attrnamespace;
3988 		const char *attrname;
3989 		void *data;
3990 		size_t nbytes;
3991 	} */ *uap;
3992 {
3993 	struct nameidata nd;
3994 	char attrname[EXTATTR_MAXNAMELEN];
3995 	int error;
3996 
3997 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3998 	if (error)
3999 		return (error);
4000 
4001 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4002 	error = namei(&nd);
4003 	if (error)
4004 		return (error);
4005 	NDFREE(&nd, NDF_ONLY_PNBUF);
4006 
4007 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4008 	    uap->data, uap->nbytes, td);
4009 
4010 	vrele(nd.ni_vp);
4011 	return (error);
4012 }
4013 
4014 int
4015 extattr_set_link(td, uap)
4016 	struct thread *td;
4017 	struct extattr_set_link_args /* {
4018 		const char *path;
4019 		int attrnamespace;
4020 		const char *attrname;
4021 		void *data;
4022 		size_t nbytes;
4023 	} */ *uap;
4024 {
4025 	struct nameidata nd;
4026 	char attrname[EXTATTR_MAXNAMELEN];
4027 	int error;
4028 
4029 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4030 	if (error)
4031 		return (error);
4032 
4033 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4034 	error = namei(&nd);
4035 	if (error)
4036 		return (error);
4037 	NDFREE(&nd, NDF_ONLY_PNBUF);
4038 
4039 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4040 	    uap->data, uap->nbytes, td);
4041 
4042 	vrele(nd.ni_vp);
4043 	return (error);
4044 }
4045 
4046 /*-
4047  * Get a named extended attribute on a file or directory
4048  *
4049  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4050  *            kernelspace string pointer "attrname", userspace buffer
4051  *            pointer "data", buffer length "nbytes", thread "td".
4052  * Returns: 0 on success, an error number otherwise
4053  * Locks: none
4054  * References: vp must be a valid reference for the duration of the call
4055  */
4056 static int
4057 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4058     void *data, size_t nbytes, struct thread *td)
4059 {
4060 	struct uio auio, *auiop;
4061 	struct iovec aiov;
4062 	ssize_t cnt;
4063 	size_t size, *sizep;
4064 	int error;
4065 
4066 	/*
4067 	 * XXX: Temporary API compatibility for applications that know
4068 	 * about this hack ("" means list), but haven't been updated
4069 	 * for the extattr_list_*() system calls yet.  This will go
4070 	 * away for FreeBSD 5.3.
4071 	 */
4072 	if (strlen(attrname) == 0)
4073 		return (extattr_list_vp(vp, attrnamespace, data, nbytes, td));
4074 
4075 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4076 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4077 
4078 	/*
4079 	 * Slightly unusual semantics: if the user provides a NULL data
4080 	 * pointer, they don't want to receive the data, just the
4081 	 * maximum read length.
4082 	 */
4083 	auiop = NULL;
4084 	sizep = NULL;
4085 	cnt = 0;
4086 	if (data != NULL) {
4087 		aiov.iov_base = data;
4088 		aiov.iov_len = nbytes;
4089 		auio.uio_iov = &aiov;
4090 		auio.uio_offset = 0;
4091 		if (nbytes > INT_MAX) {
4092 			error = EINVAL;
4093 			goto done;
4094 		}
4095 		auio.uio_resid = nbytes;
4096 		auio.uio_rw = UIO_READ;
4097 		auio.uio_segflg = UIO_USERSPACE;
4098 		auio.uio_td = td;
4099 		auiop = &auio;
4100 		cnt = nbytes;
4101 	} else
4102 		sizep = &size;
4103 
4104 #ifdef MAC
4105 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4106 	    attrname, &auio);
4107 	if (error)
4108 		goto done;
4109 #endif
4110 
4111 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4112 	    td->td_ucred, td);
4113 
4114 	if (auiop != NULL) {
4115 		cnt -= auio.uio_resid;
4116 		td->td_retval[0] = cnt;
4117 	} else
4118 		td->td_retval[0] = size;
4119 
4120 done:
4121 	VOP_UNLOCK(vp, 0, td);
4122 	return (error);
4123 }
4124 
4125 int
4126 extattr_get_fd(td, uap)
4127 	struct thread *td;
4128 	struct extattr_get_fd_args /* {
4129 		int fd;
4130 		int attrnamespace;
4131 		const char *attrname;
4132 		void *data;
4133 		size_t nbytes;
4134 	} */ *uap;
4135 {
4136 	struct file *fp;
4137 	char attrname[EXTATTR_MAXNAMELEN];
4138 	int error;
4139 
4140 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4141 	if (error)
4142 		return (error);
4143 
4144 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4145 	if (error)
4146 		return (error);
4147 
4148 	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4149 	    attrname, uap->data, uap->nbytes, td);
4150 
4151 	fdrop(fp, td);
4152 	return (error);
4153 }
4154 
4155 int
4156 extattr_get_file(td, uap)
4157 	struct thread *td;
4158 	struct extattr_get_file_args /* {
4159 		const char *path;
4160 		int attrnamespace;
4161 		const char *attrname;
4162 		void *data;
4163 		size_t nbytes;
4164 	} */ *uap;
4165 {
4166 	struct nameidata nd;
4167 	char attrname[EXTATTR_MAXNAMELEN];
4168 	int error;
4169 
4170 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4171 	if (error)
4172 		return (error);
4173 
4174 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4175 	error = namei(&nd);
4176 	if (error)
4177 		return (error);
4178 	NDFREE(&nd, NDF_ONLY_PNBUF);
4179 
4180 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4181 	    uap->data, uap->nbytes, td);
4182 
4183 	vrele(nd.ni_vp);
4184 	return (error);
4185 }
4186 
4187 int
4188 extattr_get_link(td, uap)
4189 	struct thread *td;
4190 	struct extattr_get_link_args /* {
4191 		const char *path;
4192 		int attrnamespace;
4193 		const char *attrname;
4194 		void *data;
4195 		size_t nbytes;
4196 	} */ *uap;
4197 {
4198 	struct nameidata nd;
4199 	char attrname[EXTATTR_MAXNAMELEN];
4200 	int error;
4201 
4202 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4203 	if (error)
4204 		return (error);
4205 
4206 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4207 	error = namei(&nd);
4208 	if (error)
4209 		return (error);
4210 	NDFREE(&nd, NDF_ONLY_PNBUF);
4211 
4212 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4213 	    uap->data, uap->nbytes, td);
4214 
4215 	vrele(nd.ni_vp);
4216 	return (error);
4217 }
4218 
4219 /*
4220  * extattr_delete_vp(): Delete a named extended attribute on a file or
4221  *                      directory
4222  *
4223  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4224  *            kernelspace string pointer "attrname", proc "p"
4225  * Returns: 0 on success, an error number otherwise
4226  * Locks: none
4227  * References: vp must be a valid reference for the duration of the call
4228  */
4229 static int
4230 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4231     struct thread *td)
4232 {
4233 	struct mount *mp;
4234 	int error;
4235 
4236 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4237 	if (error)
4238 		return (error);
4239 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4240 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4241 
4242 #ifdef MAC
4243 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4244 	    attrname, NULL);
4245 	if (error)
4246 		goto done;
4247 #endif
4248 
4249 	error = VOP_RMEXTATTR(vp, attrnamespace, attrname, td->td_ucred, td);
4250 	if (error == EOPNOTSUPP)
4251 		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4252 		    td->td_ucred, td);
4253 #ifdef MAC
4254 done:
4255 #endif
4256 	VOP_UNLOCK(vp, 0, td);
4257 	vn_finished_write(mp);
4258 	return (error);
4259 }
4260 
4261 int
4262 extattr_delete_fd(td, uap)
4263 	struct thread *td;
4264 	struct extattr_delete_fd_args /* {
4265 		int fd;
4266 		int attrnamespace;
4267 		const char *attrname;
4268 	} */ *uap;
4269 {
4270 	struct file *fp;
4271 	struct vnode *vp;
4272 	char attrname[EXTATTR_MAXNAMELEN];
4273 	int error;
4274 
4275 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4276 	if (error)
4277 		return (error);
4278 
4279 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4280 	if (error)
4281 		return (error);
4282 	vp = fp->f_vnode;
4283 
4284 	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
4285 	fdrop(fp, td);
4286 	return (error);
4287 }
4288 
4289 int
4290 extattr_delete_file(td, uap)
4291 	struct thread *td;
4292 	struct extattr_delete_file_args /* {
4293 		const char *path;
4294 		int attrnamespace;
4295 		const char *attrname;
4296 	} */ *uap;
4297 {
4298 	struct nameidata nd;
4299 	char attrname[EXTATTR_MAXNAMELEN];
4300 	int error;
4301 
4302 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4303 	if (error)
4304 		return(error);
4305 
4306 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4307 	error = namei(&nd);
4308 	if (error)
4309 		return(error);
4310 	NDFREE(&nd, NDF_ONLY_PNBUF);
4311 
4312 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4313 	vrele(nd.ni_vp);
4314 	return(error);
4315 }
4316 
4317 int
4318 extattr_delete_link(td, uap)
4319 	struct thread *td;
4320 	struct extattr_delete_link_args /* {
4321 		const char *path;
4322 		int attrnamespace;
4323 		const char *attrname;
4324 	} */ *uap;
4325 {
4326 	struct nameidata nd;
4327 	char attrname[EXTATTR_MAXNAMELEN];
4328 	int error;
4329 
4330 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4331 	if (error)
4332 		return(error);
4333 
4334 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4335 	error = namei(&nd);
4336 	if (error)
4337 		return(error);
4338 	NDFREE(&nd, NDF_ONLY_PNBUF);
4339 
4340 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4341 	vrele(nd.ni_vp);
4342 	return(error);
4343 }
4344 
4345 /*-
4346  * Retrieve a list of extended attributes on a file or directory.
4347  *
4348  * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4349  *            userspace buffer pointer "data", buffer length "nbytes",
4350  *            thread "td".
4351  * Returns: 0 on success, an error number otherwise
4352  * Locks: none
4353  * References: vp must be a valid reference for the duration of the call
4354  */
4355 static int
4356 extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4357     size_t nbytes, struct thread *td)
4358 {
4359 	struct uio auio, *auiop;
4360 	size_t size, *sizep;
4361 	struct iovec aiov;
4362 	ssize_t cnt;
4363 	int error;
4364 
4365 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4366 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4367 
4368 	auiop = NULL;
4369 	sizep = NULL;
4370 	cnt = 0;
4371 	if (data != NULL) {
4372 		aiov.iov_base = data;
4373 		aiov.iov_len = nbytes;
4374 		auio.uio_iov = &aiov;
4375 		auio.uio_offset = 0;
4376 		if (nbytes > INT_MAX) {
4377 			error = EINVAL;
4378 			goto done;
4379 		}
4380 		auio.uio_resid = nbytes;
4381 		auio.uio_rw = UIO_READ;
4382 		auio.uio_segflg = UIO_USERSPACE;
4383 		auio.uio_td = td;
4384 		auiop = &auio;
4385 		cnt = nbytes;
4386 	} else
4387 		sizep = &size;
4388 
4389 #ifdef MAC
4390 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4391 	    "", &auio);
4392 	if (error)
4393 		goto done;
4394 #endif
4395 
4396 	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4397 	    td->td_ucred, td);
4398 
4399 	if (auiop != NULL) {
4400 		cnt -= auio.uio_resid;
4401 		td->td_retval[0] = cnt;
4402 	} else
4403 		td->td_retval[0] = size;
4404 
4405 done:
4406 	VOP_UNLOCK(vp, 0, td);
4407 	return (error);
4408 }
4409 
4410 
4411 int
4412 extattr_list_fd(td, uap)
4413 	struct thread *td;
4414 	struct extattr_list_fd_args /* {
4415 		int fd;
4416 		int attrnamespace;
4417 		void *data;
4418 		size_t nbytes;
4419 	} */ *uap;
4420 {
4421 	struct file *fp;
4422 	int error;
4423 
4424 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4425 	if (error)
4426 		return (error);
4427 
4428 	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
4429 	    uap->nbytes, td);
4430 
4431 	fdrop(fp, td);
4432 	return (error);
4433 }
4434 
4435 int
4436 extattr_list_file(td, uap)
4437 	struct thread*td;
4438 	struct extattr_list_file_args /* {
4439 		const char *path;
4440 		int attrnamespace;
4441 		void *data;
4442 		size_t nbytes;
4443 	} */ *uap;
4444 {
4445 	struct nameidata nd;
4446 	int error;
4447 
4448 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4449 	error = namei(&nd);
4450 	if (error)
4451 		return (error);
4452 	NDFREE(&nd, NDF_ONLY_PNBUF);
4453 
4454 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4455 	    uap->nbytes, td);
4456 
4457 	vrele(nd.ni_vp);
4458 	return (error);
4459 }
4460 
4461 int
4462 extattr_list_link(td, uap)
4463 	struct thread*td;
4464 	struct extattr_list_link_args /* {
4465 		const char *path;
4466 		int attrnamespace;
4467 		void *data;
4468 		size_t nbytes;
4469 	} */ *uap;
4470 {
4471 	struct nameidata nd;
4472 	int error;
4473 
4474 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4475 	error = namei(&nd);
4476 	if (error)
4477 		return (error);
4478 	NDFREE(&nd, NDF_ONLY_PNBUF);
4479 
4480 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4481 	    uap->nbytes, td);
4482 
4483 	vrele(nd.ni_vp);
4484 	return (error);
4485 }
4486 
4487