xref: /freebsd/sys/kern/vfs_extattr.c (revision 7660b554bc59a07be0431c17e0e33815818baa69)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  */
40 
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43 
44 /* For 4.3 integer FS ID compatibility */
45 #include "opt_compat.h"
46 #include "opt_mac.h"
47 
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/bio.h>
51 #include <sys/buf.h>
52 #include <sys/sysent.h>
53 #include <sys/mac.h>
54 #include <sys/malloc.h>
55 #include <sys/mount.h>
56 #include <sys/mutex.h>
57 #include <sys/sysproto.h>
58 #include <sys/namei.h>
59 #include <sys/filedesc.h>
60 #include <sys/kernel.h>
61 #include <sys/fcntl.h>
62 #include <sys/file.h>
63 #include <sys/limits.h>
64 #include <sys/linker.h>
65 #include <sys/stat.h>
66 #include <sys/sx.h>
67 #include <sys/unistd.h>
68 #include <sys/vnode.h>
69 #include <sys/proc.h>
70 #include <sys/dirent.h>
71 #include <sys/extattr.h>
72 #include <sys/jail.h>
73 #include <sys/syscallsubr.h>
74 #include <sys/sysctl.h>
75 
76 #include <machine/stdarg.h>
77 
78 #include <vm/vm.h>
79 #include <vm/vm_object.h>
80 #include <vm/vm_page.h>
81 #include <vm/uma.h>
82 
83 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
84 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
85 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
86 static int setfmode(struct thread *td, struct vnode *, int);
87 static int setfflags(struct thread *td, struct vnode *, int);
88 static int setutimes(struct thread *td, struct vnode *,
89     const struct timespec *, int, int);
90 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
91     struct thread *td);
92 
93 static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
94     size_t nbytes, struct thread *td);
95 
96 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
97 int (*softdep_fsync_hook)(struct vnode *);
98 
99 /*
100  * The module initialization routine for POSIX asynchronous I/O will
101  * set this to the version of AIO that it implements.  (Zero means
102  * that it is not implemented.)  This value is used here by pathconf()
103  * and in kern_descrip.c by fpathconf().
104  */
105 int async_io_version;
106 
107 /*
108  * Sync each mounted filesystem.
109  */
110 #ifndef _SYS_SYSPROTO_H_
111 struct sync_args {
112         int     dummy;
113 };
114 #endif
115 
116 #ifdef DEBUG
117 static int syncprt = 0;
118 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
119 #endif
120 
121 /* ARGSUSED */
122 int
123 sync(td, uap)
124 	struct thread *td;
125 	struct sync_args *uap;
126 {
127 	struct mount *mp, *nmp;
128 	int asyncflag;
129 
130 	mtx_lock(&mountlist_mtx);
131 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
132 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
133 			nmp = TAILQ_NEXT(mp, mnt_list);
134 			continue;
135 		}
136 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
137 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
138 			asyncflag = mp->mnt_flag & MNT_ASYNC;
139 			mp->mnt_flag &= ~MNT_ASYNC;
140 			vfs_msync(mp, MNT_NOWAIT);
141 			VFS_SYNC(mp, MNT_NOWAIT,
142 			    ((td != NULL) ? td->td_ucred : NOCRED), td);
143 			mp->mnt_flag |= asyncflag;
144 			vn_finished_write(mp);
145 		}
146 		mtx_lock(&mountlist_mtx);
147 		nmp = TAILQ_NEXT(mp, mnt_list);
148 		vfs_unbusy(mp, td);
149 	}
150 	mtx_unlock(&mountlist_mtx);
151 #if 0
152 /*
153  * XXX don't call vfs_bufstats() yet because that routine
154  * was not imported in the Lite2 merge.
155  */
156 #ifdef DIAGNOSTIC
157 	if (syncprt)
158 		vfs_bufstats();
159 #endif /* DIAGNOSTIC */
160 #endif
161 	return (0);
162 }
163 
164 /* XXX PRISON: could be per prison flag */
165 static int prison_quotas;
166 #if 0
167 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
168 #endif
169 
170 /*
171  * Change filesystem quotas.
172  */
173 #ifndef _SYS_SYSPROTO_H_
174 struct quotactl_args {
175 	char *path;
176 	int cmd;
177 	int uid;
178 	caddr_t arg;
179 };
180 #endif
181 /* ARGSUSED */
182 int
183 quotactl(td, uap)
184 	struct thread *td;
185 	register struct quotactl_args /* {
186 		char *path;
187 		int cmd;
188 		int uid;
189 		caddr_t arg;
190 	} */ *uap;
191 {
192 	struct mount *mp;
193 	int error;
194 	struct nameidata nd;
195 
196 	if (jailed(td->td_ucred) && !prison_quotas)
197 		return (EPERM);
198 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
199 	if ((error = namei(&nd)) != 0)
200 		return (error);
201 	NDFREE(&nd, NDF_ONLY_PNBUF);
202 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
203 	vrele(nd.ni_vp);
204 	if (error)
205 		return (error);
206 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
207 	vn_finished_write(mp);
208 	return (error);
209 }
210 
211 /*
212  * Get filesystem statistics.
213  */
214 #ifndef _SYS_SYSPROTO_H_
215 struct statfs_args {
216 	char *path;
217 	struct statfs *buf;
218 };
219 #endif
220 /* ARGSUSED */
221 int
222 statfs(td, uap)
223 	struct thread *td;
224 	register struct statfs_args /* {
225 		char *path;
226 		struct statfs *buf;
227 	} */ *uap;
228 {
229 	register struct mount *mp;
230 	register struct statfs *sp;
231 	int error;
232 	struct nameidata nd;
233 	struct statfs sb;
234 
235 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
236 	if ((error = namei(&nd)) != 0)
237 		return (error);
238 	mp = nd.ni_vp->v_mount;
239 	sp = &mp->mnt_stat;
240 	NDFREE(&nd, NDF_ONLY_PNBUF);
241 	vrele(nd.ni_vp);
242 #ifdef MAC
243 	error = mac_check_mount_stat(td->td_ucred, mp);
244 	if (error)
245 		return (error);
246 #endif
247 	error = VFS_STATFS(mp, sp, td);
248 	if (error)
249 		return (error);
250 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
251 	if (suser(td)) {
252 		bcopy(sp, &sb, sizeof(sb));
253 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
254 		sp = &sb;
255 	}
256 	return (copyout(sp, uap->buf, sizeof(*sp)));
257 }
258 
259 /*
260  * Get filesystem statistics.
261  */
262 #ifndef _SYS_SYSPROTO_H_
263 struct fstatfs_args {
264 	int fd;
265 	struct statfs *buf;
266 };
267 #endif
268 /* ARGSUSED */
269 int
270 fstatfs(td, uap)
271 	struct thread *td;
272 	register struct fstatfs_args /* {
273 		int fd;
274 		struct statfs *buf;
275 	} */ *uap;
276 {
277 	struct file *fp;
278 	struct mount *mp;
279 	register struct statfs *sp;
280 	int error;
281 	struct statfs sb;
282 
283 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
284 		return (error);
285 	mp = fp->f_vnode->v_mount;
286 	fdrop(fp, td);
287 	if (mp == NULL)
288 		return (EBADF);
289 #ifdef MAC
290 	error = mac_check_mount_stat(td->td_ucred, mp);
291 	if (error)
292 		return (error);
293 #endif
294 	sp = &mp->mnt_stat;
295 	error = VFS_STATFS(mp, sp, td);
296 	if (error)
297 		return (error);
298 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
299 	if (suser(td)) {
300 		bcopy(sp, &sb, sizeof(sb));
301 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
302 		sp = &sb;
303 	}
304 	return (copyout(sp, uap->buf, sizeof(*sp)));
305 }
306 
307 /*
308  * Get statistics on all filesystems.
309  */
310 #ifndef _SYS_SYSPROTO_H_
311 struct getfsstat_args {
312 	struct statfs *buf;
313 	long bufsize;
314 	int flags;
315 };
316 #endif
317 int
318 getfsstat(td, uap)
319 	struct thread *td;
320 	register struct getfsstat_args /* {
321 		struct statfs *buf;
322 		long bufsize;
323 		int flags;
324 	} */ *uap;
325 {
326 	register struct mount *mp, *nmp;
327 	register struct statfs *sp;
328 	caddr_t sfsp;
329 	long count, maxcount, error;
330 
331 	maxcount = uap->bufsize / sizeof(struct statfs);
332 	sfsp = (caddr_t)uap->buf;
333 	count = 0;
334 	mtx_lock(&mountlist_mtx);
335 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
336 #ifdef MAC
337 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
338 			nmp = TAILQ_NEXT(mp, mnt_list);
339 			continue;
340 		}
341 #endif
342 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
343 			nmp = TAILQ_NEXT(mp, mnt_list);
344 			continue;
345 		}
346 		if (sfsp && count < maxcount) {
347 			sp = &mp->mnt_stat;
348 			/*
349 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
350 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
351 			 * overrides MNT_WAIT.
352 			 */
353 			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
354 			    (uap->flags & MNT_WAIT)) &&
355 			    (error = VFS_STATFS(mp, sp, td))) {
356 				mtx_lock(&mountlist_mtx);
357 				nmp = TAILQ_NEXT(mp, mnt_list);
358 				vfs_unbusy(mp, td);
359 				continue;
360 			}
361 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
362 			error = copyout(sp, sfsp, sizeof(*sp));
363 			if (error) {
364 				vfs_unbusy(mp, td);
365 				return (error);
366 			}
367 			sfsp += sizeof(*sp);
368 		}
369 		count++;
370 		mtx_lock(&mountlist_mtx);
371 		nmp = TAILQ_NEXT(mp, mnt_list);
372 		vfs_unbusy(mp, td);
373 	}
374 	mtx_unlock(&mountlist_mtx);
375 	if (sfsp && count > maxcount)
376 		td->td_retval[0] = maxcount;
377 	else
378 		td->td_retval[0] = count;
379 	return (0);
380 }
381 
382 /*
383  * Change current working directory to a given file descriptor.
384  */
385 #ifndef _SYS_SYSPROTO_H_
386 struct fchdir_args {
387 	int	fd;
388 };
389 #endif
390 /* ARGSUSED */
391 int
392 fchdir(td, uap)
393 	struct thread *td;
394 	struct fchdir_args /* {
395 		int fd;
396 	} */ *uap;
397 {
398 	register struct filedesc *fdp = td->td_proc->p_fd;
399 	struct vnode *vp, *tdp, *vpold;
400 	struct mount *mp;
401 	struct file *fp;
402 	int error;
403 
404 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
405 		return (error);
406 	vp = fp->f_vnode;
407 	VREF(vp);
408 	fdrop(fp, td);
409 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
410 	if (vp->v_type != VDIR)
411 		error = ENOTDIR;
412 #ifdef MAC
413 	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
414 	}
415 #endif
416 	else
417 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
418 	while (!error && (mp = vp->v_mountedhere) != NULL) {
419 		if (vfs_busy(mp, 0, 0, td))
420 			continue;
421 		error = VFS_ROOT(mp, &tdp);
422 		vfs_unbusy(mp, td);
423 		if (error)
424 			break;
425 		vput(vp);
426 		vp = tdp;
427 	}
428 	if (error) {
429 		vput(vp);
430 		return (error);
431 	}
432 	VOP_UNLOCK(vp, 0, td);
433 	FILEDESC_LOCK(fdp);
434 	vpold = fdp->fd_cdir;
435 	fdp->fd_cdir = vp;
436 	FILEDESC_UNLOCK(fdp);
437 	vrele(vpold);
438 	return (0);
439 }
440 
441 /*
442  * Change current working directory (``.'').
443  */
444 #ifndef _SYS_SYSPROTO_H_
445 struct chdir_args {
446 	char	*path;
447 };
448 #endif
449 /* ARGSUSED */
450 int
451 chdir(td, uap)
452 	struct thread *td;
453 	struct chdir_args /* {
454 		char *path;
455 	} */ *uap;
456 {
457 
458 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
459 }
460 
461 int
462 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
463 {
464 	register struct filedesc *fdp = td->td_proc->p_fd;
465 	int error;
466 	struct nameidata nd;
467 	struct vnode *vp;
468 
469 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, pathseg, path, td);
470 	if ((error = namei(&nd)) != 0)
471 		return (error);
472 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
473 		vput(nd.ni_vp);
474 		NDFREE(&nd, NDF_ONLY_PNBUF);
475 		return (error);
476 	}
477 	VOP_UNLOCK(nd.ni_vp, 0, td);
478 	NDFREE(&nd, NDF_ONLY_PNBUF);
479 	FILEDESC_LOCK(fdp);
480 	vp = fdp->fd_cdir;
481 	fdp->fd_cdir = nd.ni_vp;
482 	FILEDESC_UNLOCK(fdp);
483 	vrele(vp);
484 	return (0);
485 }
486 
487 /*
488  * Helper function for raised chroot(2) security function:  Refuse if
489  * any filedescriptors are open directories.
490  */
491 static int
492 chroot_refuse_vdir_fds(fdp)
493 	struct filedesc *fdp;
494 {
495 	struct vnode *vp;
496 	struct file *fp;
497 	int fd;
498 
499 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
500 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
501 		fp = fget_locked(fdp, fd);
502 		if (fp == NULL)
503 			continue;
504 		if (fp->f_type == DTYPE_VNODE) {
505 			vp = fp->f_vnode;
506 			if (vp->v_type == VDIR)
507 				return (EPERM);
508 		}
509 	}
510 	return (0);
511 }
512 
513 /*
514  * This sysctl determines if we will allow a process to chroot(2) if it
515  * has a directory open:
516  *	0: disallowed for all processes.
517  *	1: allowed for processes that were not already chroot(2)'ed.
518  *	2: allowed for all processes.
519  */
520 
521 static int chroot_allow_open_directories = 1;
522 
523 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
524      &chroot_allow_open_directories, 0, "");
525 
526 /*
527  * Change notion of root (``/'') directory.
528  */
529 #ifndef _SYS_SYSPROTO_H_
530 struct chroot_args {
531 	char	*path;
532 };
533 #endif
534 /* ARGSUSED */
535 int
536 chroot(td, uap)
537 	struct thread *td;
538 	struct chroot_args /* {
539 		char *path;
540 	} */ *uap;
541 {
542 	int error;
543 	struct nameidata nd;
544 
545 	error = suser_cred(td->td_ucred, PRISON_ROOT);
546 	if (error)
547 		return (error);
548 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
549 	mtx_lock(&Giant);
550 	error = namei(&nd);
551 	if (error)
552 		goto error;
553 	if ((error = change_dir(nd.ni_vp, td)) != 0)
554 		goto e_vunlock;
555 #ifdef MAC
556 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
557 		goto e_vunlock;
558 #endif
559 	VOP_UNLOCK(nd.ni_vp, 0, td);
560 	error = change_root(nd.ni_vp, td);
561 	vrele(nd.ni_vp);
562 	NDFREE(&nd, NDF_ONLY_PNBUF);
563 	mtx_unlock(&Giant);
564 	return (error);
565 e_vunlock:
566 	vput(nd.ni_vp);
567 error:
568 	mtx_unlock(&Giant);
569 	NDFREE(&nd, NDF_ONLY_PNBUF);
570 	return (error);
571 }
572 
573 /*
574  * Common routine for chroot and chdir.  Callers must provide a locked vnode
575  * instance.
576  */
577 int
578 change_dir(vp, td)
579 	struct vnode *vp;
580 	struct thread *td;
581 {
582 	int error;
583 
584 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
585 	if (vp->v_type != VDIR)
586 		return (ENOTDIR);
587 #ifdef MAC
588 	error = mac_check_vnode_chdir(td->td_ucred, vp);
589 	if (error)
590 		return (error);
591 #endif
592 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
593 	return (error);
594 }
595 
596 /*
597  * Common routine for kern_chroot() and jail_attach().  The caller is
598  * responsible for invoking suser() and mac_check_chroot() to authorize this
599  * operation.
600  */
601 int
602 change_root(vp, td)
603 	struct vnode *vp;
604 	struct thread *td;
605 {
606 	struct filedesc *fdp;
607 	struct vnode *oldvp;
608 	int error;
609 
610 	mtx_assert(&Giant, MA_OWNED);
611 	fdp = td->td_proc->p_fd;
612 	FILEDESC_LOCK(fdp);
613 	if (chroot_allow_open_directories == 0 ||
614 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
615 		error = chroot_refuse_vdir_fds(fdp);
616 		if (error) {
617 			FILEDESC_UNLOCK(fdp);
618 			return (error);
619 		}
620 	}
621 	oldvp = fdp->fd_rdir;
622 	fdp->fd_rdir = vp;
623 	VREF(fdp->fd_rdir);
624 	if (!fdp->fd_jdir) {
625 		fdp->fd_jdir = vp;
626 		VREF(fdp->fd_jdir);
627 	}
628 	FILEDESC_UNLOCK(fdp);
629 	vrele(oldvp);
630 	return (0);
631 }
632 
633 /*
634  * Check permissions, allocate an open file structure,
635  * and call the device open routine if any.
636  */
637 #ifndef _SYS_SYSPROTO_H_
638 struct open_args {
639 	char	*path;
640 	int	flags;
641 	int	mode;
642 };
643 #endif
644 int
645 open(td, uap)
646 	struct thread *td;
647 	register struct open_args /* {
648 		char *path;
649 		int flags;
650 		int mode;
651 	} */ *uap;
652 {
653 
654 	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
655 }
656 
657 int
658 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
659     int mode)
660 {
661 	struct proc *p = td->td_proc;
662 	struct filedesc *fdp = p->p_fd;
663 	struct file *fp;
664 	struct vnode *vp;
665 	struct vattr vat;
666 	struct mount *mp;
667 	int cmode;
668 	struct file *nfp;
669 	int type, indx, error;
670 	struct flock lf;
671 	struct nameidata nd;
672 
673 	if ((flags & O_ACCMODE) == O_ACCMODE)
674 		return (EINVAL);
675 	flags = FFLAGS(flags);
676 	error = falloc(td, &nfp, &indx);
677 	if (error)
678 		return (error);
679 	fp = nfp;
680 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
681 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
682 	td->td_dupfd = -1;		/* XXX check for fdopen */
683 	/*
684 	 * Bump the ref count to prevent another process from closing
685 	 * the descriptor while we are blocked in vn_open()
686 	 */
687 	fhold(fp);
688 	error = vn_open(&nd, &flags, cmode, indx);
689 	if (error) {
690 
691 		/*
692 		 * If the vn_open replaced the method vector, something
693 		 * wonderous happened deep below and we just pass it up
694 		 * pretending we know what we do.
695 		 */
696 		if (error == ENXIO && fp->f_ops != &badfileops) {
697 			fdrop(fp, td);
698 			td->td_retval[0] = indx;
699 			return (0);
700 		}
701 
702 		/*
703 		 * release our own reference
704 		 */
705 		fdrop(fp, td);
706 
707 		/*
708 		 * handle special fdopen() case.  bleh.  dupfdopen() is
709 		 * responsible for dropping the old contents of ofiles[indx]
710 		 * if it succeeds.
711 		 */
712 		if ((error == ENODEV || error == ENXIO) &&
713 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
714 		    (error =
715 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
716 			td->td_retval[0] = indx;
717 			return (0);
718 		}
719 		/*
720 		 * Clean up the descriptor, but only if another thread hadn't
721 		 * replaced or closed it.
722 		 */
723 		FILEDESC_LOCK(fdp);
724 		if (fdp->fd_ofiles[indx] == fp) {
725 			fdp->fd_ofiles[indx] = NULL;
726 			FILEDESC_UNLOCK(fdp);
727 			fdrop(fp, td);
728 		} else
729 			FILEDESC_UNLOCK(fdp);
730 
731 		if (error == ERESTART)
732 			error = EINTR;
733 		return (error);
734 	}
735 	td->td_dupfd = 0;
736 	NDFREE(&nd, NDF_ONLY_PNBUF);
737 	vp = nd.ni_vp;
738 
739 	/*
740 	 * There should be 2 references on the file, one from the descriptor
741 	 * table, and one for us.
742 	 *
743 	 * Handle the case where someone closed the file (via its file
744 	 * descriptor) while we were blocked.  The end result should look
745 	 * like opening the file succeeded but it was immediately closed.
746 	 */
747 	FILEDESC_LOCK(fdp);
748 	FILE_LOCK(fp);
749 	if (fp->f_count == 1) {
750 		KASSERT(fdp->fd_ofiles[indx] != fp,
751 		    ("Open file descriptor lost all refs"));
752 		FILEDESC_UNLOCK(fdp);
753 		FILE_UNLOCK(fp);
754 		VOP_UNLOCK(vp, 0, td);
755 		vn_close(vp, flags & FMASK, fp->f_cred, td);
756 		fdrop(fp, td);
757 		td->td_retval[0] = indx;
758 		return 0;
759 	}
760 	fp->f_vnode = vp;
761 	fp->f_data = vp;
762 	fp->f_flag = flags & FMASK;
763 	fp->f_ops = &vnops;
764 	fp->f_seqcount = 1;
765 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
766 	FILEDESC_UNLOCK(fdp);
767 	FILE_UNLOCK(fp);
768 
769 	/* assert that vn_open created a backing object if one is needed */
770 	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
771 		("open: vmio vnode has no backing object after vn_open"));
772 
773 	VOP_UNLOCK(vp, 0, td);
774 	if (flags & (O_EXLOCK | O_SHLOCK)) {
775 		lf.l_whence = SEEK_SET;
776 		lf.l_start = 0;
777 		lf.l_len = 0;
778 		if (flags & O_EXLOCK)
779 			lf.l_type = F_WRLCK;
780 		else
781 			lf.l_type = F_RDLCK;
782 		type = F_FLOCK;
783 		if ((flags & FNONBLOCK) == 0)
784 			type |= F_WAIT;
785 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
786 			    type)) != 0)
787 			goto bad;
788 		fp->f_flag |= FHASLOCK;
789 	}
790 	if (flags & O_TRUNC) {
791 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
792 			goto bad;
793 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
794 		VATTR_NULL(&vat);
795 		vat.va_size = 0;
796 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
797 #ifdef MAC
798 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
799 		if (error == 0)
800 #endif
801 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
802 		VOP_UNLOCK(vp, 0, td);
803 		vn_finished_write(mp);
804 		if (error)
805 			goto bad;
806 	}
807 	/*
808 	 * Release our private reference, leaving the one associated with
809 	 * the descriptor table intact.
810 	 */
811 	fdrop(fp, td);
812 	td->td_retval[0] = indx;
813 	return (0);
814 bad:
815 	FILEDESC_LOCK(fdp);
816 	if (fdp->fd_ofiles[indx] == fp) {
817 		fdp->fd_ofiles[indx] = NULL;
818 		FILEDESC_UNLOCK(fdp);
819 		fdrop(fp, td);
820 	} else
821 		FILEDESC_UNLOCK(fdp);
822 	fdrop(fp, td);
823 	return (error);
824 }
825 
#ifdef COMPAT_43
/*
 * Create a file.
 *
 * Old creat(2) interface: equivalent to open(2) with the flags
 * O_WRONLY | O_CREAT | O_TRUNC and the given mode.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{

	return (kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */
856 
857 /*
858  * Create a special file.
859  */
860 #ifndef _SYS_SYSPROTO_H_
861 struct mknod_args {
862 	char	*path;
863 	int	mode;
864 	int	dev;
865 };
866 #endif
867 /* ARGSUSED */
868 int
869 mknod(td, uap)
870 	struct thread *td;
871 	register struct mknod_args /* {
872 		char *path;
873 		int mode;
874 		int dev;
875 	} */ *uap;
876 {
877 
878 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
879 }
880 
881 int
882 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
883     int dev)
884 {
885 	struct vnode *vp;
886 	struct mount *mp;
887 	struct vattr vattr;
888 	int error;
889 	int whiteout = 0;
890 	struct nameidata nd;
891 
892 	switch (mode & S_IFMT) {
893 	case S_IFCHR:
894 	case S_IFBLK:
895 		error = suser(td);
896 		break;
897 	default:
898 		error = suser_cred(td->td_ucred, PRISON_ROOT);
899 		break;
900 	}
901 	if (error)
902 		return (error);
903 restart:
904 	bwillwrite();
905 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
906 	if ((error = namei(&nd)) != 0)
907 		return (error);
908 	vp = nd.ni_vp;
909 	if (vp != NULL) {
910 		NDFREE(&nd, NDF_ONLY_PNBUF);
911 		vrele(vp);
912 		if (vp == nd.ni_dvp)
913 			vrele(nd.ni_dvp);
914 		else
915 			vput(nd.ni_dvp);
916 		return (EEXIST);
917 	} else {
918 		VATTR_NULL(&vattr);
919 		FILEDESC_LOCK(td->td_proc->p_fd);
920 		vattr.va_mode = (mode & ALLPERMS) &
921 		    ~td->td_proc->p_fd->fd_cmask;
922 		FILEDESC_UNLOCK(td->td_proc->p_fd);
923 		vattr.va_rdev = dev;
924 		whiteout = 0;
925 
926 		switch (mode & S_IFMT) {
927 		case S_IFMT:	/* used by badsect to flag bad sectors */
928 			vattr.va_type = VBAD;
929 			break;
930 		case S_IFCHR:
931 			vattr.va_type = VCHR;
932 			break;
933 		case S_IFBLK:
934 			vattr.va_type = VBLK;
935 			break;
936 		case S_IFWHT:
937 			whiteout = 1;
938 			break;
939 		default:
940 			error = EINVAL;
941 			break;
942 		}
943 	}
944 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
945 		NDFREE(&nd, NDF_ONLY_PNBUF);
946 		vput(nd.ni_dvp);
947 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
948 			return (error);
949 		goto restart;
950 	}
951 #ifdef MAC
952 	if (error == 0 && !whiteout)
953 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
954 		    &nd.ni_cnd, &vattr);
955 #endif
956 	if (!error) {
957 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
958 		if (whiteout)
959 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
960 		else {
961 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
962 						&nd.ni_cnd, &vattr);
963 			if (error == 0)
964 				vput(nd.ni_vp);
965 		}
966 	}
967 	NDFREE(&nd, NDF_ONLY_PNBUF);
968 	vput(nd.ni_dvp);
969 	vn_finished_write(mp);
970 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
971 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
972 	return (error);
973 }
974 
975 /*
976  * Create a named pipe.
977  */
978 #ifndef _SYS_SYSPROTO_H_
979 struct mkfifo_args {
980 	char	*path;
981 	int	mode;
982 };
983 #endif
984 /* ARGSUSED */
985 int
986 mkfifo(td, uap)
987 	struct thread *td;
988 	register struct mkfifo_args /* {
989 		char *path;
990 		int mode;
991 	} */ *uap;
992 {
993 
994 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
995 }
996 
997 int
998 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
999 {
1000 	struct mount *mp;
1001 	struct vattr vattr;
1002 	int error;
1003 	struct nameidata nd;
1004 
1005 restart:
1006 	bwillwrite();
1007 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
1008 	if ((error = namei(&nd)) != 0)
1009 		return (error);
1010 	if (nd.ni_vp != NULL) {
1011 		NDFREE(&nd, NDF_ONLY_PNBUF);
1012 		vrele(nd.ni_vp);
1013 		if (nd.ni_vp == nd.ni_dvp)
1014 			vrele(nd.ni_dvp);
1015 		else
1016 			vput(nd.ni_dvp);
1017 		return (EEXIST);
1018 	}
1019 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1020 		NDFREE(&nd, NDF_ONLY_PNBUF);
1021 		vput(nd.ni_dvp);
1022 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1023 			return (error);
1024 		goto restart;
1025 	}
1026 	VATTR_NULL(&vattr);
1027 	vattr.va_type = VFIFO;
1028 	FILEDESC_LOCK(td->td_proc->p_fd);
1029 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1030 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1031 #ifdef MAC
1032 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1033 	    &vattr);
1034 	if (error)
1035 		goto out;
1036 #endif
1037 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1038 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1039 	if (error == 0)
1040 		vput(nd.ni_vp);
1041 #ifdef MAC
1042 out:
1043 #endif
1044 	NDFREE(&nd, NDF_ONLY_PNBUF);
1045 	vput(nd.ni_dvp);
1046 	vn_finished_write(mp);
1047 	return (error);
1048 }
1049 
1050 /*
1051  * Make a hard file link.
1052  */
1053 #ifndef _SYS_SYSPROTO_H_
1054 struct link_args {
1055 	char	*path;
1056 	char	*link;
1057 };
1058 #endif
1059 /* ARGSUSED */
1060 int
1061 link(td, uap)
1062 	struct thread *td;
1063 	register struct link_args /* {
1064 		char *path;
1065 		char *link;
1066 	} */ *uap;
1067 {
1068 
1069 	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1070 }
1071 
1072 int
1073 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1074 {
1075 	struct vnode *vp;
1076 	struct mount *mp;
1077 	struct nameidata nd;
1078 	int error;
1079 
1080 	bwillwrite();
1081 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, segflg, path, td);
1082 	if ((error = namei(&nd)) != 0)
1083 		return (error);
1084 	NDFREE(&nd, NDF_ONLY_PNBUF);
1085 	vp = nd.ni_vp;
1086 	if (vp->v_type == VDIR) {
1087 		vrele(vp);
1088 		return (EPERM);		/* POSIX */
1089 	}
1090 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1091 		vrele(vp);
1092 		return (error);
1093 	}
1094 	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1095 	if ((error = namei(&nd)) == 0) {
1096 		if (nd.ni_vp != NULL) {
1097 			vrele(nd.ni_vp);
1098 			if (nd.ni_dvp == nd.ni_vp)
1099 				vrele(nd.ni_dvp);
1100 			else
1101 				vput(nd.ni_dvp);
1102 			error = EEXIST;
1103 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1104 		    == 0) {
1105 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1106 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1107 #ifdef MAC
1108 			error = mac_check_vnode_link(td->td_ucred, nd.ni_dvp,
1109 			    vp, &nd.ni_cnd);
1110 			if (error == 0)
1111 #endif
1112 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1113 			VOP_UNLOCK(vp, 0, td);
1114 			vput(nd.ni_dvp);
1115 		}
1116 		NDFREE(&nd, NDF_ONLY_PNBUF);
1117 	}
1118 	vrele(vp);
1119 	vn_finished_write(mp);
1120 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1121 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1122 	return (error);
1123 }
1124 
1125 /*
1126  * Make a symbolic link.
1127  */
1128 #ifndef _SYS_SYSPROTO_H_
1129 struct symlink_args {
1130 	char	*path;
1131 	char	*link;
1132 };
1133 #endif
1134 /* ARGSUSED */
1135 int
1136 symlink(td, uap)
1137 	struct thread *td;
1138 	register struct symlink_args /* {
1139 		char *path;
1140 		char *link;
1141 	} */ *uap;
1142 {
1143 
1144 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1145 }
1146 
1147 int
1148 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1149 {
1150 	struct mount *mp;
1151 	struct vattr vattr;
1152 	char *syspath;
1153 	int error;
1154 	struct nameidata nd;
1155 
1156 	if (segflg == UIO_SYSSPACE) {
1157 		syspath = path;
1158 	} else {
1159 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1160 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1161 			goto out;
1162 	}
1163 restart:
1164 	bwillwrite();
1165 	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1166 	if ((error = namei(&nd)) != 0)
1167 		goto out;
1168 	if (nd.ni_vp) {
1169 		NDFREE(&nd, NDF_ONLY_PNBUF);
1170 		vrele(nd.ni_vp);
1171 		if (nd.ni_vp == nd.ni_dvp)
1172 			vrele(nd.ni_dvp);
1173 		else
1174 			vput(nd.ni_dvp);
1175 		error = EEXIST;
1176 		goto out;
1177 	}
1178 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1179 		NDFREE(&nd, NDF_ONLY_PNBUF);
1180 		vput(nd.ni_dvp);
1181 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1182 			return (error);
1183 		goto restart;
1184 	}
1185 	VATTR_NULL(&vattr);
1186 	FILEDESC_LOCK(td->td_proc->p_fd);
1187 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1188 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1189 #ifdef MAC
1190 	vattr.va_type = VLNK;
1191 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1192 	    &vattr);
1193 	if (error)
1194 		goto out2;
1195 #endif
1196 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1197 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1198 	if (error == 0)
1199 		vput(nd.ni_vp);
1200 #ifdef MAC
1201 out2:
1202 #endif
1203 	NDFREE(&nd, NDF_ONLY_PNBUF);
1204 	vput(nd.ni_dvp);
1205 	vn_finished_write(mp);
1206 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1207 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1208 out:
1209 	if (segflg != UIO_SYSSPACE)
1210 		uma_zfree(namei_zone, syspath);
1211 	return (error);
1212 }
1213 
1214 /*
1215  * Delete a whiteout from the filesystem.
1216  */
1217 /* ARGSUSED */
1218 int
1219 undelete(td, uap)
1220 	struct thread *td;
1221 	register struct undelete_args /* {
1222 		char *path;
1223 	} */ *uap;
1224 {
1225 	int error;
1226 	struct mount *mp;
1227 	struct nameidata nd;
1228 
1229 restart:
1230 	bwillwrite();
1231 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1232 	    uap->path, td);
1233 	error = namei(&nd);
1234 	if (error)
1235 		return (error);
1236 
1237 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1238 		NDFREE(&nd, NDF_ONLY_PNBUF);
1239 		if (nd.ni_vp)
1240 			vrele(nd.ni_vp);
1241 		if (nd.ni_vp == nd.ni_dvp)
1242 			vrele(nd.ni_dvp);
1243 		else
1244 			vput(nd.ni_dvp);
1245 		return (EEXIST);
1246 	}
1247 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1248 		NDFREE(&nd, NDF_ONLY_PNBUF);
1249 		vput(nd.ni_dvp);
1250 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1251 			return (error);
1252 		goto restart;
1253 	}
1254 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1255 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1256 	NDFREE(&nd, NDF_ONLY_PNBUF);
1257 	vput(nd.ni_dvp);
1258 	vn_finished_write(mp);
1259 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1260 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1261 	return (error);
1262 }
1263 
1264 /*
1265  * Delete a name from the filesystem.
1266  */
1267 #ifndef _SYS_SYSPROTO_H_
1268 struct unlink_args {
1269 	char	*path;
1270 };
1271 #endif
1272 /* ARGSUSED */
1273 int
1274 unlink(td, uap)
1275 	struct thread *td;
1276 	struct unlink_args /* {
1277 		char *path;
1278 	} */ *uap;
1279 {
1280 
1281 	return (kern_unlink(td, uap->path, UIO_USERSPACE));
1282 }
1283 
1284 int
1285 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1286 {
1287 	struct mount *mp;
1288 	struct vnode *vp;
1289 	int error;
1290 	struct nameidata nd;
1291 
1292 restart:
1293 	bwillwrite();
1294 	NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, pathseg, path, td);
1295 	if ((error = namei(&nd)) != 0)
1296 		return (error);
1297 	vp = nd.ni_vp;
1298 	if (vp->v_type == VDIR)
1299 		error = EPERM;		/* POSIX */
1300 	else {
1301 		/*
1302 		 * The root of a mounted filesystem cannot be deleted.
1303 		 *
1304 		 * XXX: can this only be a VDIR case?
1305 		 */
1306 		if (vp->v_vflag & VV_ROOT)
1307 			error = EBUSY;
1308 	}
1309 	if (error == 0) {
1310 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1311 			NDFREE(&nd, NDF_ONLY_PNBUF);
1312 			if (vp == nd.ni_dvp)
1313 				vrele(vp);
1314 			else
1315 				vput(vp);
1316 			vput(nd.ni_dvp);
1317 			if ((error = vn_start_write(NULL, &mp,
1318 			    V_XSLEEP | PCATCH)) != 0)
1319 				return (error);
1320 			goto restart;
1321 		}
1322 #ifdef MAC
1323 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1324 		    &nd.ni_cnd);
1325 		if (error)
1326 			goto out;
1327 #endif
1328 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1329 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1330 #ifdef MAC
1331 out:
1332 #endif
1333 		vn_finished_write(mp);
1334 	}
1335 	NDFREE(&nd, NDF_ONLY_PNBUF);
1336 	if (vp == nd.ni_dvp)
1337 		vrele(vp);
1338 	else
1339 		vput(vp);
1340 	vput(nd.ni_dvp);
1341 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1342 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1343 	return (error);
1344 }
1345 
1346 /*
1347  * Reposition read/write file offset.
1348  */
1349 #ifndef _SYS_SYSPROTO_H_
1350 struct lseek_args {
1351 	int	fd;
1352 	int	pad;
1353 	off_t	offset;
1354 	int	whence;
1355 };
1356 #endif
1357 int
1358 lseek(td, uap)
1359 	struct thread *td;
1360 	register struct lseek_args /* {
1361 		int fd;
1362 		int pad;
1363 		off_t offset;
1364 		int whence;
1365 	} */ *uap;
1366 {
1367 	struct ucred *cred = td->td_ucred;
1368 	struct file *fp;
1369 	struct vnode *vp;
1370 	struct vattr vattr;
1371 	off_t offset;
1372 	int error, noneg;
1373 
1374 	if ((error = fget(td, uap->fd, &fp)) != 0)
1375 		return (error);
1376 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1377 		fdrop(fp, td);
1378 		return (ESPIPE);
1379 	}
1380 	vp = fp->f_vnode;
1381 	noneg = (vp->v_type != VCHR);
1382 	offset = uap->offset;
1383 	switch (uap->whence) {
1384 	case L_INCR:
1385 		if (noneg &&
1386 		    (fp->f_offset < 0 ||
1387 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1388 			error = EOVERFLOW;
1389 			break;
1390 		}
1391 		offset += fp->f_offset;
1392 		break;
1393 	case L_XTND:
1394 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1395 		error = VOP_GETATTR(vp, &vattr, cred, td);
1396 		VOP_UNLOCK(vp, 0, td);
1397 		if (error)
1398 			break;
1399 		if (noneg &&
1400 		    (vattr.va_size > OFF_MAX ||
1401 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1402 			error = EOVERFLOW;
1403 			break;
1404 		}
1405 		offset += vattr.va_size;
1406 		break;
1407 	case L_SET:
1408 		break;
1409 	default:
1410 		error = EINVAL;
1411 	}
1412 	if (error == 0 && noneg && offset < 0)
1413 		error = EINVAL;
1414 	if (error != 0) {
1415 		fdrop(fp, td);
1416 		return (error);
1417 	}
1418 	fp->f_offset = offset;
1419 	*(off_t *)(td->td_retval) = fp->f_offset;
1420 	fdrop(fp, td);
1421 	return (0);
1422 }
1423 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Reposition read/write file offset (variant taking a `long' offset).
 *
 * The arguments are repacked into a struct lseek_args and handed to
 * lseek(); its result is returned unchanged.
 */
#ifndef _SYS_SYSPROTO_H_
struct olseek_args {
	int	fd;
	long	offset;
	int	whence;
};
#endif
int
olseek(struct thread *td, struct olseek_args *uap)
{
	struct lseek_args largs;	/* fd, pad, offset, whence */

	largs.fd = uap->fd;
	largs.offset = uap->offset;
	largs.whence = uap->whence;
	return (lseek(td, &largs));
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
1459 
1460 /*
1461  * Check access permissions using passed credentials.
1462  */
1463 static int
1464 vn_access(vp, user_flags, cred, td)
1465 	struct vnode	*vp;
1466 	int		user_flags;
1467 	struct ucred	*cred;
1468 	struct thread	*td;
1469 {
1470 	int error, flags;
1471 
1472 	/* Flags == 0 means only check for existence. */
1473 	error = 0;
1474 	if (user_flags) {
1475 		flags = 0;
1476 		if (user_flags & R_OK)
1477 			flags |= VREAD;
1478 		if (user_flags & W_OK)
1479 			flags |= VWRITE;
1480 		if (user_flags & X_OK)
1481 			flags |= VEXEC;
1482 #ifdef MAC
1483 		error = mac_check_vnode_access(cred, vp, flags);
1484 		if (error)
1485 			return (error);
1486 #endif
1487 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1488 			error = VOP_ACCESS(vp, flags, cred, td);
1489 	}
1490 	return (error);
1491 }
1492 
1493 /*
1494  * Check access permissions using "real" credentials.
1495  */
1496 #ifndef _SYS_SYSPROTO_H_
1497 struct access_args {
1498 	char	*path;
1499 	int	flags;
1500 };
1501 #endif
1502 int
1503 access(td, uap)
1504 	struct thread *td;
1505 	register struct access_args /* {
1506 		char *path;
1507 		int flags;
1508 	} */ *uap;
1509 {
1510 
1511 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1512 }
1513 
1514 int
1515 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1516 {
1517 	struct ucred *cred, *tmpcred;
1518 	register struct vnode *vp;
1519 	int error;
1520 	struct nameidata nd;
1521 
1522 	/*
1523 	 * Create and modify a temporary credential instead of one that
1524 	 * is potentially shared.  This could also mess up socket
1525 	 * buffer accounting which can run in an interrupt context.
1526 	 *
1527 	 * XXX - Depending on how "threads" are finally implemented, it
1528 	 * may be better to explicitly pass the credential to namei()
1529 	 * rather than to modify the potentially shared process structure.
1530 	 */
1531 	cred = td->td_ucred;
1532 	tmpcred = crdup(cred);
1533 	tmpcred->cr_uid = cred->cr_ruid;
1534 	tmpcred->cr_groups[0] = cred->cr_rgid;
1535 	td->td_ucred = tmpcred;
1536 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1537 	if ((error = namei(&nd)) != 0)
1538 		goto out1;
1539 	vp = nd.ni_vp;
1540 
1541 	error = vn_access(vp, flags, tmpcred, td);
1542 	NDFREE(&nd, NDF_ONLY_PNBUF);
1543 	vput(vp);
1544 out1:
1545 	td->td_ucred = cred;
1546 	crfree(tmpcred);
1547 	return (error);
1548 }
1549 
1550 /*
1551  * Check access permissions using "effective" credentials.
1552  */
1553 #ifndef _SYS_SYSPROTO_H_
1554 struct eaccess_args {
1555 	char	*path;
1556 	int	flags;
1557 };
1558 #endif
1559 int
1560 eaccess(td, uap)
1561 	struct thread *td;
1562 	register struct eaccess_args /* {
1563 		char *path;
1564 		int flags;
1565 	} */ *uap;
1566 {
1567 	struct nameidata nd;
1568 	struct vnode *vp;
1569 	int error;
1570 
1571 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1572 	    uap->path, td);
1573 	if ((error = namei(&nd)) != 0)
1574 		return (error);
1575 	vp = nd.ni_vp;
1576 
1577 	error = vn_access(vp, uap->flags, td->td_ucred, td);
1578 	NDFREE(&nd, NDF_ONLY_PNBUF);
1579 	vput(vp);
1580 	return (error);
1581 }
1582 
1583 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1584 /*
1585  * Get file status; this version follows links.
1586  */
1587 #ifndef _SYS_SYSPROTO_H_
1588 struct ostat_args {
1589 	char	*path;
1590 	struct ostat *ub;
1591 };
1592 #endif
1593 /* ARGSUSED */
1594 int
1595 ostat(td, uap)
1596 	struct thread *td;
1597 	register struct ostat_args /* {
1598 		char *path;
1599 		struct ostat *ub;
1600 	} */ *uap;
1601 {
1602 	struct stat sb;
1603 	struct ostat osb;
1604 	int error;
1605 	struct nameidata nd;
1606 
1607 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1608 	    uap->path, td);
1609 	if ((error = namei(&nd)) != 0)
1610 		return (error);
1611 	NDFREE(&nd, NDF_ONLY_PNBUF);
1612 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1613 	vput(nd.ni_vp);
1614 	if (error)
1615 		return (error);
1616 	cvtstat(&sb, &osb);
1617 	error = copyout(&osb, uap->ub, sizeof (osb));
1618 	return (error);
1619 }
1620 
1621 /*
1622  * Get file status; this version does not follow links.
1623  */
1624 #ifndef _SYS_SYSPROTO_H_
1625 struct olstat_args {
1626 	char	*path;
1627 	struct ostat *ub;
1628 };
1629 #endif
1630 /* ARGSUSED */
1631 int
1632 olstat(td, uap)
1633 	struct thread *td;
1634 	register struct olstat_args /* {
1635 		char *path;
1636 		struct ostat *ub;
1637 	} */ *uap;
1638 {
1639 	struct vnode *vp;
1640 	struct stat sb;
1641 	struct ostat osb;
1642 	int error;
1643 	struct nameidata nd;
1644 
1645 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1646 	    uap->path, td);
1647 	if ((error = namei(&nd)) != 0)
1648 		return (error);
1649 	vp = nd.ni_vp;
1650 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1651 	NDFREE(&nd, NDF_ONLY_PNBUF);
1652 	vput(vp);
1653 	if (error)
1654 		return (error);
1655 	cvtstat(&sb, &osb);
1656 	error = copyout(&osb, uap->ub, sizeof (osb));
1657 	return (error);
1658 }
1659 
1660 /*
1661  * Convert from an old to a new stat structure.
1662  */
1663 void
1664 cvtstat(st, ost)
1665 	struct stat *st;
1666 	struct ostat *ost;
1667 {
1668 
1669 	ost->st_dev = st->st_dev;
1670 	ost->st_ino = st->st_ino;
1671 	ost->st_mode = st->st_mode;
1672 	ost->st_nlink = st->st_nlink;
1673 	ost->st_uid = st->st_uid;
1674 	ost->st_gid = st->st_gid;
1675 	ost->st_rdev = st->st_rdev;
1676 	if (st->st_size < (quad_t)1 << 32)
1677 		ost->st_size = st->st_size;
1678 	else
1679 		ost->st_size = -2;
1680 	ost->st_atime = st->st_atime;
1681 	ost->st_mtime = st->st_mtime;
1682 	ost->st_ctime = st->st_ctime;
1683 	ost->st_blksize = st->st_blksize;
1684 	ost->st_blocks = st->st_blocks;
1685 	ost->st_flags = st->st_flags;
1686 	ost->st_gen = st->st_gen;
1687 }
1688 #endif /* COMPAT_43 || COMPAT_SUNOS */
1689 
1690 /*
1691  * Get file status; this version follows links.
1692  */
1693 #ifndef _SYS_SYSPROTO_H_
1694 struct stat_args {
1695 	char	*path;
1696 	struct stat *ub;
1697 };
1698 #endif
1699 /* ARGSUSED */
1700 int
1701 stat(td, uap)
1702 	struct thread *td;
1703 	register struct stat_args /* {
1704 		char *path;
1705 		struct stat *ub;
1706 	} */ *uap;
1707 {
1708 	struct stat sb;
1709 	int error;
1710 	struct nameidata nd;
1711 
1712 #ifdef LOOKUP_SHARED
1713 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
1714 	    UIO_USERSPACE, uap->path, td);
1715 #else
1716 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1717 	    uap->path, td);
1718 #endif
1719 	if ((error = namei(&nd)) != 0)
1720 		return (error);
1721 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1722 	NDFREE(&nd, NDF_ONLY_PNBUF);
1723 	vput(nd.ni_vp);
1724 	if (error)
1725 		return (error);
1726 	error = copyout(&sb, uap->ub, sizeof (sb));
1727 	return (error);
1728 }
1729 
1730 /*
1731  * Get file status; this version does not follow links.
1732  */
1733 #ifndef _SYS_SYSPROTO_H_
1734 struct lstat_args {
1735 	char	*path;
1736 	struct stat *ub;
1737 };
1738 #endif
1739 /* ARGSUSED */
1740 int
1741 lstat(td, uap)
1742 	struct thread *td;
1743 	register struct lstat_args /* {
1744 		char *path;
1745 		struct stat *ub;
1746 	} */ *uap;
1747 {
1748 	int error;
1749 	struct vnode *vp;
1750 	struct stat sb;
1751 	struct nameidata nd;
1752 
1753 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1754 	    uap->path, td);
1755 	if ((error = namei(&nd)) != 0)
1756 		return (error);
1757 	vp = nd.ni_vp;
1758 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1759 	NDFREE(&nd, NDF_ONLY_PNBUF);
1760 	vput(vp);
1761 	if (error)
1762 		return (error);
1763 	error = copyout(&sb, uap->ub, sizeof (sb));
1764 	return (error);
1765 }
1766 
1767 /*
1768  * Implementation of the NetBSD stat() function.
1769  * XXX This should probably be collapsed with the FreeBSD version,
1770  * as the differences are only due to vn_stat() clearing spares at
1771  * the end of the structures.  vn_stat could be split to avoid this,
1772  * and thus collapse the following to close to zero code.
1773  */
1774 void
1775 cvtnstat(sb, nsb)
1776 	struct stat *sb;
1777 	struct nstat *nsb;
1778 {
1779 	bzero(nsb, sizeof *nsb);
1780 	nsb->st_dev = sb->st_dev;
1781 	nsb->st_ino = sb->st_ino;
1782 	nsb->st_mode = sb->st_mode;
1783 	nsb->st_nlink = sb->st_nlink;
1784 	nsb->st_uid = sb->st_uid;
1785 	nsb->st_gid = sb->st_gid;
1786 	nsb->st_rdev = sb->st_rdev;
1787 	nsb->st_atimespec = sb->st_atimespec;
1788 	nsb->st_mtimespec = sb->st_mtimespec;
1789 	nsb->st_ctimespec = sb->st_ctimespec;
1790 	nsb->st_size = sb->st_size;
1791 	nsb->st_blocks = sb->st_blocks;
1792 	nsb->st_blksize = sb->st_blksize;
1793 	nsb->st_flags = sb->st_flags;
1794 	nsb->st_gen = sb->st_gen;
1795 	nsb->st_birthtimespec = sb->st_birthtimespec;
1796 }
1797 
1798 #ifndef _SYS_SYSPROTO_H_
1799 struct nstat_args {
1800 	char	*path;
1801 	struct nstat *ub;
1802 };
1803 #endif
1804 /* ARGSUSED */
1805 int
1806 nstat(td, uap)
1807 	struct thread *td;
1808 	register struct nstat_args /* {
1809 		char *path;
1810 		struct nstat *ub;
1811 	} */ *uap;
1812 {
1813 	struct stat sb;
1814 	struct nstat nsb;
1815 	int error;
1816 	struct nameidata nd;
1817 
1818 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1819 	    uap->path, td);
1820 	if ((error = namei(&nd)) != 0)
1821 		return (error);
1822 	NDFREE(&nd, NDF_ONLY_PNBUF);
1823 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1824 	vput(nd.ni_vp);
1825 	if (error)
1826 		return (error);
1827 	cvtnstat(&sb, &nsb);
1828 	error = copyout(&nsb, uap->ub, sizeof (nsb));
1829 	return (error);
1830 }
1831 
1832 /*
1833  * NetBSD lstat.  Get file status; this version does not follow links.
1834  */
1835 #ifndef _SYS_SYSPROTO_H_
1836 struct lstat_args {
1837 	char	*path;
1838 	struct stat *ub;
1839 };
1840 #endif
1841 /* ARGSUSED */
1842 int
1843 nlstat(td, uap)
1844 	struct thread *td;
1845 	register struct nlstat_args /* {
1846 		char *path;
1847 		struct nstat *ub;
1848 	} */ *uap;
1849 {
1850 	int error;
1851 	struct vnode *vp;
1852 	struct stat sb;
1853 	struct nstat nsb;
1854 	struct nameidata nd;
1855 
1856 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1857 	    uap->path, td);
1858 	if ((error = namei(&nd)) != 0)
1859 		return (error);
1860 	vp = nd.ni_vp;
1861 	NDFREE(&nd, NDF_ONLY_PNBUF);
1862 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1863 	vput(vp);
1864 	if (error)
1865 		return (error);
1866 	cvtnstat(&sb, &nsb);
1867 	error = copyout(&nsb, uap->ub, sizeof (nsb));
1868 	return (error);
1869 }
1870 
1871 /*
1872  * Get configurable pathname variables.
1873  */
1874 #ifndef _SYS_SYSPROTO_H_
1875 struct pathconf_args {
1876 	char	*path;
1877 	int	name;
1878 };
1879 #endif
1880 /* ARGSUSED */
1881 int
1882 pathconf(td, uap)
1883 	struct thread *td;
1884 	register struct pathconf_args /* {
1885 		char *path;
1886 		int name;
1887 	} */ *uap;
1888 {
1889 	int error;
1890 	struct nameidata nd;
1891 
1892 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1893 	    uap->path, td);
1894 	if ((error = namei(&nd)) != 0)
1895 		return (error);
1896 	NDFREE(&nd, NDF_ONLY_PNBUF);
1897 
1898 	/* If asynchronous I/O is available, it works for all files. */
1899 	if (uap->name == _PC_ASYNC_IO)
1900 		td->td_retval[0] = async_io_version;
1901 	else
1902 		error = VOP_PATHCONF(nd.ni_vp, uap->name, td->td_retval);
1903 	vput(nd.ni_vp);
1904 	return (error);
1905 }
1906 
1907 /*
1908  * Return target name of a symbolic link.
1909  */
1910 #ifndef _SYS_SYSPROTO_H_
1911 struct readlink_args {
1912 	char	*path;
1913 	char	*buf;
1914 	int	count;
1915 };
1916 #endif
1917 /* ARGSUSED */
1918 int
1919 readlink(td, uap)
1920 	struct thread *td;
1921 	register struct readlink_args /* {
1922 		char *path;
1923 		char *buf;
1924 		int count;
1925 	} */ *uap;
1926 {
1927 
1928 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
1929 	    UIO_USERSPACE, uap->count));
1930 }
1931 
1932 int
1933 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
1934     enum uio_seg bufseg, int count)
1935 {
1936 	register struct vnode *vp;
1937 	struct iovec aiov;
1938 	struct uio auio;
1939 	int error;
1940 	struct nameidata nd;
1941 
1942 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1943 	if ((error = namei(&nd)) != 0)
1944 		return (error);
1945 	NDFREE(&nd, NDF_ONLY_PNBUF);
1946 	vp = nd.ni_vp;
1947 #ifdef MAC
1948 	error = mac_check_vnode_readlink(td->td_ucred, vp);
1949 	if (error) {
1950 		vput(vp);
1951 		return (error);
1952 	}
1953 #endif
1954 	if (vp->v_type != VLNK)
1955 		error = EINVAL;
1956 	else {
1957 		aiov.iov_base = buf;
1958 		aiov.iov_len = count;
1959 		auio.uio_iov = &aiov;
1960 		auio.uio_iovcnt = 1;
1961 		auio.uio_offset = 0;
1962 		auio.uio_rw = UIO_READ;
1963 		auio.uio_segflg = bufseg;
1964 		auio.uio_td = td;
1965 		auio.uio_resid = count;
1966 		error = VOP_READLINK(vp, &auio, td->td_ucred);
1967 	}
1968 	vput(vp);
1969 	td->td_retval[0] = count - auio.uio_resid;
1970 	return (error);
1971 }
1972 
1973 /*
1974  * Common implementation code for chflags() and fchflags().
1975  */
1976 static int
1977 setfflags(td, vp, flags)
1978 	struct thread *td;
1979 	struct vnode *vp;
1980 	int flags;
1981 {
1982 	int error;
1983 	struct mount *mp;
1984 	struct vattr vattr;
1985 
1986 	/*
1987 	 * Prevent non-root users from setting flags on devices.  When
1988 	 * a device is reused, users can retain ownership of the device
1989 	 * if they are allowed to set flags and programs assume that
1990 	 * chown can't fail when done as root.
1991 	 */
1992 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
1993 		error = suser_cred(td->td_ucred, PRISON_ROOT);
1994 		if (error)
1995 			return (error);
1996 	}
1997 
1998 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1999 		return (error);
2000 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2001 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2002 	VATTR_NULL(&vattr);
2003 	vattr.va_flags = flags;
2004 #ifdef MAC
2005 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2006 	if (error == 0)
2007 #endif
2008 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2009 	VOP_UNLOCK(vp, 0, td);
2010 	vn_finished_write(mp);
2011 	return (error);
2012 }
2013 
2014 /*
2015  * Change flags of a file given a path name.
2016  */
2017 #ifndef _SYS_SYSPROTO_H_
2018 struct chflags_args {
2019 	char	*path;
2020 	int	flags;
2021 };
2022 #endif
2023 /* ARGSUSED */
2024 int
2025 chflags(td, uap)
2026 	struct thread *td;
2027 	register struct chflags_args /* {
2028 		char *path;
2029 		int flags;
2030 	} */ *uap;
2031 {
2032 	int error;
2033 	struct nameidata nd;
2034 
2035 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
2036 	if ((error = namei(&nd)) != 0)
2037 		return (error);
2038 	NDFREE(&nd, NDF_ONLY_PNBUF);
2039 	error = setfflags(td, nd.ni_vp, uap->flags);
2040 	vrele(nd.ni_vp);
2041 	return error;
2042 }
2043 
2044 /*
2045  * Same as chflags() but doesn't follow symlinks.
2046  */
2047 int
2048 lchflags(td, uap)
2049 	struct thread *td;
2050 	register struct lchflags_args /* {
2051 		char *path;
2052 		int flags;
2053 	} */ *uap;
2054 {
2055 	int error;
2056 	struct nameidata nd;
2057 
2058 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2059 	if ((error = namei(&nd)) != 0)
2060 		return (error);
2061 	NDFREE(&nd, NDF_ONLY_PNBUF);
2062 	error = setfflags(td, nd.ni_vp, uap->flags);
2063 	vrele(nd.ni_vp);
2064 	return error;
2065 }
2066 
2067 /*
2068  * Change flags of a file given a file descriptor.
2069  */
2070 #ifndef _SYS_SYSPROTO_H_
2071 struct fchflags_args {
2072 	int	fd;
2073 	int	flags;
2074 };
2075 #endif
2076 /* ARGSUSED */
2077 int
2078 fchflags(td, uap)
2079 	struct thread *td;
2080 	register struct fchflags_args /* {
2081 		int fd;
2082 		int flags;
2083 	} */ *uap;
2084 {
2085 	struct file *fp;
2086 	int error;
2087 
2088 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2089 		return (error);
2090 	error = setfflags(td, fp->f_vnode, uap->flags);
2091 	fdrop(fp, td);
2092 	return (error);
2093 }
2094 
2095 /*
2096  * Common implementation code for chmod(), lchmod() and fchmod().
2097  */
2098 static int
2099 setfmode(td, vp, mode)
2100 	struct thread *td;
2101 	struct vnode *vp;
2102 	int mode;
2103 {
2104 	int error;
2105 	struct mount *mp;
2106 	struct vattr vattr;
2107 
2108 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2109 		return (error);
2110 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2111 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2112 	VATTR_NULL(&vattr);
2113 	vattr.va_mode = mode & ALLPERMS;
2114 #ifdef MAC
2115 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2116 	if (error == 0)
2117 #endif
2118 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2119 	VOP_UNLOCK(vp, 0, td);
2120 	vn_finished_write(mp);
2121 	return error;
2122 }
2123 
2124 /*
2125  * Change mode of a file given path name.
2126  */
2127 #ifndef _SYS_SYSPROTO_H_
2128 struct chmod_args {
2129 	char	*path;
2130 	int	mode;
2131 };
2132 #endif
2133 /* ARGSUSED */
2134 int
2135 chmod(td, uap)
2136 	struct thread *td;
2137 	register struct chmod_args /* {
2138 		char *path;
2139 		int mode;
2140 	} */ *uap;
2141 {
2142 
2143 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2144 }
2145 
2146 int
2147 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2148 {
2149 	int error;
2150 	struct nameidata nd;
2151 
2152 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2153 	if ((error = namei(&nd)) != 0)
2154 		return (error);
2155 	NDFREE(&nd, NDF_ONLY_PNBUF);
2156 	error = setfmode(td, nd.ni_vp, mode);
2157 	vrele(nd.ni_vp);
2158 	return error;
2159 }
2160 
2161 /*
2162  * Change mode of a file given path name (don't follow links.)
2163  */
2164 #ifndef _SYS_SYSPROTO_H_
2165 struct lchmod_args {
2166 	char	*path;
2167 	int	mode;
2168 };
2169 #endif
2170 /* ARGSUSED */
2171 int
2172 lchmod(td, uap)
2173 	struct thread *td;
2174 	register struct lchmod_args /* {
2175 		char *path;
2176 		int mode;
2177 	} */ *uap;
2178 {
2179 	int error;
2180 	struct nameidata nd;
2181 
2182 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2183 	if ((error = namei(&nd)) != 0)
2184 		return (error);
2185 	NDFREE(&nd, NDF_ONLY_PNBUF);
2186 	error = setfmode(td, nd.ni_vp, uap->mode);
2187 	vrele(nd.ni_vp);
2188 	return error;
2189 }
2190 
2191 /*
2192  * Change mode of a file given a file descriptor.
2193  */
2194 #ifndef _SYS_SYSPROTO_H_
2195 struct fchmod_args {
2196 	int	fd;
2197 	int	mode;
2198 };
2199 #endif
2200 /* ARGSUSED */
2201 int
2202 fchmod(td, uap)
2203 	struct thread *td;
2204 	register struct fchmod_args /* {
2205 		int fd;
2206 		int mode;
2207 	} */ *uap;
2208 {
2209 	struct file *fp;
2210 	int error;
2211 
2212 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2213 		return (error);
2214 	error = setfmode(td, fp->f_vnode, uap->mode);
2215 	fdrop(fp, td);
2216 	return (error);
2217 }
2218 
2219 /*
2220  * Common implementation for chown(), lchown(), and fchown()
2221  */
2222 static int
2223 setfown(td, vp, uid, gid)
2224 	struct thread *td;
2225 	struct vnode *vp;
2226 	uid_t uid;
2227 	gid_t gid;
2228 {
2229 	int error;
2230 	struct mount *mp;
2231 	struct vattr vattr;
2232 
2233 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2234 		return (error);
2235 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2236 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2237 	VATTR_NULL(&vattr);
2238 	vattr.va_uid = uid;
2239 	vattr.va_gid = gid;
2240 #ifdef MAC
2241 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2242 	    vattr.va_gid);
2243 	if (error == 0)
2244 #endif
2245 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2246 	VOP_UNLOCK(vp, 0, td);
2247 	vn_finished_write(mp);
2248 	return error;
2249 }
2250 
2251 /*
2252  * Set ownership given a path name.
2253  */
2254 #ifndef _SYS_SYSPROTO_H_
2255 struct chown_args {
2256 	char	*path;
2257 	int	uid;
2258 	int	gid;
2259 };
2260 #endif
2261 /* ARGSUSED */
2262 int
2263 chown(td, uap)
2264 	struct thread *td;
2265 	register struct chown_args /* {
2266 		char *path;
2267 		int uid;
2268 		int gid;
2269 	} */ *uap;
2270 {
2271 
2272 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2273 }
2274 
2275 int
2276 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2277     int gid)
2278 {
2279 	int error;
2280 	struct nameidata nd;
2281 
2282 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2283 	if ((error = namei(&nd)) != 0)
2284 		return (error);
2285 	NDFREE(&nd, NDF_ONLY_PNBUF);
2286 	error = setfown(td, nd.ni_vp, uid, gid);
2287 	vrele(nd.ni_vp);
2288 	return (error);
2289 }
2290 
2291 /*
2292  * Set ownership given a path name, do not cross symlinks.
2293  */
2294 #ifndef _SYS_SYSPROTO_H_
2295 struct lchown_args {
2296 	char	*path;
2297 	int	uid;
2298 	int	gid;
2299 };
2300 #endif
2301 /* ARGSUSED */
2302 int
2303 lchown(td, uap)
2304 	struct thread *td;
2305 	register struct lchown_args /* {
2306 		char *path;
2307 		int uid;
2308 		int gid;
2309 	} */ *uap;
2310 {
2311 
2312 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2313 }
2314 
2315 int
2316 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2317     int gid)
2318 {
2319 	int error;
2320 	struct nameidata nd;
2321 
2322 	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2323 	if ((error = namei(&nd)) != 0)
2324 		return (error);
2325 	NDFREE(&nd, NDF_ONLY_PNBUF);
2326 	error = setfown(td, nd.ni_vp, uid, gid);
2327 	vrele(nd.ni_vp);
2328 	return (error);
2329 }
2330 
2331 /*
2332  * Set ownership given a file descriptor.
2333  */
2334 #ifndef _SYS_SYSPROTO_H_
2335 struct fchown_args {
2336 	int	fd;
2337 	int	uid;
2338 	int	gid;
2339 };
2340 #endif
2341 /* ARGSUSED */
2342 int
2343 fchown(td, uap)
2344 	struct thread *td;
2345 	register struct fchown_args /* {
2346 		int fd;
2347 		int uid;
2348 		int gid;
2349 	} */ *uap;
2350 {
2351 	struct file *fp;
2352 	int error;
2353 
2354 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2355 		return (error);
2356 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2357 	fdrop(fp, td);
2358 	return (error);
2359 }
2360 
2361 /*
2362  * Common implementation code for utimes(), lutimes(), and futimes().
2363  */
2364 static int
2365 getutimes(usrtvp, tvpseg, tsp)
2366 	const struct timeval *usrtvp;
2367 	enum uio_seg tvpseg;
2368 	struct timespec *tsp;
2369 {
2370 	struct timeval tv[2];
2371 	const struct timeval *tvp;
2372 	int error;
2373 
2374 	if (usrtvp == NULL) {
2375 		microtime(&tv[0]);
2376 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2377 		tsp[1] = tsp[0];
2378 	} else {
2379 		if (tvpseg == UIO_SYSSPACE) {
2380 			tvp = usrtvp;
2381 		} else {
2382 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2383 				return (error);
2384 			tvp = tv;
2385 		}
2386 
2387 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2388 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2389 	}
2390 	return 0;
2391 }
2392 
2393 /*
2394  * Common implementation code for utimes(), lutimes(), and futimes().
2395  */
2396 static int
2397 setutimes(td, vp, ts, numtimes, nullflag)
2398 	struct thread *td;
2399 	struct vnode *vp;
2400 	const struct timespec *ts;
2401 	int numtimes;
2402 	int nullflag;
2403 {
2404 	int error, setbirthtime;
2405 	struct mount *mp;
2406 	struct vattr vattr;
2407 
2408 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2409 		return (error);
2410 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2411 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2412 	setbirthtime = 0;
2413 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2414 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2415 		setbirthtime = 1;
2416 	VATTR_NULL(&vattr);
2417 	vattr.va_atime = ts[0];
2418 	vattr.va_mtime = ts[1];
2419 	if (setbirthtime)
2420 		vattr.va_birthtime = ts[1];
2421 	if (numtimes > 2)
2422 		vattr.va_birthtime = ts[2];
2423 	if (nullflag)
2424 		vattr.va_vaflags |= VA_UTIMES_NULL;
2425 #ifdef MAC
2426 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2427 	    vattr.va_mtime);
2428 #endif
2429 	if (error == 0)
2430 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2431 	VOP_UNLOCK(vp, 0, td);
2432 	vn_finished_write(mp);
2433 	return error;
2434 }
2435 
2436 /*
2437  * Set the access and modification times of a file.
2438  */
2439 #ifndef _SYS_SYSPROTO_H_
2440 struct utimes_args {
2441 	char	*path;
2442 	struct	timeval *tptr;
2443 };
2444 #endif
2445 /* ARGSUSED */
2446 int
2447 utimes(td, uap)
2448 	struct thread *td;
2449 	register struct utimes_args /* {
2450 		char *path;
2451 		struct timeval *tptr;
2452 	} */ *uap;
2453 {
2454 
2455 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2456 	    UIO_USERSPACE));
2457 }
2458 
2459 int
2460 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2461     struct timeval *tptr, enum uio_seg tptrseg)
2462 {
2463 	struct timespec ts[2];
2464 	int error;
2465 	struct nameidata nd;
2466 
2467 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2468 		return (error);
2469 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2470 	if ((error = namei(&nd)) != 0)
2471 		return (error);
2472 	NDFREE(&nd, NDF_ONLY_PNBUF);
2473 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2474 	vrele(nd.ni_vp);
2475 	return (error);
2476 }
2477 
2478 /*
2479  * Set the access and modification times of a file.
2480  */
2481 #ifndef _SYS_SYSPROTO_H_
2482 struct lutimes_args {
2483 	char	*path;
2484 	struct	timeval *tptr;
2485 };
2486 #endif
2487 /* ARGSUSED */
2488 int
2489 lutimes(td, uap)
2490 	struct thread *td;
2491 	register struct lutimes_args /* {
2492 		char *path;
2493 		struct timeval *tptr;
2494 	} */ *uap;
2495 {
2496 
2497 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2498 	    UIO_USERSPACE));
2499 }
2500 
2501 int
2502 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2503     struct timeval *tptr, enum uio_seg tptrseg)
2504 {
2505 	struct timespec ts[2];
2506 	int error;
2507 	struct nameidata nd;
2508 
2509 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2510 		return (error);
2511 	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2512 	if ((error = namei(&nd)) != 0)
2513 		return (error);
2514 	NDFREE(&nd, NDF_ONLY_PNBUF);
2515 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2516 	vrele(nd.ni_vp);
2517 	return (error);
2518 }
2519 
2520 /*
2521  * Set the access and modification times of a file.
2522  */
2523 #ifndef _SYS_SYSPROTO_H_
2524 struct futimes_args {
2525 	int	fd;
2526 	struct	timeval *tptr;
2527 };
2528 #endif
2529 /* ARGSUSED */
2530 int
2531 futimes(td, uap)
2532 	struct thread *td;
2533 	register struct futimes_args /* {
2534 		int  fd;
2535 		struct timeval *tptr;
2536 	} */ *uap;
2537 {
2538 
2539 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2540 }
2541 
2542 int
2543 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2544     enum uio_seg tptrseg)
2545 {
2546 	struct timespec ts[2];
2547 	struct file *fp;
2548 	int error;
2549 
2550 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2551 		return (error);
2552 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2553 		return (error);
2554 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2555 	fdrop(fp, td);
2556 	return (error);
2557 }
2558 
2559 /*
2560  * Truncate a file given its path name.
2561  */
2562 #ifndef _SYS_SYSPROTO_H_
2563 struct truncate_args {
2564 	char	*path;
2565 	int	pad;
2566 	off_t	length;
2567 };
2568 #endif
2569 /* ARGSUSED */
2570 int
2571 truncate(td, uap)
2572 	struct thread *td;
2573 	register struct truncate_args /* {
2574 		char *path;
2575 		int pad;
2576 		off_t length;
2577 	} */ *uap;
2578 {
2579 
2580 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2581 }
2582 
2583 int
2584 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2585 {
2586 	struct mount *mp;
2587 	struct vnode *vp;
2588 	struct vattr vattr;
2589 	int error;
2590 	struct nameidata nd;
2591 
2592 	if (length < 0)
2593 		return(EINVAL);
2594 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2595 	if ((error = namei(&nd)) != 0)
2596 		return (error);
2597 	vp = nd.ni_vp;
2598 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2599 		vrele(vp);
2600 		return (error);
2601 	}
2602 	NDFREE(&nd, NDF_ONLY_PNBUF);
2603 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2604 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2605 	if (vp->v_type == VDIR)
2606 		error = EISDIR;
2607 #ifdef MAC
2608 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2609 	}
2610 #endif
2611 	else if ((error = vn_writechk(vp)) == 0 &&
2612 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2613 		VATTR_NULL(&vattr);
2614 		vattr.va_size = length;
2615 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2616 	}
2617 	vput(vp);
2618 	vn_finished_write(mp);
2619 	return (error);
2620 }
2621 
2622 /*
2623  * Truncate a file given a file descriptor.
2624  */
2625 #ifndef _SYS_SYSPROTO_H_
2626 struct ftruncate_args {
2627 	int	fd;
2628 	int	pad;
2629 	off_t	length;
2630 };
2631 #endif
2632 /* ARGSUSED */
2633 int
2634 ftruncate(td, uap)
2635 	struct thread *td;
2636 	register struct ftruncate_args /* {
2637 		int fd;
2638 		int pad;
2639 		off_t length;
2640 	} */ *uap;
2641 {
2642 	struct mount *mp;
2643 	struct vattr vattr;
2644 	struct vnode *vp;
2645 	struct file *fp;
2646 	int error;
2647 
2648 	if (uap->length < 0)
2649 		return(EINVAL);
2650 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2651 		return (error);
2652 	if ((fp->f_flag & FWRITE) == 0) {
2653 		fdrop(fp, td);
2654 		return (EINVAL);
2655 	}
2656 	vp = fp->f_vnode;
2657 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2658 		fdrop(fp, td);
2659 		return (error);
2660 	}
2661 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2662 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2663 	if (vp->v_type == VDIR)
2664 		error = EISDIR;
2665 #ifdef MAC
2666 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
2667 	    vp))) {
2668 	}
2669 #endif
2670 	else if ((error = vn_writechk(vp)) == 0) {
2671 		VATTR_NULL(&vattr);
2672 		vattr.va_size = uap->length;
2673 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2674 	}
2675 	VOP_UNLOCK(vp, 0, td);
2676 	vn_finished_write(mp);
2677 	fdrop(fp, td);
2678 	return (error);
2679 }
2680 
2681 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2682 /*
2683  * Truncate a file given its path name.
2684  */
2685 #ifndef _SYS_SYSPROTO_H_
2686 struct otruncate_args {
2687 	char	*path;
2688 	long	length;
2689 };
2690 #endif
2691 /* ARGSUSED */
2692 int
2693 otruncate(td, uap)
2694 	struct thread *td;
2695 	register struct otruncate_args /* {
2696 		char *path;
2697 		long length;
2698 	} */ *uap;
2699 {
2700 	struct truncate_args /* {
2701 		char *path;
2702 		int pad;
2703 		off_t length;
2704 	} */ nuap;
2705 
2706 	nuap.path = uap->path;
2707 	nuap.length = uap->length;
2708 	return (truncate(td, &nuap));
2709 }
2710 
2711 /*
2712  * Truncate a file given a file descriptor.
2713  */
2714 #ifndef _SYS_SYSPROTO_H_
2715 struct oftruncate_args {
2716 	int	fd;
2717 	long	length;
2718 };
2719 #endif
2720 /* ARGSUSED */
2721 int
2722 oftruncate(td, uap)
2723 	struct thread *td;
2724 	register struct oftruncate_args /* {
2725 		int fd;
2726 		long length;
2727 	} */ *uap;
2728 {
2729 	struct ftruncate_args /* {
2730 		int fd;
2731 		int pad;
2732 		off_t length;
2733 	} */ nuap;
2734 
2735 	nuap.fd = uap->fd;
2736 	nuap.length = uap->length;
2737 	return (ftruncate(td, &nuap));
2738 }
2739 #endif /* COMPAT_43 || COMPAT_SUNOS */
2740 
2741 /*
2742  * Sync an open file.
2743  */
2744 #ifndef _SYS_SYSPROTO_H_
2745 struct fsync_args {
2746 	int	fd;
2747 };
2748 #endif
2749 /* ARGSUSED */
2750 int
2751 fsync(td, uap)
2752 	struct thread *td;
2753 	struct fsync_args /* {
2754 		int fd;
2755 	} */ *uap;
2756 {
2757 	struct vnode *vp;
2758 	struct mount *mp;
2759 	struct file *fp;
2760 	vm_object_t obj;
2761 	int error;
2762 
2763 	GIANT_REQUIRED;
2764 
2765 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2766 		return (error);
2767 	vp = fp->f_vnode;
2768 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2769 		fdrop(fp, td);
2770 		return (error);
2771 	}
2772 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2773 	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2774 		VM_OBJECT_LOCK(obj);
2775 		vm_object_page_clean(obj, 0, 0, 0);
2776 		VM_OBJECT_UNLOCK(obj);
2777 	}
2778 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2779 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)
2780 	    && softdep_fsync_hook != NULL)
2781 		error = (*softdep_fsync_hook)(vp);
2782 
2783 	VOP_UNLOCK(vp, 0, td);
2784 	vn_finished_write(mp);
2785 	fdrop(fp, td);
2786 	return (error);
2787 }
2788 
2789 /*
2790  * Rename files.  Source and destination must either both be directories,
2791  * or both not be directories.  If target is a directory, it must be empty.
2792  */
2793 #ifndef _SYS_SYSPROTO_H_
2794 struct rename_args {
2795 	char	*from;
2796 	char	*to;
2797 };
2798 #endif
2799 /* ARGSUSED */
2800 int
2801 rename(td, uap)
2802 	struct thread *td;
2803 	register struct rename_args /* {
2804 		char *from;
2805 		char *to;
2806 	} */ *uap;
2807 {
2808 
2809 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
2810 }
2811 
2812 int
2813 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
2814 {
2815 	struct mount *mp = NULL;
2816 	struct vnode *tvp, *fvp, *tdvp;
2817 	struct nameidata fromnd, tond;
2818 	int error;
2819 
2820 	bwillwrite();
2821 #ifdef MAC
2822 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART, pathseg,
2823 	    from, td);
2824 #else
2825 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, pathseg, from, td);
2826 #endif
2827 	if ((error = namei(&fromnd)) != 0)
2828 		return (error);
2829 #ifdef MAC
2830 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
2831 	    fromnd.ni_vp, &fromnd.ni_cnd);
2832 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
2833 	VOP_UNLOCK(fromnd.ni_vp, 0, td);
2834 #endif
2835 	fvp = fromnd.ni_vp;
2836 	if (error == 0)
2837 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
2838 	if (error != 0) {
2839 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2840 		vrele(fromnd.ni_dvp);
2841 		vrele(fvp);
2842 		goto out1;
2843 	}
2844 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
2845 	    NOOBJ, pathseg, to, td);
2846 	if (fromnd.ni_vp->v_type == VDIR)
2847 		tond.ni_cnd.cn_flags |= WILLBEDIR;
2848 	if ((error = namei(&tond)) != 0) {
2849 		/* Translate error code for rename("dir1", "dir2/."). */
2850 		if (error == EISDIR && fvp->v_type == VDIR)
2851 			error = EINVAL;
2852 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2853 		vrele(fromnd.ni_dvp);
2854 		vrele(fvp);
2855 		goto out1;
2856 	}
2857 	tdvp = tond.ni_dvp;
2858 	tvp = tond.ni_vp;
2859 	if (tvp != NULL) {
2860 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2861 			error = ENOTDIR;
2862 			goto out;
2863 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2864 			error = EISDIR;
2865 			goto out;
2866 		}
2867 	}
2868 	if (fvp == tdvp)
2869 		error = EINVAL;
2870 	/*
2871 	 * If the source is the same as the destination (that is, if they
2872 	 * are links to the same vnode), then there is nothing to do.
2873 	 */
2874 	if (fvp == tvp)
2875 		error = -1;
2876 #ifdef MAC
2877 	else
2878 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
2879 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
2880 #endif
2881 out:
2882 	if (!error) {
2883 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
2884 		if (fromnd.ni_dvp != tdvp) {
2885 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2886 		}
2887 		if (tvp) {
2888 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
2889 		}
2890 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2891 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2892 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2893 		NDFREE(&tond, NDF_ONLY_PNBUF);
2894 	} else {
2895 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2896 		NDFREE(&tond, NDF_ONLY_PNBUF);
2897 		if (tdvp == tvp)
2898 			vrele(tdvp);
2899 		else
2900 			vput(tdvp);
2901 		if (tvp)
2902 			vput(tvp);
2903 		vrele(fromnd.ni_dvp);
2904 		vrele(fvp);
2905 	}
2906 	vrele(tond.ni_startdir);
2907 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2908 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2909 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2910 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2911 out1:
2912 	vn_finished_write(mp);
2913 	if (fromnd.ni_startdir)
2914 		vrele(fromnd.ni_startdir);
2915 	if (error == -1)
2916 		return (0);
2917 	return (error);
2918 }
2919 
2920 /*
2921  * Make a directory file.
2922  */
2923 #ifndef _SYS_SYSPROTO_H_
2924 struct mkdir_args {
2925 	char	*path;
2926 	int	mode;
2927 };
2928 #endif
2929 /* ARGSUSED */
2930 int
2931 mkdir(td, uap)
2932 	struct thread *td;
2933 	register struct mkdir_args /* {
2934 		char *path;
2935 		int mode;
2936 	} */ *uap;
2937 {
2938 
2939 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
2940 }
2941 
2942 int
2943 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
2944 {
2945 	struct mount *mp;
2946 	struct vnode *vp;
2947 	struct vattr vattr;
2948 	int error;
2949 	struct nameidata nd;
2950 
2951 restart:
2952 	bwillwrite();
2953 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, path, td);
2954 	nd.ni_cnd.cn_flags |= WILLBEDIR;
2955 	if ((error = namei(&nd)) != 0)
2956 		return (error);
2957 	vp = nd.ni_vp;
2958 	if (vp != NULL) {
2959 		NDFREE(&nd, NDF_ONLY_PNBUF);
2960 		vrele(vp);
2961 		/*
2962 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
2963 		 * the strange behaviour of leaving the vnode unlocked
2964 		 * if the target is the same vnode as the parent.
2965 		 */
2966 		if (vp == nd.ni_dvp)
2967 			vrele(nd.ni_dvp);
2968 		else
2969 			vput(nd.ni_dvp);
2970 		return (EEXIST);
2971 	}
2972 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2973 		NDFREE(&nd, NDF_ONLY_PNBUF);
2974 		vput(nd.ni_dvp);
2975 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2976 			return (error);
2977 		goto restart;
2978 	}
2979 	VATTR_NULL(&vattr);
2980 	vattr.va_type = VDIR;
2981 	FILEDESC_LOCK(td->td_proc->p_fd);
2982 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
2983 	FILEDESC_UNLOCK(td->td_proc->p_fd);
2984 #ifdef MAC
2985 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
2986 	    &vattr);
2987 	if (error)
2988 		goto out;
2989 #endif
2990 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2991 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2992 #ifdef MAC
2993 out:
2994 #endif
2995 	NDFREE(&nd, NDF_ONLY_PNBUF);
2996 	vput(nd.ni_dvp);
2997 	if (!error)
2998 		vput(nd.ni_vp);
2999 	vn_finished_write(mp);
3000 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
3001 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
3002 	return (error);
3003 }
3004 
3005 /*
3006  * Remove a directory file.
3007  */
3008 #ifndef _SYS_SYSPROTO_H_
3009 struct rmdir_args {
3010 	char	*path;
3011 };
3012 #endif
3013 /* ARGSUSED */
3014 int
3015 rmdir(td, uap)
3016 	struct thread *td;
3017 	struct rmdir_args /* {
3018 		char *path;
3019 	} */ *uap;
3020 {
3021 
3022 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3023 }
3024 
3025 int
3026 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3027 {
3028 	struct mount *mp;
3029 	struct vnode *vp;
3030 	int error;
3031 	struct nameidata nd;
3032 
3033 restart:
3034 	bwillwrite();
3035 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, pathseg, path, td);
3036 	if ((error = namei(&nd)) != 0)
3037 		return (error);
3038 	vp = nd.ni_vp;
3039 	if (vp->v_type != VDIR) {
3040 		error = ENOTDIR;
3041 		goto out;
3042 	}
3043 	/*
3044 	 * No rmdir "." please.
3045 	 */
3046 	if (nd.ni_dvp == vp) {
3047 		error = EINVAL;
3048 		goto out;
3049 	}
3050 	/*
3051 	 * The root of a mounted filesystem cannot be deleted.
3052 	 */
3053 	if (vp->v_vflag & VV_ROOT) {
3054 		error = EBUSY;
3055 		goto out;
3056 	}
3057 #ifdef MAC
3058 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3059 	    &nd.ni_cnd);
3060 	if (error)
3061 		goto out;
3062 #endif
3063 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3064 		NDFREE(&nd, NDF_ONLY_PNBUF);
3065 		if (nd.ni_dvp == vp)
3066 			vrele(nd.ni_dvp);
3067 		else
3068 			vput(nd.ni_dvp);
3069 		vput(vp);
3070 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3071 			return (error);
3072 		goto restart;
3073 	}
3074 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3075 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3076 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3077 	vn_finished_write(mp);
3078 out:
3079 	NDFREE(&nd, NDF_ONLY_PNBUF);
3080 	if (nd.ni_dvp == vp)
3081 		vrele(nd.ni_dvp);
3082 	else
3083 		vput(nd.ni_dvp);
3084 	vput(vp);
3085 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3086 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3087 	return (error);
3088 }
3089 
3090 #ifdef COMPAT_43
3091 /*
3092  * Read a block of directory entries in a filesystem independent format.
3093  */
3094 #ifndef _SYS_SYSPROTO_H_
3095 struct ogetdirentries_args {
3096 	int	fd;
3097 	char	*buf;
3098 	u_int	count;
3099 	long	*basep;
3100 };
3101 #endif
/*
 * COMPAT_43 variant of getdirentries.
 *
 * Reads up to uap->count bytes of directory entries from descriptor
 * uap->fd into the user buffer uap->buf.  Unless the entries are read
 * straight into the user buffer (see the BYTE_ORDER test below), they
 * are first read into a temporary kernel buffer where the d_type and
 * d_namlen members of every entry are rewritten, and only then copied
 * out to the user.
 *
 * The file offset at which the read started is copied out to
 * uap->basep and the number of bytes transferred is returned in
 * td->td_retval[0].
 *
 * Errors visible here: EINVAL (count above 64K, or not a directory),
 * EBADF (descriptor not open for reading), EIO (an entry with a zero
 * d_reclen), plus anything returned by getvnode(), the MAC check,
 * VOP_READDIR(), uiomove(), union_dircheckp() or copyout().
 */
3102 int
3103 ogetdirentries(td, uap)
3104 	struct thread *td;
3105 	register struct ogetdirentries_args /* {
3106 		int fd;
3107 		char *buf;
3108 		u_int count;
3109 		long *basep;
3110 	} */ *uap;
3111 {
3112 	struct vnode *vp;
3113 	struct file *fp;
3114 	struct uio auio, kuio;
3115 	struct iovec aiov, kiov;
3116 	struct dirent *dp, *edp;
3117 	caddr_t dirbuf;
3118 	int error, eofflag, readcnt;
3119 	long loff;
3120 
3121 	/* XXX arbitrary sanity limit on `count'. */
3122 	if (uap->count > 64 * 1024)
3123 		return (EINVAL);
3124 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3125 		return (error);
3126 	if ((fp->f_flag & FREAD) == 0) {
3127 		fdrop(fp, td);
3128 		return (EBADF);
3129 	}
3130 	vp = fp->f_vnode;
	/*
	 * Re-entered whenever the read has to be redone on a different
	 * vnode (see the union handling at the bottom).
	 */
3131 unionread:
3132 	if (vp->v_type != VDIR) {
3133 		fdrop(fp, td);
3134 		return (EINVAL);
3135 	}
	/* Describe the caller's buffer as a single-segment user-space read. */
3136 	aiov.iov_base = uap->buf;
3137 	aiov.iov_len = uap->count;
3138 	auio.uio_iov = &aiov;
3139 	auio.uio_iovcnt = 1;
3140 	auio.uio_rw = UIO_READ;
3141 	auio.uio_segflg = UIO_USERSPACE;
3142 	auio.uio_td = td;
3143 	auio.uio_resid = uap->count;
3144 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember where this read started; it is reported via basep. */
3145 	loff = auio.uio_offset = fp->f_offset;
3146 #ifdef MAC
3147 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3148 	if (error) {
3149 		VOP_UNLOCK(vp, 0, td);
3150 		fdrop(fp, td);
3151 		return (error);
3152 	}
3153 #endif
	/*
	 * When BYTE_ORDER is not LITTLE_ENDIAN and the mount's
	 * mnt_maxsymlinklen is <= 0, the entries are read directly into
	 * the user buffer with no rewriting.  In every other case the
	 * braced block below runs.
	 */
3154 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3155 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3156 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3157 			    NULL, NULL);
3158 			fp->f_offset = auio.uio_offset;
3159 		} else
3160 #	endif
3161 	{
		/*
		 * Same request as auio, but targeting a temporary kernel
		 * buffer of uap->count bytes.
		 */
3162 		kuio = auio;
3163 		kuio.uio_iov = &kiov;
3164 		kuio.uio_segflg = UIO_SYSSPACE;
3165 		kiov.iov_len = uap->count;
3166 		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3167 		kiov.iov_base = dirbuf;
3168 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3169 			    NULL, NULL);
3170 		fp->f_offset = kuio.uio_offset;
3171 		if (error == 0) {
			/* Walk the entries actually read, fixing each up. */
3172 			readcnt = uap->count - kuio.uio_resid;
3173 			edp = (struct dirent *)&dirbuf[readcnt];
3174 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3175 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3176 					/*
3177 					 * The expected low byte of
3178 					 * dp->d_namlen is our dp->d_type.
3179 					 * The high MBZ byte of dp->d_namlen
3180 					 * is our dp->d_namlen.
3181 					 */
3182 					dp->d_type = dp->d_namlen;
3183 					dp->d_namlen = 0;
3184 #				else
3185 					/*
3186 					 * The dp->d_type is the high byte
3187 					 * of the expected dp->d_namlen,
3188 					 * so must be zero'ed.
3189 					 */
3190 					dp->d_type = 0;
3191 #				endif
				/* A zero record length would never advance. */
3192 				if (dp->d_reclen > 0) {
3193 					dp = (struct dirent *)
3194 					    ((char *)dp + dp->d_reclen);
3195 				} else {
3196 					error = EIO;
3197 					break;
3198 				}
3199 			}
			/* Copy out only if the whole buffer was walked. */
3200 			if (dp >= edp)
3201 				error = uiomove(dirbuf, readcnt, &auio);
3202 		}
3203 		FREE(dirbuf, M_TEMP);
3204 	}
3205 	VOP_UNLOCK(vp, 0, td);
3206 	if (error) {
3207 		fdrop(fp, td);
3208 		return (error);
3209 	}
	/*
	 * Nothing was transferred.  Give the union_dircheckp hook (if
	 * set) a chance to substitute another vnode: a return of -1
	 * means "read again".  After that, if vp is the root of a mount
	 * with MNT_UNION set, switch the file over to the covered vnode,
	 * reset the offset and read again.
	 */
3210 	if (uap->count == auio.uio_resid) {
3211 		if (union_dircheckp) {
3212 			error = union_dircheckp(td, &vp, fp);
3213 			if (error == -1)
3214 				goto unionread;
3215 			if (error) {
3216 				fdrop(fp, td);
3217 				return (error);
3218 			}
3219 		}
3220 		/*
3221 		 * XXX We could delay dropping the lock above but
3222 		 * union_dircheckp complicates things.
3223 		 */
3224 		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
3225 		if ((vp->v_vflag & VV_ROOT) &&
3226 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3227 			struct vnode *tvp = vp;
3228 			vp = vp->v_mount->mnt_vnodecovered;
			/* Reference the new vnode before releasing the old. */
3229 			VREF(vp);
3230 			fp->f_vnode = vp;
3231 			fp->f_data = vp;
3232 			fp->f_offset = 0;
3233 			vput(tvp);
3234 			goto unionread;
3235 		}
3236 		VOP_UNLOCK(vp, 0, td);
3237 	}
	/*
	 * Report the starting offset; basep is written unconditionally
	 * (it is not checked for NULL here).
	 */
3238 	error = copyout(&loff, uap->basep, sizeof(long));
3239 	fdrop(fp, td);
3240 	td->td_retval[0] = uap->count - auio.uio_resid;
3241 	return (error);
3242 }
3243 #endif /* COMPAT_43 */
3244 
3245 /*
3246  * Read a block of directory entries in a filesystem independent format.
3247  */
3248 #ifndef _SYS_SYSPROTO_H_
3249 struct getdirentries_args {
3250 	int	fd;
3251 	char	*buf;
3252 	u_int	count;
3253 	long	*basep;
3254 };
3255 #endif
3256 int
3257 getdirentries(td, uap)
3258 	struct thread *td;
3259 	register struct getdirentries_args /* {
3260 		int fd;
3261 		char *buf;
3262 		u_int count;
3263 		long *basep;
3264 	} */ *uap;
3265 {
3266 	struct vnode *vp;
3267 	struct file *fp;
3268 	struct uio auio;
3269 	struct iovec aiov;
3270 	long loff;
3271 	int error, eofflag;
3272 
3273 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3274 		return (error);
3275 	if ((fp->f_flag & FREAD) == 0) {
3276 		fdrop(fp, td);
3277 		return (EBADF);
3278 	}
3279 	vp = fp->f_vnode;
3280 unionread:
3281 	if (vp->v_type != VDIR) {
3282 		fdrop(fp, td);
3283 		return (EINVAL);
3284 	}
3285 	aiov.iov_base = uap->buf;
3286 	aiov.iov_len = uap->count;
3287 	auio.uio_iov = &aiov;
3288 	auio.uio_iovcnt = 1;
3289 	auio.uio_rw = UIO_READ;
3290 	auio.uio_segflg = UIO_USERSPACE;
3291 	auio.uio_td = td;
3292 	auio.uio_resid = uap->count;
3293 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3294 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3295 	loff = auio.uio_offset = fp->f_offset;
3296 #ifdef MAC
3297 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3298 	if (error == 0)
3299 #endif
3300 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3301 		    NULL);
3302 	fp->f_offset = auio.uio_offset;
3303 	VOP_UNLOCK(vp, 0, td);
3304 	if (error) {
3305 		fdrop(fp, td);
3306 		return (error);
3307 	}
3308 	if (uap->count == auio.uio_resid) {
3309 		if (union_dircheckp) {
3310 			error = union_dircheckp(td, &vp, fp);
3311 			if (error == -1)
3312 				goto unionread;
3313 			if (error) {
3314 				fdrop(fp, td);
3315 				return (error);
3316 			}
3317 		}
3318 		/*
3319 		 * XXX We could delay dropping the lock above but
3320 		 * union_dircheckp complicates things.
3321 		 */
3322 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3323 		if ((vp->v_vflag & VV_ROOT) &&
3324 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3325 			struct vnode *tvp = vp;
3326 			vp = vp->v_mount->mnt_vnodecovered;
3327 			VREF(vp);
3328 			fp->f_vnode = vp;
3329 			fp->f_data = vp;
3330 			fp->f_offset = 0;
3331 			vput(tvp);
3332 			goto unionread;
3333 		}
3334 		VOP_UNLOCK(vp, 0, td);
3335 	}
3336 	if (uap->basep != NULL) {
3337 		error = copyout(&loff, uap->basep, sizeof(long));
3338 	}
3339 	td->td_retval[0] = uap->count - auio.uio_resid;
3340 	fdrop(fp, td);
3341 	return (error);
3342 }
3343 #ifndef _SYS_SYSPROTO_H_
3344 struct getdents_args {
3345 	int fd;
3346 	char *buf;
3347 	size_t count;
3348 };
3349 #endif
3350 int
3351 getdents(td, uap)
3352 	struct thread *td;
3353 	register struct getdents_args /* {
3354 		int fd;
3355 		char *buf;
3356 		u_int count;
3357 	} */ *uap;
3358 {
3359 	struct getdirentries_args ap;
3360 	ap.fd = uap->fd;
3361 	ap.buf = uap->buf;
3362 	ap.count = uap->count;
3363 	ap.basep = NULL;
3364 	return getdirentries(td, &ap);
3365 }
3366 
3367 /*
3368  * Set the mode mask for creation of filesystem nodes.
3369  *
3370  * MP SAFE
3371  */
3372 #ifndef _SYS_SYSPROTO_H_
3373 struct umask_args {
3374 	int	newmask;
3375 };
3376 #endif
3377 int
3378 umask(td, uap)
3379 	struct thread *td;
3380 	struct umask_args /* {
3381 		int newmask;
3382 	} */ *uap;
3383 {
3384 	register struct filedesc *fdp;
3385 
3386 	FILEDESC_LOCK(td->td_proc->p_fd);
3387 	fdp = td->td_proc->p_fd;
3388 	td->td_retval[0] = fdp->fd_cmask;
3389 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3390 	FILEDESC_UNLOCK(td->td_proc->p_fd);
3391 	return (0);
3392 }
3393 
3394 /*
3395  * Void all references to file by ripping underlying filesystem
3396  * away from vnode.
3397  */
3398 #ifndef _SYS_SYSPROTO_H_
3399 struct revoke_args {
3400 	char	*path;
3401 };
3402 #endif
3403 /* ARGSUSED */
3404 int
3405 revoke(td, uap)
3406 	struct thread *td;
3407 	register struct revoke_args /* {
3408 		char *path;
3409 	} */ *uap;
3410 {
3411 	struct mount *mp;
3412 	struct vnode *vp;
3413 	struct vattr vattr;
3414 	int error;
3415 	struct nameidata nd;
3416 
3417 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
3418 	if ((error = namei(&nd)) != 0)
3419 		return (error);
3420 	vp = nd.ni_vp;
3421 	NDFREE(&nd, NDF_ONLY_PNBUF);
3422 	if (vp->v_type != VCHR) {
3423 		vput(vp);
3424 		return (EINVAL);
3425 	}
3426 #ifdef MAC
3427 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3428 	if (error) {
3429 		vput(vp);
3430 		return (error);
3431 	}
3432 #endif
3433 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3434 	if (error) {
3435 		vput(vp);
3436 		return (error);
3437 	}
3438 	VOP_UNLOCK(vp, 0, td);
3439 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3440 		error = suser_cred(td->td_ucred, PRISON_ROOT);
3441 		if (error)
3442 			goto out;
3443 	}
3444 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3445 		goto out;
3446 	if (vcount(vp) > 1)
3447 		VOP_REVOKE(vp, REVOKEALL);
3448 	vn_finished_write(mp);
3449 out:
3450 	vrele(vp);
3451 	return (error);
3452 }
3453 
3454 /*
3455  * Convert a user file descriptor to a kernel file entry.
3456  * The file entry is locked upon returning.
3457  */
3458 int
3459 getvnode(fdp, fd, fpp)
3460 	struct filedesc *fdp;
3461 	int fd;
3462 	struct file **fpp;
3463 {
3464 	int error;
3465 	struct file *fp;
3466 
3467 	fp = NULL;
3468 	if (fdp == NULL)
3469 		error = EBADF;
3470 	else {
3471 		FILEDESC_LOCK(fdp);
3472 		if ((u_int)fd >= fdp->fd_nfiles ||
3473 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3474 			error = EBADF;
3475 		else if (fp->f_vnode == NULL) {
3476 			fp = NULL;
3477 			error = EINVAL;
3478 		} else {
3479 			fhold(fp);
3480 			error = 0;
3481 		}
3482 		FILEDESC_UNLOCK(fdp);
3483 	}
3484 	*fpp = fp;
3485 	return (error);
3486 }
3487 
3488 /*
3489  * Get (NFS) file handle
3490  */
3491 #ifndef _SYS_SYSPROTO_H_
3492 struct getfh_args {
3493 	char	*fname;
3494 	fhandle_t *fhp;
3495 };
3496 #endif
3497 int
3498 getfh(td, uap)
3499 	struct thread *td;
3500 	register struct getfh_args *uap;
3501 {
3502 	struct nameidata nd;
3503 	fhandle_t fh;
3504 	register struct vnode *vp;
3505 	int error;
3506 
3507 	/*
3508 	 * Must be super user
3509 	 */
3510 	error = suser(td);
3511 	if (error)
3512 		return (error);
3513 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3514 	error = namei(&nd);
3515 	if (error)
3516 		return (error);
3517 	NDFREE(&nd, NDF_ONLY_PNBUF);
3518 	vp = nd.ni_vp;
3519 	bzero(&fh, sizeof(fh));
3520 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3521 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3522 	vput(vp);
3523 	if (error)
3524 		return (error);
3525 	error = copyout(&fh, uap->fhp, sizeof (fh));
3526 	return (error);
3527 }
3528 
3529 /*
3530  * syscall for the rpc.lockd to use to translate a NFS file handle into
3531  * an open descriptor.
3532  *
3533  * warning: do not remove the suser() call or this becomes one giant
3534  * security hole.
3535  */
3536 #ifndef _SYS_SYSPROTO_H_
3537 struct fhopen_args {
3538 	const struct fhandle *u_fhp;
3539 	int flags;
3540 };
3541 #endif
/*
 * Arguments:
 *   uap->u_fhp  user-space pointer to the file handle to open;
 *   uap->flags  open flags; they are converted with FFLAGS() and must
 *               include FREAD or FWRITE and must not include O_CREAT.
 *
 * On success the new descriptor index is returned in td->td_retval[0]
 * and 0 is returned.
 *
 * Errors visible here: EINVAL (bad flags), ESTALE (no mount matches the
 * handle's fsid), EMLINK (handle names a symlink), EOPNOTSUPP (handle
 * names a socket), EISDIR (write/truncate of a directory), plus
 * anything returned by suser(), copyin(), VFS_FHTOVP(), vn_writechk(),
 * the MAC checks, VOP_ACCESS(), vn_start_write(), VOP_SETATTR(),
 * VOP_OPEN(), vfs_object_create(), falloc() or VOP_ADVLOCK().
 */
3542 int
3543 fhopen(td, uap)
3544 	struct thread *td;
3545 	struct fhopen_args /* {
3546 		const struct fhandle *u_fhp;
3547 		int flags;
3548 	} */ *uap;
3549 {
3550 	struct proc *p = td->td_proc;
3551 	struct mount *mp;
3552 	struct vnode *vp;
3553 	struct fhandle fhp;
3554 	struct vattr vat;
3555 	struct vattr *vap = &vat;
3556 	struct flock lf;
3557 	struct file *fp;
3558 	register struct filedesc *fdp = p->p_fd;
3559 	int fmode, mode, error, type;
3560 	struct file *nfp;
3561 	int indx;
3562 
3563 	/*
3564 	 * Must be super user
3565 	 */
3566 	error = suser(td);
3567 	if (error)
3568 		return (error);
3569 
3570 	fmode = FFLAGS(uap->flags);
3571 	/* why not allow a non-read/write open for our lockd? */
3572 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3573 		return (EINVAL);
3574 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
3575 	if (error)
3576 		return(error);
3577 	/* find the mount point */
3578 	mp = vfs_getvfs(&fhp.fh_fsid);
3579 	if (mp == NULL)
3580 		return (ESTALE);
3581 	/* now give me my vnode, it gets returned to me locked */
3582 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3583 	if (error)
3584 		return (error);
3585  	/*
3586 	 * from now on we have to make sure not
3587 	 * to forget about the vnode
3588 	 * any error that causes an abort must vput(vp)
3589 	 * just set error = err and 'goto bad;'.
3590 	 */
3591 
3592 	/*
3593 	 * from vn_open
3594 	 */
3595 	if (vp->v_type == VLNK) {
3596 		error = EMLINK;
3597 		goto bad;
3598 	}
3599 	if (vp->v_type == VSOCK) {
3600 		error = EOPNOTSUPP;
3601 		goto bad;
3602 	}
	/* Build the access mode to check from the requested open flags. */
3603 	mode = 0;
3604 	if (fmode & (FWRITE | O_TRUNC)) {
3605 		if (vp->v_type == VDIR) {
3606 			error = EISDIR;
3607 			goto bad;
3608 		}
3609 		error = vn_writechk(vp);
3610 		if (error)
3611 			goto bad;
3612 		mode |= VWRITE;
3613 	}
3614 	if (fmode & FREAD)
3615 		mode |= VREAD;
3616 	if (fmode & O_APPEND)
3617 		mode |= VAPPEND;
3618 #ifdef MAC
3619 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
3620 	if (error)
3621 		goto bad;
3622 #endif
3623 	if (mode) {
3624 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
3625 		if (error)
3626 			goto bad;
3627 	}
	/*
	 * O_TRUNC: the vnode is unlocked around vn_start_write() and
	 * relocked afterwards, then its size is set to 0.  Note that
	 * the failure path of vn_start_write() uses vrele() rather than
	 * 'goto bad' because the vnode is unlocked at that point.
	 */
3628 	if (fmode & O_TRUNC) {
3629 		VOP_UNLOCK(vp, 0, td);				/* XXX */
3630 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3631 			vrele(vp);
3632 			return (error);
3633 		}
3634 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3635 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
3636 #ifdef MAC
3637 		/*
3638 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
3639 		 * should be right.
3640 		 */
3641 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
3642 		if (error == 0) {
3643 #endif
3644 			VATTR_NULL(vap);
3645 			vap->va_size = 0;
3646 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
3647 #ifdef MAC
3648 		}
3649 #endif
3650 		vn_finished_write(mp);
3651 		if (error)
3652 			goto bad;
3653 	}
3654 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
3655 	if (error)
3656 		goto bad;
3657 	/*
3658 	 * Make sure that a VM object is created for VMIO support.
3659 	 */
3660 	if (vn_canvmio(vp) == TRUE) {
3661 		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
3662 			goto bad;
3663 	}
	/* Account for the writer; undone below if falloc() fails. */
3664 	if (fmode & FWRITE)
3665 		vp->v_writecount++;
3666 
3667 	/*
3668 	 * end of vn_open code
3669 	 */
3670 
3671 	if ((error = falloc(td, &nfp, &indx)) != 0) {
3672 		if (fmode & FWRITE)
3673 			vp->v_writecount--;
3674 		goto bad;
3675 	}
3676 	fp = nfp;
3677 
3678 	/*
3679 	 * Hold an extra reference to avoid having fp ripped out
3680 	 * from under us while we block in the lock op
3681 	 */
3682 	fhold(fp);
	/* Attach the vnode to the new file entry. */
3683 	nfp->f_vnode = vp;
3684 	nfp->f_data = vp;
3685 	nfp->f_flag = fmode & FMASK;
3686 	nfp->f_ops = &vnops;
3687 	nfp->f_type = DTYPE_VNODE;
	/*
	 * O_EXLOCK / O_SHLOCK: take a whole-file lock (start 0, len 0)
	 * of the matching type, waiting for it unless FNONBLOCK is set.
	 * The vnode is unlocked across VOP_ADVLOCK().
	 */
3688 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
3689 		lf.l_whence = SEEK_SET;
3690 		lf.l_start = 0;
3691 		lf.l_len = 0;
3692 		if (fmode & O_EXLOCK)
3693 			lf.l_type = F_WRLCK;
3694 		else
3695 			lf.l_type = F_RDLCK;
3696 		type = F_FLOCK;
3697 		if ((fmode & FNONBLOCK) == 0)
3698 			type |= F_WAIT;
3699 		VOP_UNLOCK(vp, 0, td);
3700 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
3701 			    type)) != 0) {
3702 			/*
3703 			 * The lock request failed.  Normally close the
3704 			 * descriptor but handle the case where someone might
3705 			 * have dup()d or close()d it when we weren't looking.
3706 			 */
3707 			FILEDESC_LOCK(fdp);
3708 			if (fdp->fd_ofiles[indx] == fp) {
3709 				fdp->fd_ofiles[indx] = NULL;
3710 				FILEDESC_UNLOCK(fdp);
3711 				fdrop(fp, td);
3712 			} else
3713 				FILEDESC_UNLOCK(fdp);
3714 			/*
3715 			 * release our private reference
3716 			 */
3717 			fdrop(fp, td);
3718 			return(error);
3719 		}
3720 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3721 		fp->f_flag |= FHASLOCK;
3722 	}
	/* The return value of this vfs_object_create() call is not checked. */
3723 	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
3724 		vfs_object_create(vp, td, td->td_ucred);
3725 
	/* Success: unlock the vnode and drop the extra hold taken above. */
3726 	VOP_UNLOCK(vp, 0, td);
3727 	fdrop(fp, td);
3728 	td->td_retval[0] = indx;
3729 	return (0);
3730 
	/* Common error exit while the vnode is still locked. */
3731 bad:
3732 	vput(vp);
3733 	return (error);
3734 }
3735 
3736 /*
3737  * Stat an (NFS) file handle.
3738  */
3739 #ifndef _SYS_SYSPROTO_H_
3740 struct fhstat_args {
3741 	struct fhandle *u_fhp;
3742 	struct stat *sb;
3743 };
3744 #endif
3745 int
3746 fhstat(td, uap)
3747 	struct thread *td;
3748 	register struct fhstat_args /* {
3749 		struct fhandle *u_fhp;
3750 		struct stat *sb;
3751 	} */ *uap;
3752 {
3753 	struct stat sb;
3754 	fhandle_t fh;
3755 	struct mount *mp;
3756 	struct vnode *vp;
3757 	int error;
3758 
3759 	/*
3760 	 * Must be super user
3761 	 */
3762 	error = suser(td);
3763 	if (error)
3764 		return (error);
3765 
3766 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
3767 	if (error)
3768 		return (error);
3769 
3770 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3771 		return (ESTALE);
3772 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3773 		return (error);
3774 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
3775 	vput(vp);
3776 	if (error)
3777 		return (error);
3778 	error = copyout(&sb, uap->sb, sizeof(sb));
3779 	return (error);
3780 }
3781 
3782 /*
3783  * Implement fstatfs() for (NFS) file handles.
3784  */
3785 #ifndef _SYS_SYSPROTO_H_
3786 struct fhstatfs_args {
3787 	struct fhandle *u_fhp;
3788 	struct statfs *buf;
3789 };
3790 #endif
3791 int
3792 fhstatfs(td, uap)
3793 	struct thread *td;
3794 	struct fhstatfs_args /* {
3795 		struct fhandle *u_fhp;
3796 		struct statfs *buf;
3797 	} */ *uap;
3798 {
3799 	struct statfs *sp;
3800 	struct mount *mp;
3801 	struct vnode *vp;
3802 	struct statfs sb;
3803 	fhandle_t fh;
3804 	int error;
3805 
3806 	/*
3807 	 * Must be super user
3808 	 */
3809 	error = suser(td);
3810 	if (error)
3811 		return (error);
3812 
3813 	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
3814 		return (error);
3815 
3816 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3817 		return (ESTALE);
3818 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3819 		return (error);
3820 	mp = vp->v_mount;
3821 	sp = &mp->mnt_stat;
3822 	vput(vp);
3823 #ifdef MAC
3824 	error = mac_check_mount_stat(td->td_ucred, mp);
3825 	if (error)
3826 		return (error);
3827 #endif
3828 	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3829 		return (error);
3830 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3831 	if (suser(td)) {
3832 		bcopy(sp, &sb, sizeof(sb));
3833 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3834 		sp = &sb;
3835 	}
3836 	return (copyout(sp, uap->buf, sizeof(*sp)));
3837 }
3838 
3839 /*
3840  * Syscall to push extended attribute configuration information into the
3841  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3842  * a command (int cmd), and attribute name and misc data.  For now, the
3843  * attribute name is left in userspace for consumption by the VFS_op.
3844  * It will probably be changed to be copied into sysspace by the
3845  * syscall in the future, once issues with various consumers of the
3846  * attribute code have raised their hands.
3847  *
3848  * Currently this is used only by UFS Extended Attributes.
3849  */
3850 int
3851 extattrctl(td, uap)
3852 	struct thread *td;
3853 	struct extattrctl_args /* {
3854 		const char *path;
3855 		int cmd;
3856 		const char *filename;
3857 		int attrnamespace;
3858 		const char *attrname;
3859 	} */ *uap;
3860 {
3861 	struct vnode *filename_vp;
3862 	struct nameidata nd;
3863 	struct mount *mp, *mp_writable;
3864 	char attrname[EXTATTR_MAXNAMELEN];
3865 	int error;
3866 
3867 	/*
3868 	 * uap->attrname is not always defined.  We check again later when we
3869 	 * invoke the VFS call so as to pass in NULL there if needed.
3870 	 */
3871 	if (uap->attrname != NULL) {
3872 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
3873 		    NULL);
3874 		if (error)
3875 			return (error);
3876 	}
3877 
3878 	/*
3879 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
3880 	 * which VFS_EXTATTRCTL() will later release.
3881 	 */
3882 	filename_vp = NULL;
3883 	if (uap->filename != NULL) {
3884 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3885 		    uap->filename, td);
3886 		error = namei(&nd);
3887 		if (error)
3888 			return (error);
3889 		filename_vp = nd.ni_vp;
3890 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3891 	}
3892 
3893 	/* uap->path is always defined. */
3894 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3895 	error = namei(&nd);
3896 	if (error) {
3897 		if (filename_vp != NULL)
3898 			vput(filename_vp);
3899 		return (error);
3900 	}
3901 	mp = nd.ni_vp->v_mount;
3902 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
3903 	NDFREE(&nd, 0);
3904 	if (error) {
3905 		if (filename_vp != NULL)
3906 			vput(filename_vp);
3907 		return (error);
3908 	}
3909 
3910 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
3911 	    uap->attrname != NULL ? attrname : NULL, td);
3912 
3913 	vn_finished_write(mp_writable);
3914 	/*
3915 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3916 	 * filename_vp, so vrele it if it is defined.
3917 	 */
3918 	if (filename_vp != NULL)
3919 		vrele(filename_vp);
3920 	return (error);
3921 }
3922 
3923 /*-
3924  * Set a named extended attribute on a file or directory
3925  *
3926  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3927  *            kernelspace string pointer "attrname", userspace buffer
3928  *            pointer "data", buffer length "nbytes", thread "td".
3929  * Returns: 0 on success, an error number otherwise
3930  * Locks: none
3931  * References: vp must be a valid reference for the duration of the call
3932  */
3933 static int
3934 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3935     void *data, size_t nbytes, struct thread *td)
3936 {
3937 	struct mount *mp;
3938 	struct uio auio;
3939 	struct iovec aiov;
3940 	ssize_t cnt;
3941 	int error;
3942 
3943 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
3944 	if (error)
3945 		return (error);
3946 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3947 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3948 
3949 	aiov.iov_base = data;
3950 	aiov.iov_len = nbytes;
3951 	auio.uio_iov = &aiov;
3952 	auio.uio_iovcnt = 1;
3953 	auio.uio_offset = 0;
3954 	if (nbytes > INT_MAX) {
3955 		error = EINVAL;
3956 		goto done;
3957 	}
3958 	auio.uio_resid = nbytes;
3959 	auio.uio_rw = UIO_WRITE;
3960 	auio.uio_segflg = UIO_USERSPACE;
3961 	auio.uio_td = td;
3962 	cnt = nbytes;
3963 
3964 #ifdef MAC
3965 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
3966 	    attrname, &auio);
3967 	if (error)
3968 		goto done;
3969 #endif
3970 
3971 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3972 	    td->td_ucred, td);
3973 	cnt -= auio.uio_resid;
3974 	td->td_retval[0] = cnt;
3975 
3976 done:
3977 	VOP_UNLOCK(vp, 0, td);
3978 	vn_finished_write(mp);
3979 	return (error);
3980 }
3981 
3982 int
3983 extattr_set_fd(td, uap)
3984 	struct thread *td;
3985 	struct extattr_set_fd_args /* {
3986 		int fd;
3987 		int attrnamespace;
3988 		const char *attrname;
3989 		void *data;
3990 		size_t nbytes;
3991 	} */ *uap;
3992 {
3993 	struct file *fp;
3994 	char attrname[EXTATTR_MAXNAMELEN];
3995 	int error;
3996 
3997 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3998 	if (error)
3999 		return (error);
4000 
4001 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4002 	if (error)
4003 		return (error);
4004 
4005 	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
4006 	    attrname, uap->data, uap->nbytes, td);
4007 	fdrop(fp, td);
4008 
4009 	return (error);
4010 }
4011 
4012 int
4013 extattr_set_file(td, uap)
4014 	struct thread *td;
4015 	struct extattr_set_file_args /* {
4016 		const char *path;
4017 		int attrnamespace;
4018 		const char *attrname;
4019 		void *data;
4020 		size_t nbytes;
4021 	} */ *uap;
4022 {
4023 	struct nameidata nd;
4024 	char attrname[EXTATTR_MAXNAMELEN];
4025 	int error;
4026 
4027 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4028 	if (error)
4029 		return (error);
4030 
4031 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4032 	error = namei(&nd);
4033 	if (error)
4034 		return (error);
4035 	NDFREE(&nd, NDF_ONLY_PNBUF);
4036 
4037 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4038 	    uap->data, uap->nbytes, td);
4039 
4040 	vrele(nd.ni_vp);
4041 	return (error);
4042 }
4043 
4044 int
4045 extattr_set_link(td, uap)
4046 	struct thread *td;
4047 	struct extattr_set_link_args /* {
4048 		const char *path;
4049 		int attrnamespace;
4050 		const char *attrname;
4051 		void *data;
4052 		size_t nbytes;
4053 	} */ *uap;
4054 {
4055 	struct nameidata nd;
4056 	char attrname[EXTATTR_MAXNAMELEN];
4057 	int error;
4058 
4059 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4060 	if (error)
4061 		return (error);
4062 
4063 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4064 	error = namei(&nd);
4065 	if (error)
4066 		return (error);
4067 	NDFREE(&nd, NDF_ONLY_PNBUF);
4068 
4069 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4070 	    uap->data, uap->nbytes, td);
4071 
4072 	vrele(nd.ni_vp);
4073 	return (error);
4074 }
4075 
4076 /*-
4077  * Get a named extended attribute on a file or directory
4078  *
4079  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4080  *            kernelspace string pointer "attrname", userspace buffer
4081  *            pointer "data", buffer length "nbytes", thread "td".
4082  * Returns: 0 on success, an error number otherwise
4083  * Locks: none
4084  * References: vp must be a valid reference for the duration of the call
4085  */
4086 static int
4087 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4088     void *data, size_t nbytes, struct thread *td)
4089 {
4090 	struct uio auio, *auiop;
4091 	struct iovec aiov;
4092 	ssize_t cnt;
4093 	size_t size, *sizep;
4094 	int error;
4095 
4096 	/*
4097 	 * XXX: Temporary API compatibility for applications that know
4098 	 * about this hack ("" means list), but haven't been updated
4099 	 * for the extattr_list_*() system calls yet.  This will go
4100 	 * away for FreeBSD 5.3.
4101 	 */
4102 	if (strlen(attrname) == 0)
4103 		return (extattr_list_vp(vp, attrnamespace, data, nbytes, td));
4104 
4105 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4106 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4107 
4108 	/*
4109 	 * Slightly unusual semantics: if the user provides a NULL data
4110 	 * pointer, they don't want to receive the data, just the
4111 	 * maximum read length.
4112 	 */
4113 	auiop = NULL;
4114 	sizep = NULL;
4115 	cnt = 0;
4116 	if (data != NULL) {
4117 		aiov.iov_base = data;
4118 		aiov.iov_len = nbytes;
4119 		auio.uio_iov = &aiov;
4120 		auio.uio_offset = 0;
4121 		if (nbytes > INT_MAX) {
4122 			error = EINVAL;
4123 			goto done;
4124 		}
4125 		auio.uio_resid = nbytes;
4126 		auio.uio_rw = UIO_READ;
4127 		auio.uio_segflg = UIO_USERSPACE;
4128 		auio.uio_td = td;
4129 		auiop = &auio;
4130 		cnt = nbytes;
4131 	} else
4132 		sizep = &size;
4133 
4134 #ifdef MAC
4135 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4136 	    attrname, &auio);
4137 	if (error)
4138 		goto done;
4139 #endif
4140 
4141 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4142 	    td->td_ucred, td);
4143 
4144 	if (auiop != NULL) {
4145 		cnt -= auio.uio_resid;
4146 		td->td_retval[0] = cnt;
4147 	} else
4148 		td->td_retval[0] = size;
4149 
4150 done:
4151 	VOP_UNLOCK(vp, 0, td);
4152 	return (error);
4153 }
4154 
4155 int
4156 extattr_get_fd(td, uap)
4157 	struct thread *td;
4158 	struct extattr_get_fd_args /* {
4159 		int fd;
4160 		int attrnamespace;
4161 		const char *attrname;
4162 		void *data;
4163 		size_t nbytes;
4164 	} */ *uap;
4165 {
4166 	struct file *fp;
4167 	char attrname[EXTATTR_MAXNAMELEN];
4168 	int error;
4169 
4170 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4171 	if (error)
4172 		return (error);
4173 
4174 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4175 	if (error)
4176 		return (error);
4177 
4178 	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4179 	    attrname, uap->data, uap->nbytes, td);
4180 
4181 	fdrop(fp, td);
4182 	return (error);
4183 }
4184 
4185 int
4186 extattr_get_file(td, uap)
4187 	struct thread *td;
4188 	struct extattr_get_file_args /* {
4189 		const char *path;
4190 		int attrnamespace;
4191 		const char *attrname;
4192 		void *data;
4193 		size_t nbytes;
4194 	} */ *uap;
4195 {
4196 	struct nameidata nd;
4197 	char attrname[EXTATTR_MAXNAMELEN];
4198 	int error;
4199 
4200 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4201 	if (error)
4202 		return (error);
4203 
4204 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4205 	error = namei(&nd);
4206 	if (error)
4207 		return (error);
4208 	NDFREE(&nd, NDF_ONLY_PNBUF);
4209 
4210 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4211 	    uap->data, uap->nbytes, td);
4212 
4213 	vrele(nd.ni_vp);
4214 	return (error);
4215 }
4216 
4217 int
4218 extattr_get_link(td, uap)
4219 	struct thread *td;
4220 	struct extattr_get_link_args /* {
4221 		const char *path;
4222 		int attrnamespace;
4223 		const char *attrname;
4224 		void *data;
4225 		size_t nbytes;
4226 	} */ *uap;
4227 {
4228 	struct nameidata nd;
4229 	char attrname[EXTATTR_MAXNAMELEN];
4230 	int error;
4231 
4232 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4233 	if (error)
4234 		return (error);
4235 
4236 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4237 	error = namei(&nd);
4238 	if (error)
4239 		return (error);
4240 	NDFREE(&nd, NDF_ONLY_PNBUF);
4241 
4242 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4243 	    uap->data, uap->nbytes, td);
4244 
4245 	vrele(nd.ni_vp);
4246 	return (error);
4247 }
4248 
4249 /*
4250  * extattr_delete_vp(): Delete a named extended attribute on a file or
4251  *                      directory
4252  *
4253  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4254  *            kernelspace string pointer "attrname", proc "p"
4255  * Returns: 0 on success, an error number otherwise
4256  * Locks: none
4257  * References: vp must be a valid reference for the duration of the call
4258  */
4259 static int
4260 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4261     struct thread *td)
4262 {
4263 	struct mount *mp;
4264 	int error;
4265 
4266 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4267 	if (error)
4268 		return (error);
4269 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4270 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4271 
4272 #ifdef MAC
4273 	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4274 	    attrname);
4275 	if (error)
4276 		goto done;
4277 #endif
4278 
4279 	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4280 	    td);
4281 	if (error == EOPNOTSUPP)
4282 		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4283 		    td->td_ucred, td);
4284 #ifdef MAC
4285 done:
4286 #endif
4287 	VOP_UNLOCK(vp, 0, td);
4288 	vn_finished_write(mp);
4289 	return (error);
4290 }
4291 
4292 int
4293 extattr_delete_fd(td, uap)
4294 	struct thread *td;
4295 	struct extattr_delete_fd_args /* {
4296 		int fd;
4297 		int attrnamespace;
4298 		const char *attrname;
4299 	} */ *uap;
4300 {
4301 	struct file *fp;
4302 	struct vnode *vp;
4303 	char attrname[EXTATTR_MAXNAMELEN];
4304 	int error;
4305 
4306 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4307 	if (error)
4308 		return (error);
4309 
4310 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4311 	if (error)
4312 		return (error);
4313 	vp = fp->f_vnode;
4314 
4315 	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
4316 	fdrop(fp, td);
4317 	return (error);
4318 }
4319 
4320 int
4321 extattr_delete_file(td, uap)
4322 	struct thread *td;
4323 	struct extattr_delete_file_args /* {
4324 		const char *path;
4325 		int attrnamespace;
4326 		const char *attrname;
4327 	} */ *uap;
4328 {
4329 	struct nameidata nd;
4330 	char attrname[EXTATTR_MAXNAMELEN];
4331 	int error;
4332 
4333 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4334 	if (error)
4335 		return(error);
4336 
4337 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4338 	error = namei(&nd);
4339 	if (error)
4340 		return(error);
4341 	NDFREE(&nd, NDF_ONLY_PNBUF);
4342 
4343 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4344 	vrele(nd.ni_vp);
4345 	return(error);
4346 }
4347 
4348 int
4349 extattr_delete_link(td, uap)
4350 	struct thread *td;
4351 	struct extattr_delete_link_args /* {
4352 		const char *path;
4353 		int attrnamespace;
4354 		const char *attrname;
4355 	} */ *uap;
4356 {
4357 	struct nameidata nd;
4358 	char attrname[EXTATTR_MAXNAMELEN];
4359 	int error;
4360 
4361 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4362 	if (error)
4363 		return(error);
4364 
4365 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4366 	error = namei(&nd);
4367 	if (error)
4368 		return(error);
4369 	NDFREE(&nd, NDF_ONLY_PNBUF);
4370 
4371 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4372 	vrele(nd.ni_vp);
4373 	return(error);
4374 }
4375 
4376 /*-
4377  * Retrieve a list of extended attributes on a file or directory.
4378  *
4379  * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4380  *            userspace buffer pointer "data", buffer length "nbytes",
4381  *            thread "td".
4382  * Returns: 0 on success, an error number otherwise
4383  * Locks: none
4384  * References: vp must be a valid reference for the duration of the call
4385  */
4386 static int
4387 extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4388     size_t nbytes, struct thread *td)
4389 {
4390 	struct uio auio, *auiop;
4391 	size_t size, *sizep;
4392 	struct iovec aiov;
4393 	ssize_t cnt;
4394 	int error;
4395 
4396 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4397 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4398 
4399 	auiop = NULL;
4400 	sizep = NULL;
4401 	cnt = 0;
4402 	if (data != NULL) {
4403 		aiov.iov_base = data;
4404 		aiov.iov_len = nbytes;
4405 		auio.uio_iov = &aiov;
4406 		auio.uio_offset = 0;
4407 		if (nbytes > INT_MAX) {
4408 			error = EINVAL;
4409 			goto done;
4410 		}
4411 		auio.uio_resid = nbytes;
4412 		auio.uio_rw = UIO_READ;
4413 		auio.uio_segflg = UIO_USERSPACE;
4414 		auio.uio_td = td;
4415 		auiop = &auio;
4416 		cnt = nbytes;
4417 	} else
4418 		sizep = &size;
4419 
4420 #ifdef MAC
4421 	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4422 	if (error)
4423 		goto done;
4424 #endif
4425 
4426 	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4427 	    td->td_ucred, td);
4428 
4429 	if (auiop != NULL) {
4430 		cnt -= auio.uio_resid;
4431 		td->td_retval[0] = cnt;
4432 	} else
4433 		td->td_retval[0] = size;
4434 
4435 done:
4436 	VOP_UNLOCK(vp, 0, td);
4437 	return (error);
4438 }
4439 
4440 
4441 int
4442 extattr_list_fd(td, uap)
4443 	struct thread *td;
4444 	struct extattr_list_fd_args /* {
4445 		int fd;
4446 		int attrnamespace;
4447 		void *data;
4448 		size_t nbytes;
4449 	} */ *uap;
4450 {
4451 	struct file *fp;
4452 	int error;
4453 
4454 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4455 	if (error)
4456 		return (error);
4457 
4458 	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
4459 	    uap->nbytes, td);
4460 
4461 	fdrop(fp, td);
4462 	return (error);
4463 }
4464 
4465 int
4466 extattr_list_file(td, uap)
4467 	struct thread*td;
4468 	struct extattr_list_file_args /* {
4469 		const char *path;
4470 		int attrnamespace;
4471 		void *data;
4472 		size_t nbytes;
4473 	} */ *uap;
4474 {
4475 	struct nameidata nd;
4476 	int error;
4477 
4478 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4479 	error = namei(&nd);
4480 	if (error)
4481 		return (error);
4482 	NDFREE(&nd, NDF_ONLY_PNBUF);
4483 
4484 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4485 	    uap->nbytes, td);
4486 
4487 	vrele(nd.ni_vp);
4488 	return (error);
4489 }
4490 
4491 int
4492 extattr_list_link(td, uap)
4493 	struct thread*td;
4494 	struct extattr_list_link_args /* {
4495 		const char *path;
4496 		int attrnamespace;
4497 		void *data;
4498 		size_t nbytes;
4499 	} */ *uap;
4500 {
4501 	struct nameidata nd;
4502 	int error;
4503 
4504 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4505 	error = namei(&nd);
4506 	if (error)
4507 		return (error);
4508 	NDFREE(&nd, NDF_ONLY_PNBUF);
4509 
4510 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4511 	    uap->nbytes, td);
4512 
4513 	vrele(nd.ni_vp);
4514 	return (error);
4515 }
4516 
4517