xref: /freebsd/sys/kern/vfs_syscalls.c (revision 729362425c09cf6b362366aabc6fb547eee8035a)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  * $FreeBSD$
40  */
41 
42 /* For 4.3 integer FS ID compatibility */
43 #include "opt_compat.h"
44 #include "opt_mac.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/sysent.h>
51 #include <sys/mac.h>
52 #include <sys/malloc.h>
53 #include <sys/mount.h>
54 #include <sys/mutex.h>
55 #include <sys/sysproto.h>
56 #include <sys/namei.h>
57 #include <sys/filedesc.h>
58 #include <sys/kernel.h>
59 #include <sys/fcntl.h>
60 #include <sys/file.h>
61 #include <sys/linker.h>
62 #include <sys/stat.h>
63 #include <sys/sx.h>
64 #include <sys/unistd.h>
65 #include <sys/vnode.h>
66 #include <sys/proc.h>
67 #include <sys/dirent.h>
68 #include <sys/extattr.h>
69 #include <sys/jail.h>
70 #include <sys/syscallsubr.h>
71 #include <sys/sysctl.h>
72 
73 #include <machine/limits.h>
74 #include <machine/stdarg.h>
75 
76 #include <vm/vm.h>
77 #include <vm/vm_object.h>
78 #include <vm/vm_page.h>
79 #include <vm/uma.h>
80 
/*
 * Prototypes for the file-local helpers shared by the system calls
 * implemented in this file.
 */
static int change_dir(struct nameidata *ndp, struct thread *td);
static int chroot_refuse_vdir_fds(struct filedesc *fdp);
static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
static int setfmode(struct thread *td, struct vnode *, int);
static int setfflags(struct thread *td, struct vnode *, int);
static int setutimes(struct thread *td, struct vnode *,
    const struct timespec *, int, int);
static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
    struct thread *td);

/*
 * Globally visible function-pointer hooks.
 * NOTE(review): presumably filled in by the union filesystem and the
 * soft updates code respectively when those are present -- confirm
 * against the code that assigns them.
 */
int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
int (*softdep_fsync_hook)(struct vnode *);
94 
95 /*
96  * The module initialization routine for POSIX asynchronous I/O will
97  * set this to the version of AIO that it implements.  (Zero means
98  * that it is not implemented.)  This value is used here by pathconf()
99  * and in kern_descrip.c by fpathconf().
100  */
101 int async_io_version;
102 
103 /*
104  * Sync each mounted filesystem.
105  */
106 #ifndef _SYS_SYSPROTO_H_
107 struct sync_args {
108         int     dummy;
109 };
110 #endif
111 
112 #ifdef DEBUG
113 static int syncprt = 0;
114 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
115 #endif
116 
117 /* ARGSUSED */
118 int
119 sync(td, uap)
120 	struct thread *td;
121 	struct sync_args *uap;
122 {
123 	struct mount *mp, *nmp;
124 	int asyncflag;
125 
126 	mtx_lock(&mountlist_mtx);
127 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
128 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
129 			nmp = TAILQ_NEXT(mp, mnt_list);
130 			continue;
131 		}
132 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
133 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
134 			asyncflag = mp->mnt_flag & MNT_ASYNC;
135 			mp->mnt_flag &= ~MNT_ASYNC;
136 			vfs_msync(mp, MNT_NOWAIT);
137 			VFS_SYNC(mp, MNT_NOWAIT,
138 			    ((td != NULL) ? td->td_ucred : NOCRED), td);
139 			mp->mnt_flag |= asyncflag;
140 			vn_finished_write(mp);
141 		}
142 		mtx_lock(&mountlist_mtx);
143 		nmp = TAILQ_NEXT(mp, mnt_list);
144 		vfs_unbusy(mp, td);
145 	}
146 	mtx_unlock(&mountlist_mtx);
147 #if 0
148 /*
149  * XXX don't call vfs_bufstats() yet because that routine
150  * was not imported in the Lite2 merge.
151  */
152 #ifdef DIAGNOSTIC
153 	if (syncprt)
154 		vfs_bufstats();
155 #endif /* DIAGNOSTIC */
156 #endif
157 	return (0);
158 }
159 
160 /* XXX PRISON: could be per prison flag */
/*
 * Gate checked by quotactl(): while this is zero, callers running
 * inside a jail are refused with EPERM.  The sysctl that would expose
 * it is compiled out below.
 */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif
165 
166 /*
167  * Change filesystem quotas.
168  */
169 #ifndef _SYS_SYSPROTO_H_
170 struct quotactl_args {
171 	char *path;
172 	int cmd;
173 	int uid;
174 	caddr_t arg;
175 };
176 #endif
177 /* ARGSUSED */
178 int
179 quotactl(td, uap)
180 	struct thread *td;
181 	register struct quotactl_args /* {
182 		char *path;
183 		int cmd;
184 		int uid;
185 		caddr_t arg;
186 	} */ *uap;
187 {
188 	struct mount *mp;
189 	int error;
190 	struct nameidata nd;
191 
192 	if (jailed(td->td_ucred) && !prison_quotas)
193 		return (EPERM);
194 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
195 	if ((error = namei(&nd)) != 0)
196 		return (error);
197 	NDFREE(&nd, NDF_ONLY_PNBUF);
198 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
199 	vrele(nd.ni_vp);
200 	if (error)
201 		return (error);
202 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
203 	vn_finished_write(mp);
204 	return (error);
205 }
206 
207 /*
208  * Get filesystem statistics.
209  */
210 #ifndef _SYS_SYSPROTO_H_
211 struct statfs_args {
212 	char *path;
213 	struct statfs *buf;
214 };
215 #endif
216 /* ARGSUSED */
217 int
218 statfs(td, uap)
219 	struct thread *td;
220 	register struct statfs_args /* {
221 		char *path;
222 		struct statfs *buf;
223 	} */ *uap;
224 {
225 	register struct mount *mp;
226 	register struct statfs *sp;
227 	int error;
228 	struct nameidata nd;
229 	struct statfs sb;
230 
231 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
232 	if ((error = namei(&nd)) != 0)
233 		return (error);
234 	mp = nd.ni_vp->v_mount;
235 	sp = &mp->mnt_stat;
236 	NDFREE(&nd, NDF_ONLY_PNBUF);
237 	vrele(nd.ni_vp);
238 #ifdef MAC
239 	error = mac_check_mount_stat(td->td_ucred, mp);
240 	if (error)
241 		return (error);
242 #endif
243 	error = VFS_STATFS(mp, sp, td);
244 	if (error)
245 		return (error);
246 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
247 	if (suser(td)) {
248 		bcopy(sp, &sb, sizeof(sb));
249 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
250 		sp = &sb;
251 	}
252 	return (copyout(sp, uap->buf, sizeof(*sp)));
253 }
254 
255 /*
256  * Get filesystem statistics.
257  */
258 #ifndef _SYS_SYSPROTO_H_
259 struct fstatfs_args {
260 	int fd;
261 	struct statfs *buf;
262 };
263 #endif
264 /* ARGSUSED */
265 int
266 fstatfs(td, uap)
267 	struct thread *td;
268 	register struct fstatfs_args /* {
269 		int fd;
270 		struct statfs *buf;
271 	} */ *uap;
272 {
273 	struct file *fp;
274 	struct mount *mp;
275 	register struct statfs *sp;
276 	int error;
277 	struct statfs sb;
278 
279 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
280 		return (error);
281 	mp = ((struct vnode *)fp->f_data)->v_mount;
282 	fdrop(fp, td);
283 	if (mp == NULL)
284 		return (EBADF);
285 #ifdef MAC
286 	error = mac_check_mount_stat(td->td_ucred, mp);
287 	if (error)
288 		return (error);
289 #endif
290 	sp = &mp->mnt_stat;
291 	error = VFS_STATFS(mp, sp, td);
292 	if (error)
293 		return (error);
294 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
295 	if (suser(td)) {
296 		bcopy(sp, &sb, sizeof(sb));
297 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
298 		sp = &sb;
299 	}
300 	return (copyout(sp, uap->buf, sizeof(*sp)));
301 }
302 
303 /*
304  * Get statistics on all filesystems.
305  */
306 #ifndef _SYS_SYSPROTO_H_
307 struct getfsstat_args {
308 	struct statfs *buf;
309 	long bufsize;
310 	int flags;
311 };
312 #endif
313 int
314 getfsstat(td, uap)
315 	struct thread *td;
316 	register struct getfsstat_args /* {
317 		struct statfs *buf;
318 		long bufsize;
319 		int flags;
320 	} */ *uap;
321 {
322 	register struct mount *mp, *nmp;
323 	register struct statfs *sp;
324 	caddr_t sfsp;
325 	long count, maxcount, error;
326 
327 	maxcount = uap->bufsize / sizeof(struct statfs);
328 	sfsp = (caddr_t)uap->buf;
329 	count = 0;
330 	mtx_lock(&mountlist_mtx);
331 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
332 #ifdef MAC
333 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
334 			nmp = TAILQ_NEXT(mp, mnt_list);
335 			continue;
336 		}
337 #endif
338 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
339 			nmp = TAILQ_NEXT(mp, mnt_list);
340 			continue;
341 		}
342 		if (sfsp && count < maxcount) {
343 			sp = &mp->mnt_stat;
344 			/*
345 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
346 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
347 			 * overrides MNT_WAIT.
348 			 */
349 			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
350 			    (uap->flags & MNT_WAIT)) &&
351 			    (error = VFS_STATFS(mp, sp, td))) {
352 				mtx_lock(&mountlist_mtx);
353 				nmp = TAILQ_NEXT(mp, mnt_list);
354 				vfs_unbusy(mp, td);
355 				continue;
356 			}
357 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
358 			error = copyout(sp, sfsp, sizeof(*sp));
359 			if (error) {
360 				vfs_unbusy(mp, td);
361 				return (error);
362 			}
363 			sfsp += sizeof(*sp);
364 		}
365 		count++;
366 		mtx_lock(&mountlist_mtx);
367 		nmp = TAILQ_NEXT(mp, mnt_list);
368 		vfs_unbusy(mp, td);
369 	}
370 	mtx_unlock(&mountlist_mtx);
371 	if (sfsp && count > maxcount)
372 		td->td_retval[0] = maxcount;
373 	else
374 		td->td_retval[0] = count;
375 	return (0);
376 }
377 
378 /*
379  * Change current working directory to a given file descriptor.
380  */
381 #ifndef _SYS_SYSPROTO_H_
382 struct fchdir_args {
383 	int	fd;
384 };
385 #endif
386 /* ARGSUSED */
387 int
388 fchdir(td, uap)
389 	struct thread *td;
390 	struct fchdir_args /* {
391 		int fd;
392 	} */ *uap;
393 {
394 	register struct filedesc *fdp = td->td_proc->p_fd;
395 	struct vnode *vp, *tdp, *vpold;
396 	struct mount *mp;
397 	struct file *fp;
398 	int error;
399 
400 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
401 		return (error);
402 	vp = fp->f_data;
403 	VREF(vp);
404 	fdrop(fp, td);
405 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
406 	if (vp->v_type != VDIR)
407 		error = ENOTDIR;
408 #ifdef MAC
409 	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
410 	}
411 #endif
412 	else
413 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
414 	while (!error && (mp = vp->v_mountedhere) != NULL) {
415 		if (vfs_busy(mp, 0, 0, td))
416 			continue;
417 		error = VFS_ROOT(mp, &tdp);
418 		vfs_unbusy(mp, td);
419 		if (error)
420 			break;
421 		vput(vp);
422 		vp = tdp;
423 	}
424 	if (error) {
425 		vput(vp);
426 		return (error);
427 	}
428 	VOP_UNLOCK(vp, 0, td);
429 	FILEDESC_LOCK(fdp);
430 	vpold = fdp->fd_cdir;
431 	fdp->fd_cdir = vp;
432 	FILEDESC_UNLOCK(fdp);
433 	vrele(vpold);
434 	return (0);
435 }
436 
437 /*
438  * Change current working directory (``.'').
439  */
440 #ifndef _SYS_SYSPROTO_H_
441 struct chdir_args {
442 	char	*path;
443 };
444 #endif
445 /* ARGSUSED */
446 int
447 chdir(td, uap)
448 	struct thread *td;
449 	struct chdir_args /* {
450 		char *path;
451 	} */ *uap;
452 {
453 
454 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
455 }
456 
457 int
458 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
459 {
460 	register struct filedesc *fdp = td->td_proc->p_fd;
461 	int error;
462 	struct nameidata nd;
463 	struct vnode *vp;
464 
465 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, pathseg, path, td);
466 	if ((error = change_dir(&nd, td)) != 0)
467 		return (error);
468 	VOP_UNLOCK(nd.ni_vp, 0, td);
469 	NDFREE(&nd, NDF_ONLY_PNBUF);
470 	FILEDESC_LOCK(fdp);
471 	vp = fdp->fd_cdir;
472 	fdp->fd_cdir = nd.ni_vp;
473 	FILEDESC_UNLOCK(fdp);
474 	vrele(vp);
475 	return (0);
476 }
477 
478 /*
479  * Helper function for raised chroot(2) security function:  Refuse if
480  * any filedescriptors are open directories.
481  */
482 static int
483 chroot_refuse_vdir_fds(fdp)
484 	struct filedesc *fdp;
485 {
486 	struct vnode *vp;
487 	struct file *fp;
488 	int fd;
489 
490 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
491 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
492 		fp = fget_locked(fdp, fd);
493 		if (fp == NULL)
494 			continue;
495 		if (fp->f_type == DTYPE_VNODE) {
496 			vp = fp->f_data;
497 			if (vp->v_type == VDIR)
498 				return (EPERM);
499 		}
500 	}
501 	return (0);
502 }
503 
504 /*
505  * This sysctl determines if we will allow a process to chroot(2) if it
506  * has a directory open:
507  *	0: disallowed for all processes.
508  *	1: allowed for processes that were not already chroot(2)'ed.
509  *	2: allowed for all processes.
510  */
511 
/* Policy knob read by chroot(); exported read-write under the kern sysctl tree. */
static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0, "");
516 
517 /*
518  * Change notion of root (``/'') directory.
519  */
520 #ifndef _SYS_SYSPROTO_H_
521 struct chroot_args {
522 	char	*path;
523 };
524 #endif
525 /* ARGSUSED */
526 int
527 chroot(td, uap)
528 	struct thread *td;
529 	struct chroot_args /* {
530 		char *path;
531 	} */ *uap;
532 {
533 	register struct filedesc *fdp = td->td_proc->p_fd;
534 	int error;
535 	struct nameidata nd;
536 	struct vnode *vp;
537 
538 	error = suser_cred(td->td_ucred, PRISON_ROOT);
539 	if (error)
540 		return (error);
541 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
542 	mtx_lock(&Giant);
543 	if ((error = change_dir(&nd, td)) != 0)
544 		goto error;
545 #ifdef MAC
546 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp))) {
547 		vput(nd.ni_vp);
548 		goto error;
549 	}
550 #endif
551 	VOP_UNLOCK(nd.ni_vp, 0, td);
552 	FILEDESC_LOCK(fdp);
553 	if (chroot_allow_open_directories == 0 ||
554 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
555 		error = chroot_refuse_vdir_fds(fdp);
556 		if (error)
557 			goto error_unlock;
558 	}
559 	vp = fdp->fd_rdir;
560 	fdp->fd_rdir = nd.ni_vp;
561 	if (!fdp->fd_jdir) {
562 		fdp->fd_jdir = nd.ni_vp;
563                 VREF(fdp->fd_jdir);
564 	}
565 	FILEDESC_UNLOCK(fdp);
566 	NDFREE(&nd, NDF_ONLY_PNBUF);
567 	vrele(vp);
568 	mtx_unlock(&Giant);
569 	return (0);
570 error_unlock:
571 	FILEDESC_UNLOCK(fdp);
572 error:
573 	mtx_unlock(&Giant);
574 	NDFREE(&nd, NDF_ONLY_PNBUF);
575 	return (error);
576 }
577 
578 /*
579  * Common routine for chroot and chdir.  On success, the directory vnode
580  * is returned locked, and must be unlocked by the caller.
581  */
582 static int
583 change_dir(ndp, td)
584 	register struct nameidata *ndp;
585 	struct thread *td;
586 {
587 	struct vnode *vp;
588 	int error;
589 
590 	error = namei(ndp);
591 	if (error)
592 		return (error);
593 	vp = ndp->ni_vp;
594 	if (vp->v_type != VDIR)
595 		error = ENOTDIR;
596 #ifdef MAC
597 	if (error == 0)
598 		error = mac_check_vnode_chdir(td->td_ucred, vp);
599 #endif
600 	if (error == 0)
601 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
602 	if (error)
603 		vput(vp);
604 	return (error);
605 }
606 
607 /*
608  * Check permissions, allocate an open file structure,
609  * and call the device open routine if any.
610  */
611 #ifndef _SYS_SYSPROTO_H_
612 struct open_args {
613 	char	*path;
614 	int	flags;
615 	int	mode;
616 };
617 #endif
618 int
619 open(td, uap)
620 	struct thread *td;
621 	register struct open_args /* {
622 		char *path;
623 		int flags;
624 		int mode;
625 	} */ *uap;
626 {
627 
628 	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
629 }
630 
631 int
632 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
633     int mode)
634 {
635 	struct proc *p = td->td_proc;
636 	struct filedesc *fdp = p->p_fd;
637 	struct file *fp;
638 	struct vnode *vp;
639 	struct vattr vat;
640 	struct mount *mp;
641 	int cmode, oflags;
642 	struct file *nfp;
643 	int type, indx, error;
644 	struct flock lf;
645 	struct nameidata nd;
646 
647 	if ((flags & O_ACCMODE) == O_ACCMODE)
648 		return (EINVAL);
649 	oflags = flags;
650 	flags = FFLAGS(flags);
651 	error = falloc(td, &nfp, &indx);
652 	if (error)
653 		return (error);
654 	fp = nfp;
655 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
656 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
657 	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
658 	/*
659 	 * Bump the ref count to prevent another process from closing
660 	 * the descriptor while we are blocked in vn_open()
661 	 */
662 	fhold(fp);
663 	error = vn_open(&nd, &flags, cmode);
664 	if (error) {
665 		/*
666 		 * release our own reference
667 		 */
668 		fdrop(fp, td);
669 
670 		/*
671 		 * handle special fdopen() case.  bleh.  dupfdopen() is
672 		 * responsible for dropping the old contents of ofiles[indx]
673 		 * if it succeeds.
674 		 */
675 		if ((error == ENODEV || error == ENXIO) &&
676 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
677 		    (error =
678 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
679 			td->td_retval[0] = indx;
680 			return (0);
681 		}
682 		/*
683 		 * Clean up the descriptor, but only if another thread hadn't
684 		 * replaced or closed it.
685 		 */
686 		FILEDESC_LOCK(fdp);
687 		if (fdp->fd_ofiles[indx] == fp) {
688 			fdp->fd_ofiles[indx] = NULL;
689 			FILEDESC_UNLOCK(fdp);
690 			fdrop(fp, td);
691 		} else
692 			FILEDESC_UNLOCK(fdp);
693 
694 		if (error == ERESTART)
695 			error = EINTR;
696 		return (error);
697 	}
698 	td->td_dupfd = 0;
699 	NDFREE(&nd, NDF_ONLY_PNBUF);
700 	vp = nd.ni_vp;
701 
702 	/*
703 	 * There should be 2 references on the file, one from the descriptor
704 	 * table, and one for us.
705 	 *
706 	 * Handle the case where someone closed the file (via its file
707 	 * descriptor) while we were blocked.  The end result should look
708 	 * like opening the file succeeded but it was immediately closed.
709 	 */
710 	FILEDESC_LOCK(fdp);
711 	FILE_LOCK(fp);
712 	if (fp->f_count == 1) {
713 		KASSERT(fdp->fd_ofiles[indx] != fp,
714 		    ("Open file descriptor lost all refs"));
715 		FILEDESC_UNLOCK(fdp);
716 		FILE_UNLOCK(fp);
717 		VOP_UNLOCK(vp, 0, td);
718 		vn_close(vp, flags & FMASK, fp->f_cred, td);
719 		fdrop(fp, td);
720 		td->td_retval[0] = indx;
721 		return 0;
722 	}
723 
724 	/* assert that vn_open created a backing object if one is needed */
725 	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
726 		("open: vmio vnode has no backing object after vn_open"));
727 
728 	fp->f_data = vp;
729 	fp->f_flag = flags & FMASK;
730 	fp->f_ops = &vnops;
731 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
732 	FILEDESC_UNLOCK(fdp);
733 	FILE_UNLOCK(fp);
734 	VOP_UNLOCK(vp, 0, td);
735 	if (flags & (O_EXLOCK | O_SHLOCK)) {
736 		lf.l_whence = SEEK_SET;
737 		lf.l_start = 0;
738 		lf.l_len = 0;
739 		if (flags & O_EXLOCK)
740 			lf.l_type = F_WRLCK;
741 		else
742 			lf.l_type = F_RDLCK;
743 		type = F_FLOCK;
744 		if ((flags & FNONBLOCK) == 0)
745 			type |= F_WAIT;
746 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
747 			    type)) != 0)
748 			goto bad;
749 		fp->f_flag |= FHASLOCK;
750 	}
751 	if (flags & O_TRUNC) {
752 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
753 			goto bad;
754 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
755 		VATTR_NULL(&vat);
756 		vat.va_size = 0;
757 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
758 #ifdef MAC
759 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
760 		if (error == 0)
761 #endif
762 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
763 		VOP_UNLOCK(vp, 0, td);
764 		vn_finished_write(mp);
765 		if (error)
766 			goto bad;
767 	}
768 	/*
769 	 * Release our private reference, leaving the one associated with
770 	 * the descriptor table intact.
771 	 */
772 	fdrop(fp, td);
773 	td->td_retval[0] = indx;
774 	return (0);
775 bad:
776 	FILEDESC_LOCK(fdp);
777 	if (fdp->fd_ofiles[indx] == fp) {
778 		fdp->fd_ofiles[indx] = NULL;
779 		FILEDESC_UNLOCK(fdp);
780 		fdrop(fp, td);
781 	} else
782 		FILEDESC_UNLOCK(fdp);
783 	fdrop(fp, td);
784 	return (error);
785 }
786 
#ifdef COMPAT_43
/*
 * Create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
/*
 * Old creat system call (COMPAT_43 only): an open of the user-space
 * path for writing with O_CREAT | O_TRUNC and the caller's mode.
 */
int
ocreat(struct thread *td, struct ocreat_args *uap)
{

	return (kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */
817 
818 /*
819  * Create a special file.
820  */
821 #ifndef _SYS_SYSPROTO_H_
822 struct mknod_args {
823 	char	*path;
824 	int	mode;
825 	int	dev;
826 };
827 #endif
828 /* ARGSUSED */
829 int
830 mknod(td, uap)
831 	struct thread *td;
832 	register struct mknod_args /* {
833 		char *path;
834 		int mode;
835 		int dev;
836 	} */ *uap;
837 {
838 
839 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
840 }
841 
842 int
843 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
844     int dev)
845 {
846 	struct vnode *vp;
847 	struct mount *mp;
848 	struct vattr vattr;
849 	int error;
850 	int whiteout = 0;
851 	struct nameidata nd;
852 
853 	switch (mode & S_IFMT) {
854 	case S_IFCHR:
855 	case S_IFBLK:
856 		error = suser(td);
857 		break;
858 	default:
859 		error = suser_cred(td->td_ucred, PRISON_ROOT);
860 		break;
861 	}
862 	if (error)
863 		return (error);
864 restart:
865 	bwillwrite();
866 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
867 	if ((error = namei(&nd)) != 0)
868 		return (error);
869 	vp = nd.ni_vp;
870 	if (vp != NULL) {
871 		vrele(vp);
872 		error = EEXIST;
873 	} else {
874 		VATTR_NULL(&vattr);
875 		FILEDESC_LOCK(td->td_proc->p_fd);
876 		vattr.va_mode = (mode & ALLPERMS) &
877 		    ~td->td_proc->p_fd->fd_cmask;
878 		FILEDESC_UNLOCK(td->td_proc->p_fd);
879 		vattr.va_rdev = dev;
880 		whiteout = 0;
881 
882 		switch (mode & S_IFMT) {
883 		case S_IFMT:	/* used by badsect to flag bad sectors */
884 			vattr.va_type = VBAD;
885 			break;
886 		case S_IFCHR:
887 			vattr.va_type = VCHR;
888 			break;
889 		case S_IFBLK:
890 			vattr.va_type = VBLK;
891 			break;
892 		case S_IFWHT:
893 			whiteout = 1;
894 			break;
895 		default:
896 			error = EINVAL;
897 			break;
898 		}
899 	}
900 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
901 		NDFREE(&nd, NDF_ONLY_PNBUF);
902 		vput(nd.ni_dvp);
903 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
904 			return (error);
905 		goto restart;
906 	}
907 #ifdef MAC
908 	if (error == 0 && !whiteout)
909 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
910 		    &nd.ni_cnd, &vattr);
911 #endif
912 	if (!error) {
913 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
914 		if (whiteout)
915 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
916 		else {
917 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
918 						&nd.ni_cnd, &vattr);
919 			if (error == 0)
920 				vput(nd.ni_vp);
921 		}
922 	}
923 	NDFREE(&nd, NDF_ONLY_PNBUF);
924 	vput(nd.ni_dvp);
925 	vn_finished_write(mp);
926 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
927 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
928 	return (error);
929 }
930 
931 /*
932  * Create a named pipe.
933  */
934 #ifndef _SYS_SYSPROTO_H_
935 struct mkfifo_args {
936 	char	*path;
937 	int	mode;
938 };
939 #endif
940 /* ARGSUSED */
941 int
942 mkfifo(td, uap)
943 	struct thread *td;
944 	register struct mkfifo_args /* {
945 		char *path;
946 		int mode;
947 	} */ *uap;
948 {
949 
950 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
951 }
952 
953 int
954 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
955 {
956 	struct mount *mp;
957 	struct vattr vattr;
958 	int error;
959 	struct nameidata nd;
960 
961 restart:
962 	bwillwrite();
963 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
964 	if ((error = namei(&nd)) != 0)
965 		return (error);
966 	if (nd.ni_vp != NULL) {
967 		NDFREE(&nd, NDF_ONLY_PNBUF);
968 		vrele(nd.ni_vp);
969 		vput(nd.ni_dvp);
970 		return (EEXIST);
971 	}
972 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
973 		NDFREE(&nd, NDF_ONLY_PNBUF);
974 		vput(nd.ni_dvp);
975 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
976 			return (error);
977 		goto restart;
978 	}
979 	VATTR_NULL(&vattr);
980 	vattr.va_type = VFIFO;
981 	FILEDESC_LOCK(td->td_proc->p_fd);
982 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
983 	FILEDESC_UNLOCK(td->td_proc->p_fd);
984 #ifdef MAC
985 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
986 	    &vattr);
987 	if (error)
988 		goto out;
989 #endif
990 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
991 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
992 	if (error == 0)
993 		vput(nd.ni_vp);
994 #ifdef MAC
995 out:
996 #endif
997 	NDFREE(&nd, NDF_ONLY_PNBUF);
998 	vput(nd.ni_dvp);
999 	vn_finished_write(mp);
1000 	return (error);
1001 }
1002 
1003 /*
1004  * Make a hard file link.
1005  */
1006 #ifndef _SYS_SYSPROTO_H_
1007 struct link_args {
1008 	char	*path;
1009 	char	*link;
1010 };
1011 #endif
1012 /* ARGSUSED */
1013 int
1014 link(td, uap)
1015 	struct thread *td;
1016 	register struct link_args /* {
1017 		char *path;
1018 		char *link;
1019 	} */ *uap;
1020 {
1021 
1022 	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1023 }
1024 
1025 int
1026 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1027 {
1028 	struct vnode *vp;
1029 	struct mount *mp;
1030 	struct nameidata nd;
1031 	int error;
1032 
1033 	bwillwrite();
1034 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, segflg, path, td);
1035 	if ((error = namei(&nd)) != 0)
1036 		return (error);
1037 	NDFREE(&nd, NDF_ONLY_PNBUF);
1038 	vp = nd.ni_vp;
1039 	if (vp->v_type == VDIR) {
1040 		vrele(vp);
1041 		return (EPERM);		/* POSIX */
1042 	}
1043 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1044 		vrele(vp);
1045 		return (error);
1046 	}
1047 	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1048 	if ((error = namei(&nd)) == 0) {
1049 		if (nd.ni_vp != NULL) {
1050 			vrele(nd.ni_vp);
1051 			error = EEXIST;
1052 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1053 		    == 0) {
1054 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1055 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1056 #ifdef MAC
1057 			error = mac_check_vnode_link(td->td_ucred, nd.ni_dvp,
1058 			    vp, &nd.ni_cnd);
1059 			if (error == 0)
1060 #endif
1061 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1062 			VOP_UNLOCK(vp, 0, td);
1063 		}
1064 		NDFREE(&nd, NDF_ONLY_PNBUF);
1065 		vput(nd.ni_dvp);
1066 	}
1067 	vrele(vp);
1068 	vn_finished_write(mp);
1069 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1070 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1071 	return (error);
1072 }
1073 
1074 /*
1075  * Make a symbolic link.
1076  */
1077 #ifndef _SYS_SYSPROTO_H_
1078 struct symlink_args {
1079 	char	*path;
1080 	char	*link;
1081 };
1082 #endif
1083 /* ARGSUSED */
1084 int
1085 symlink(td, uap)
1086 	struct thread *td;
1087 	register struct symlink_args /* {
1088 		char *path;
1089 		char *link;
1090 	} */ *uap;
1091 {
1092 
1093 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1094 }
1095 
1096 int
1097 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1098 {
1099 	struct mount *mp;
1100 	struct vattr vattr;
1101 	char *syspath;
1102 	int error;
1103 	struct nameidata nd;
1104 
1105 	if (segflg == UIO_SYSSPACE) {
1106 		syspath = path;
1107 	} else {
1108 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1109 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1110 			goto out;
1111 	}
1112 restart:
1113 	bwillwrite();
1114 	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1115 	if ((error = namei(&nd)) != 0)
1116 		goto out;
1117 	if (nd.ni_vp) {
1118 		NDFREE(&nd, NDF_ONLY_PNBUF);
1119 		vrele(nd.ni_vp);
1120 		vput(nd.ni_dvp);
1121 		error = EEXIST;
1122 		goto out;
1123 	}
1124 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1125 		NDFREE(&nd, NDF_ONLY_PNBUF);
1126 		vput(nd.ni_dvp);
1127 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1128 			return (error);
1129 		goto restart;
1130 	}
1131 	VATTR_NULL(&vattr);
1132 	FILEDESC_LOCK(td->td_proc->p_fd);
1133 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1134 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1135 #ifdef MAC
1136 	vattr.va_type = VLNK;
1137 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1138 	    &vattr);
1139 	if (error)
1140 		goto out2;
1141 #endif
1142 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1143 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1144 	if (error == 0)
1145 		vput(nd.ni_vp);
1146 #ifdef MAC
1147 out2:
1148 #endif
1149 	NDFREE(&nd, NDF_ONLY_PNBUF);
1150 	vput(nd.ni_dvp);
1151 	vn_finished_write(mp);
1152 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1153 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1154 out:
1155 	if (segflg != UIO_SYSSPACE)
1156 		uma_zfree(namei_zone, syspath);
1157 	return (error);
1158 }
1159 
1160 /*
1161  * Delete a whiteout from the filesystem.
1162  */
1163 /* ARGSUSED */
1164 int
1165 undelete(td, uap)
1166 	struct thread *td;
1167 	register struct undelete_args /* {
1168 		char *path;
1169 	} */ *uap;
1170 {
1171 	int error;
1172 	struct mount *mp;
1173 	struct nameidata nd;
1174 
1175 restart:
1176 	bwillwrite();
1177 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1178 	    uap->path, td);
1179 	error = namei(&nd);
1180 	if (error)
1181 		return (error);
1182 
1183 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1184 		NDFREE(&nd, NDF_ONLY_PNBUF);
1185 		if (nd.ni_vp)
1186 			vrele(nd.ni_vp);
1187 		vput(nd.ni_dvp);
1188 		return (EEXIST);
1189 	}
1190 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1191 		NDFREE(&nd, NDF_ONLY_PNBUF);
1192 		vput(nd.ni_dvp);
1193 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1194 			return (error);
1195 		goto restart;
1196 	}
1197 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1198 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1199 	NDFREE(&nd, NDF_ONLY_PNBUF);
1200 	vput(nd.ni_dvp);
1201 	vn_finished_write(mp);
1202 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1203 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1204 	return (error);
1205 }
1206 
1207 /*
1208  * Delete a name from the filesystem.
1209  */
1210 #ifndef _SYS_SYSPROTO_H_
1211 struct unlink_args {
1212 	char	*path;
1213 };
1214 #endif
1215 /* ARGSUSED */
1216 int
1217 unlink(td, uap)
1218 	struct thread *td;
1219 	struct unlink_args /* {
1220 		char *path;
1221 	} */ *uap;
1222 {
1223 
1224 	return (kern_unlink(td, uap->path, UIO_USERSPACE));
1225 }
1226 
1227 int
1228 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1229 {
1230 	struct mount *mp;
1231 	struct vnode *vp;
1232 	int error;
1233 	struct nameidata nd;
1234 
1235 restart:
1236 	bwillwrite();
1237 	NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, pathseg, path, td);
1238 	if ((error = namei(&nd)) != 0)
1239 		return (error);
1240 	vp = nd.ni_vp;
1241 	if (vp->v_type == VDIR)
1242 		error = EPERM;		/* POSIX */
1243 	else {
1244 		/*
1245 		 * The root of a mounted filesystem cannot be deleted.
1246 		 *
1247 		 * XXX: can this only be a VDIR case?
1248 		 */
1249 		if (vp->v_vflag & VV_ROOT)
1250 			error = EBUSY;
1251 	}
1252 	if (error == 0) {
1253 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1254 			NDFREE(&nd, NDF_ONLY_PNBUF);
1255 			if (vp == nd.ni_dvp)
1256 				vrele(vp);
1257 			else
1258 				vput(vp);
1259 			vput(nd.ni_dvp);
1260 			if ((error = vn_start_write(NULL, &mp,
1261 			    V_XSLEEP | PCATCH)) != 0)
1262 				return (error);
1263 			goto restart;
1264 		}
1265 #ifdef MAC
1266 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1267 		    &nd.ni_cnd);
1268 		if (error)
1269 			goto out;
1270 #endif
1271 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1272 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1273 #ifdef MAC
1274 out:
1275 #endif
1276 		vn_finished_write(mp);
1277 	}
1278 	NDFREE(&nd, NDF_ONLY_PNBUF);
1279 	if (vp == nd.ni_dvp)
1280 		vrele(vp);
1281 	else
1282 		vput(vp);
1283 	vput(nd.ni_dvp);
1284 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1285 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1286 	return (error);
1287 }
1288 
1289 /*
1290  * Reposition read/write file offset.
1291  */
1292 #ifndef _SYS_SYSPROTO_H_
1293 struct lseek_args {
1294 	int	fd;
1295 	int	pad;
1296 	off_t	offset;
1297 	int	whence;
1298 };
1299 #endif
1300 int
1301 lseek(td, uap)
1302 	struct thread *td;
1303 	register struct lseek_args /* {
1304 		int fd;
1305 		int pad;
1306 		off_t offset;
1307 		int whence;
1308 	} */ *uap;
1309 {
1310 	struct ucred *cred = td->td_ucred;
1311 	struct file *fp;
1312 	struct vnode *vp;
1313 	struct vattr vattr;
1314 	off_t offset;
1315 	int error, noneg;
1316 
1317 	if ((error = fget(td, uap->fd, &fp)) != 0)
1318 		return (error);
1319 	if (fp->f_type != DTYPE_VNODE) {
1320 		fdrop(fp, td);
1321 		return (ESPIPE);
1322 	}
1323 	vp = fp->f_data;
1324 	noneg = (vp->v_type != VCHR);
1325 	offset = uap->offset;
1326 	switch (uap->whence) {
1327 	case L_INCR:
1328 		if (noneg &&
1329 		    (fp->f_offset < 0 ||
1330 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1331 			error = EOVERFLOW;
1332 			break;
1333 		}
1334 		offset += fp->f_offset;
1335 		break;
1336 	case L_XTND:
1337 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1338 		error = VOP_GETATTR(vp, &vattr, cred, td);
1339 		VOP_UNLOCK(vp, 0, td);
1340 		if (error)
1341 			break;
1342 		if (noneg &&
1343 		    (vattr.va_size > OFF_MAX ||
1344 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1345 			error = EOVERFLOW;
1346 			break;
1347 		}
1348 		offset += vattr.va_size;
1349 		break;
1350 	case L_SET:
1351 		break;
1352 	default:
1353 		error = EINVAL;
1354 	}
1355 	if (error == 0 && noneg && offset < 0)
1356 		error = EINVAL;
1357 	if (error != 0) {
1358 		fdrop(fp, td);
1359 		return (error);
1360 	}
1361 	fp->f_offset = offset;
1362 	*(off_t *)(td->td_retval) = fp->f_offset;
1363 	fdrop(fp, td);
1364 	return (0);
1365 }
1366 
1367 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1368 /*
1369  * Reposition read/write file offset.
1370  */
1371 #ifndef _SYS_SYSPROTO_H_
1372 struct olseek_args {
1373 	int	fd;
1374 	long	offset;
1375 	int	whence;
1376 };
1377 #endif
1378 int
1379 olseek(td, uap)
1380 	struct thread *td;
1381 	register struct olseek_args /* {
1382 		int fd;
1383 		long offset;
1384 		int whence;
1385 	} */ *uap;
1386 {
1387 	struct lseek_args /* {
1388 		int fd;
1389 		int pad;
1390 		off_t offset;
1391 		int whence;
1392 	} */ nuap;
1393 	int error;
1394 
1395 	nuap.fd = uap->fd;
1396 	nuap.offset = uap->offset;
1397 	nuap.whence = uap->whence;
1398 	error = lseek(td, &nuap);
1399 	return (error);
1400 }
1401 #endif /* COMPAT_43 */
1402 
1403 /*
1404  * Check access permissions using passed credentials.
1405  */
1406 static int
1407 vn_access(vp, user_flags, cred, td)
1408 	struct vnode	*vp;
1409 	int		user_flags;
1410 	struct ucred	*cred;
1411 	struct thread	*td;
1412 {
1413 	int error, flags;
1414 
1415 	/* Flags == 0 means only check for existence. */
1416 	error = 0;
1417 	if (user_flags) {
1418 		flags = 0;
1419 		if (user_flags & R_OK)
1420 			flags |= VREAD;
1421 		if (user_flags & W_OK)
1422 			flags |= VWRITE;
1423 		if (user_flags & X_OK)
1424 			flags |= VEXEC;
1425 #ifdef MAC
1426 		error = mac_check_vnode_access(cred, vp, flags);
1427 		if (error)
1428 			return (error);
1429 #endif
1430 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1431 			error = VOP_ACCESS(vp, flags, cred, td);
1432 	}
1433 	return (error);
1434 }
1435 
1436 /*
1437  * Check access permissions using "real" credentials.
1438  */
1439 #ifndef _SYS_SYSPROTO_H_
1440 struct access_args {
1441 	char	*path;
1442 	int	flags;
1443 };
1444 #endif
1445 int
1446 access(td, uap)
1447 	struct thread *td;
1448 	register struct access_args /* {
1449 		char *path;
1450 		int flags;
1451 	} */ *uap;
1452 {
1453 
1454 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1455 }
1456 
1457 int
1458 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1459 {
1460 	struct ucred *cred, *tmpcred;
1461 	register struct vnode *vp;
1462 	int error;
1463 	struct nameidata nd;
1464 
1465 	/*
1466 	 * Create and modify a temporary credential instead of one that
1467 	 * is potentially shared.  This could also mess up socket
1468 	 * buffer accounting which can run in an interrupt context.
1469 	 *
1470 	 * XXX - Depending on how "threads" are finally implemented, it
1471 	 * may be better to explicitly pass the credential to namei()
1472 	 * rather than to modify the potentially shared process structure.
1473 	 */
1474 	cred = td->td_ucred;
1475 	tmpcred = crdup(cred);
1476 	tmpcred->cr_uid = cred->cr_ruid;
1477 	tmpcred->cr_groups[0] = cred->cr_rgid;
1478 	td->td_ucred = tmpcred;
1479 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1480 	if ((error = namei(&nd)) != 0)
1481 		goto out1;
1482 	vp = nd.ni_vp;
1483 
1484 	error = vn_access(vp, flags, tmpcred, td);
1485 	NDFREE(&nd, NDF_ONLY_PNBUF);
1486 	vput(vp);
1487 out1:
1488 	td->td_ucred = cred;
1489 	crfree(tmpcred);
1490 	return (error);
1491 }
1492 
1493 /*
1494  * Check access permissions using "effective" credentials.
1495  */
1496 #ifndef _SYS_SYSPROTO_H_
1497 struct eaccess_args {
1498 	char	*path;
1499 	int	flags;
1500 };
1501 #endif
1502 int
1503 eaccess(td, uap)
1504 	struct thread *td;
1505 	register struct eaccess_args /* {
1506 		char *path;
1507 		int flags;
1508 	} */ *uap;
1509 {
1510 	struct nameidata nd;
1511 	struct vnode *vp;
1512 	int error;
1513 
1514 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1515 	    uap->path, td);
1516 	if ((error = namei(&nd)) != 0)
1517 		return (error);
1518 	vp = nd.ni_vp;
1519 
1520 	error = vn_access(vp, uap->flags, td->td_ucred, td);
1521 	NDFREE(&nd, NDF_ONLY_PNBUF);
1522 	vput(vp);
1523 	return (error);
1524 }
1525 
1526 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1527 /*
1528  * Get file status; this version follows links.
1529  */
1530 #ifndef _SYS_SYSPROTO_H_
1531 struct ostat_args {
1532 	char	*path;
1533 	struct ostat *ub;
1534 };
1535 #endif
1536 /* ARGSUSED */
1537 int
1538 ostat(td, uap)
1539 	struct thread *td;
1540 	register struct ostat_args /* {
1541 		char *path;
1542 		struct ostat *ub;
1543 	} */ *uap;
1544 {
1545 	struct stat sb;
1546 	struct ostat osb;
1547 	int error;
1548 	struct nameidata nd;
1549 
1550 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1551 	    uap->path, td);
1552 	if ((error = namei(&nd)) != 0)
1553 		return (error);
1554 	NDFREE(&nd, NDF_ONLY_PNBUF);
1555 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1556 	vput(nd.ni_vp);
1557 	if (error)
1558 		return (error);
1559 	cvtstat(&sb, &osb);
1560 	error = copyout(&osb, uap->ub, sizeof (osb));
1561 	return (error);
1562 }
1563 
1564 /*
1565  * Get file status; this version does not follow links.
1566  */
1567 #ifndef _SYS_SYSPROTO_H_
1568 struct olstat_args {
1569 	char	*path;
1570 	struct ostat *ub;
1571 };
1572 #endif
1573 /* ARGSUSED */
1574 int
1575 olstat(td, uap)
1576 	struct thread *td;
1577 	register struct olstat_args /* {
1578 		char *path;
1579 		struct ostat *ub;
1580 	} */ *uap;
1581 {
1582 	struct vnode *vp;
1583 	struct stat sb;
1584 	struct ostat osb;
1585 	int error;
1586 	struct nameidata nd;
1587 
1588 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1589 	    uap->path, td);
1590 	if ((error = namei(&nd)) != 0)
1591 		return (error);
1592 	vp = nd.ni_vp;
1593 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1594 	NDFREE(&nd, NDF_ONLY_PNBUF);
1595 	vput(vp);
1596 	if (error)
1597 		return (error);
1598 	cvtstat(&sb, &osb);
1599 	error = copyout(&osb, uap->ub, sizeof (osb));
1600 	return (error);
1601 }
1602 
1603 /*
1604  * Convert from an old to a new stat structure.
1605  */
1606 void
1607 cvtstat(st, ost)
1608 	struct stat *st;
1609 	struct ostat *ost;
1610 {
1611 
1612 	ost->st_dev = st->st_dev;
1613 	ost->st_ino = st->st_ino;
1614 	ost->st_mode = st->st_mode;
1615 	ost->st_nlink = st->st_nlink;
1616 	ost->st_uid = st->st_uid;
1617 	ost->st_gid = st->st_gid;
1618 	ost->st_rdev = st->st_rdev;
1619 	if (st->st_size < (quad_t)1 << 32)
1620 		ost->st_size = st->st_size;
1621 	else
1622 		ost->st_size = -2;
1623 	ost->st_atime = st->st_atime;
1624 	ost->st_mtime = st->st_mtime;
1625 	ost->st_ctime = st->st_ctime;
1626 	ost->st_blksize = st->st_blksize;
1627 	ost->st_blocks = st->st_blocks;
1628 	ost->st_flags = st->st_flags;
1629 	ost->st_gen = st->st_gen;
1630 }
1631 #endif /* COMPAT_43 || COMPAT_SUNOS */
1632 
1633 /*
1634  * Get file status; this version follows links.
1635  */
1636 #ifndef _SYS_SYSPROTO_H_
1637 struct stat_args {
1638 	char	*path;
1639 	struct stat *ub;
1640 };
1641 #endif
1642 /* ARGSUSED */
1643 int
1644 stat(td, uap)
1645 	struct thread *td;
1646 	register struct stat_args /* {
1647 		char *path;
1648 		struct stat *ub;
1649 	} */ *uap;
1650 {
1651 	struct stat sb;
1652 	int error;
1653 	struct nameidata nd;
1654 
1655 #ifdef LOOKUP_SHARED
1656 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
1657 	    UIO_USERSPACE, uap->path, td);
1658 #else
1659 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1660 	    uap->path, td);
1661 #endif
1662 	if ((error = namei(&nd)) != 0)
1663 		return (error);
1664 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1665 	NDFREE(&nd, NDF_ONLY_PNBUF);
1666 	vput(nd.ni_vp);
1667 	if (error)
1668 		return (error);
1669 	error = copyout(&sb, uap->ub, sizeof (sb));
1670 	return (error);
1671 }
1672 
1673 /*
1674  * Get file status; this version does not follow links.
1675  */
1676 #ifndef _SYS_SYSPROTO_H_
1677 struct lstat_args {
1678 	char	*path;
1679 	struct stat *ub;
1680 };
1681 #endif
1682 /* ARGSUSED */
1683 int
1684 lstat(td, uap)
1685 	struct thread *td;
1686 	register struct lstat_args /* {
1687 		char *path;
1688 		struct stat *ub;
1689 	} */ *uap;
1690 {
1691 	int error;
1692 	struct vnode *vp;
1693 	struct stat sb;
1694 	struct nameidata nd;
1695 
1696 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1697 	    uap->path, td);
1698 	if ((error = namei(&nd)) != 0)
1699 		return (error);
1700 	vp = nd.ni_vp;
1701 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1702 	NDFREE(&nd, NDF_ONLY_PNBUF);
1703 	vput(vp);
1704 	if (error)
1705 		return (error);
1706 	error = copyout(&sb, uap->ub, sizeof (sb));
1707 	return (error);
1708 }
1709 
1710 /*
1711  * Implementation of the NetBSD stat() function.
1712  * XXX This should probably be collapsed with the FreeBSD version,
1713  * as the differences are only due to vn_stat() clearing spares at
1714  * the end of the structures.  vn_stat could be split to avoid this,
1715  * and thus collapse the following to close to zero code.
1716  */
1717 void
1718 cvtnstat(sb, nsb)
1719 	struct stat *sb;
1720 	struct nstat *nsb;
1721 {
1722 	bzero(nsb, sizeof *nsb);
1723 	nsb->st_dev = sb->st_dev;
1724 	nsb->st_ino = sb->st_ino;
1725 	nsb->st_mode = sb->st_mode;
1726 	nsb->st_nlink = sb->st_nlink;
1727 	nsb->st_uid = sb->st_uid;
1728 	nsb->st_gid = sb->st_gid;
1729 	nsb->st_rdev = sb->st_rdev;
1730 	nsb->st_atimespec = sb->st_atimespec;
1731 	nsb->st_mtimespec = sb->st_mtimespec;
1732 	nsb->st_ctimespec = sb->st_ctimespec;
1733 	nsb->st_size = sb->st_size;
1734 	nsb->st_blocks = sb->st_blocks;
1735 	nsb->st_blksize = sb->st_blksize;
1736 	nsb->st_flags = sb->st_flags;
1737 	nsb->st_gen = sb->st_gen;
1738 	nsb->st_birthtimespec = sb->st_birthtimespec;
1739 }
1740 
1741 #ifndef _SYS_SYSPROTO_H_
1742 struct nstat_args {
1743 	char	*path;
1744 	struct nstat *ub;
1745 };
1746 #endif
1747 /* ARGSUSED */
1748 int
1749 nstat(td, uap)
1750 	struct thread *td;
1751 	register struct nstat_args /* {
1752 		char *path;
1753 		struct nstat *ub;
1754 	} */ *uap;
1755 {
1756 	struct stat sb;
1757 	struct nstat nsb;
1758 	int error;
1759 	struct nameidata nd;
1760 
1761 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1762 	    uap->path, td);
1763 	if ((error = namei(&nd)) != 0)
1764 		return (error);
1765 	NDFREE(&nd, NDF_ONLY_PNBUF);
1766 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1767 	vput(nd.ni_vp);
1768 	if (error)
1769 		return (error);
1770 	cvtnstat(&sb, &nsb);
1771 	error = copyout(&nsb, uap->ub, sizeof (nsb));
1772 	return (error);
1773 }
1774 
1775 /*
1776  * NetBSD lstat.  Get file status; this version does not follow links.
1777  */
#ifndef _SYS_SYSPROTO_H_
/*
 * Fallback declaration of the nlstat() argument structure, used when
 * the system-call prototypes header has not been included.  The layout
 * matches what nlstat() expects: a path and a struct nstat buffer.
 */
struct nlstat_args {
	char	*path;		/* name to look up */
	struct nstat *ub;	/* where the nstat result is copied out */
};
#endif
1784 /* ARGSUSED */
1785 int
1786 nlstat(td, uap)
1787 	struct thread *td;
1788 	register struct nlstat_args /* {
1789 		char *path;
1790 		struct nstat *ub;
1791 	} */ *uap;
1792 {
1793 	int error;
1794 	struct vnode *vp;
1795 	struct stat sb;
1796 	struct nstat nsb;
1797 	struct nameidata nd;
1798 
1799 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1800 	    uap->path, td);
1801 	if ((error = namei(&nd)) != 0)
1802 		return (error);
1803 	vp = nd.ni_vp;
1804 	NDFREE(&nd, NDF_ONLY_PNBUF);
1805 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1806 	vput(vp);
1807 	if (error)
1808 		return (error);
1809 	cvtnstat(&sb, &nsb);
1810 	error = copyout(&nsb, uap->ub, sizeof (nsb));
1811 	return (error);
1812 }
1813 
1814 /*
1815  * Get configurable pathname variables.
1816  */
1817 #ifndef _SYS_SYSPROTO_H_
1818 struct pathconf_args {
1819 	char	*path;
1820 	int	name;
1821 };
1822 #endif
1823 /* ARGSUSED */
1824 int
1825 pathconf(td, uap)
1826 	struct thread *td;
1827 	register struct pathconf_args /* {
1828 		char *path;
1829 		int name;
1830 	} */ *uap;
1831 {
1832 	int error;
1833 	struct nameidata nd;
1834 
1835 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1836 	    uap->path, td);
1837 	if ((error = namei(&nd)) != 0)
1838 		return (error);
1839 	NDFREE(&nd, NDF_ONLY_PNBUF);
1840 
1841 	/* If asynchronous I/O is available, it works for all files. */
1842 	if (uap->name == _PC_ASYNC_IO)
1843 		td->td_retval[0] = async_io_version;
1844 	else
1845 		error = VOP_PATHCONF(nd.ni_vp, uap->name, td->td_retval);
1846 	vput(nd.ni_vp);
1847 	return (error);
1848 }
1849 
1850 /*
1851  * Return target name of a symbolic link.
1852  */
1853 #ifndef _SYS_SYSPROTO_H_
1854 struct readlink_args {
1855 	char	*path;
1856 	char	*buf;
1857 	int	count;
1858 };
1859 #endif
1860 /* ARGSUSED */
1861 int
1862 readlink(td, uap)
1863 	struct thread *td;
1864 	register struct readlink_args /* {
1865 		char *path;
1866 		char *buf;
1867 		int count;
1868 	} */ *uap;
1869 {
1870 
1871 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
1872 	    UIO_USERSPACE, uap->count));
1873 }
1874 
1875 int
1876 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
1877     enum uio_seg bufseg, int count)
1878 {
1879 	register struct vnode *vp;
1880 	struct iovec aiov;
1881 	struct uio auio;
1882 	int error;
1883 	struct nameidata nd;
1884 
1885 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1886 	if ((error = namei(&nd)) != 0)
1887 		return (error);
1888 	NDFREE(&nd, NDF_ONLY_PNBUF);
1889 	vp = nd.ni_vp;
1890 #ifdef MAC
1891 	error = mac_check_vnode_readlink(td->td_ucred, vp);
1892 	if (error) {
1893 		vput(vp);
1894 		return (error);
1895 	}
1896 #endif
1897 	if (vp->v_type != VLNK)
1898 		error = EINVAL;
1899 	else {
1900 		aiov.iov_base = buf;
1901 		aiov.iov_len = count;
1902 		auio.uio_iov = &aiov;
1903 		auio.uio_iovcnt = 1;
1904 		auio.uio_offset = 0;
1905 		auio.uio_rw = UIO_READ;
1906 		auio.uio_segflg = bufseg;
1907 		auio.uio_td = td;
1908 		auio.uio_resid = count;
1909 		error = VOP_READLINK(vp, &auio, td->td_ucred);
1910 	}
1911 	vput(vp);
1912 	td->td_retval[0] = count - auio.uio_resid;
1913 	return (error);
1914 }
1915 
1916 /*
1917  * Common implementation code for chflags() and fchflags().
1918  */
1919 static int
1920 setfflags(td, vp, flags)
1921 	struct thread *td;
1922 	struct vnode *vp;
1923 	int flags;
1924 {
1925 	int error;
1926 	struct mount *mp;
1927 	struct vattr vattr;
1928 
1929 	/*
1930 	 * Prevent non-root users from setting flags on devices.  When
1931 	 * a device is reused, users can retain ownership of the device
1932 	 * if they are allowed to set flags and programs assume that
1933 	 * chown can't fail when done as root.
1934 	 */
1935 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
1936 		error = suser_cred(td->td_ucred, PRISON_ROOT);
1937 		if (error)
1938 			return (error);
1939 	}
1940 
1941 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1942 		return (error);
1943 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1944 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1945 	VATTR_NULL(&vattr);
1946 	vattr.va_flags = flags;
1947 #ifdef MAC
1948 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
1949 	if (error == 0)
1950 #endif
1951 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
1952 	VOP_UNLOCK(vp, 0, td);
1953 	vn_finished_write(mp);
1954 	return (error);
1955 }
1956 
1957 /*
1958  * Change flags of a file given a path name.
1959  */
1960 #ifndef _SYS_SYSPROTO_H_
1961 struct chflags_args {
1962 	char	*path;
1963 	int	flags;
1964 };
1965 #endif
1966 /* ARGSUSED */
1967 int
1968 chflags(td, uap)
1969 	struct thread *td;
1970 	register struct chflags_args /* {
1971 		char *path;
1972 		int flags;
1973 	} */ *uap;
1974 {
1975 	int error;
1976 	struct nameidata nd;
1977 
1978 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
1979 	if ((error = namei(&nd)) != 0)
1980 		return (error);
1981 	NDFREE(&nd, NDF_ONLY_PNBUF);
1982 	error = setfflags(td, nd.ni_vp, uap->flags);
1983 	vrele(nd.ni_vp);
1984 	return error;
1985 }
1986 
1987 /*
1988  * Same as chflags() but doesn't follow symlinks.
1989  */
1990 int
1991 lchflags(td, uap)
1992 	struct thread *td;
1993 	register struct lchflags_args /* {
1994 		char *path;
1995 		int flags;
1996 	} */ *uap;
1997 {
1998 	int error;
1999 	struct nameidata nd;
2000 
2001 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2002 	if ((error = namei(&nd)) != 0)
2003 		return (error);
2004 	NDFREE(&nd, NDF_ONLY_PNBUF);
2005 	error = setfflags(td, nd.ni_vp, uap->flags);
2006 	vrele(nd.ni_vp);
2007 	return error;
2008 }
2009 
2010 /*
2011  * Change flags of a file given a file descriptor.
2012  */
2013 #ifndef _SYS_SYSPROTO_H_
2014 struct fchflags_args {
2015 	int	fd;
2016 	int	flags;
2017 };
2018 #endif
2019 /* ARGSUSED */
2020 int
2021 fchflags(td, uap)
2022 	struct thread *td;
2023 	register struct fchflags_args /* {
2024 		int fd;
2025 		int flags;
2026 	} */ *uap;
2027 {
2028 	struct file *fp;
2029 	int error;
2030 
2031 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2032 		return (error);
2033 	error = setfflags(td, fp->f_data, uap->flags);
2034 	fdrop(fp, td);
2035 	return (error);
2036 }
2037 
2038 /*
2039  * Common implementation code for chmod(), lchmod() and fchmod().
2040  */
2041 static int
2042 setfmode(td, vp, mode)
2043 	struct thread *td;
2044 	struct vnode *vp;
2045 	int mode;
2046 {
2047 	int error;
2048 	struct mount *mp;
2049 	struct vattr vattr;
2050 
2051 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2052 		return (error);
2053 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2054 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2055 	VATTR_NULL(&vattr);
2056 	vattr.va_mode = mode & ALLPERMS;
2057 #ifdef MAC
2058 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2059 	if (error == 0)
2060 #endif
2061 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2062 	VOP_UNLOCK(vp, 0, td);
2063 	vn_finished_write(mp);
2064 	return error;
2065 }
2066 
2067 /*
2068  * Change mode of a file given path name.
2069  */
2070 #ifndef _SYS_SYSPROTO_H_
2071 struct chmod_args {
2072 	char	*path;
2073 	int	mode;
2074 };
2075 #endif
2076 /* ARGSUSED */
2077 int
2078 chmod(td, uap)
2079 	struct thread *td;
2080 	register struct chmod_args /* {
2081 		char *path;
2082 		int mode;
2083 	} */ *uap;
2084 {
2085 
2086 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2087 }
2088 
2089 int
2090 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2091 {
2092 	int error;
2093 	struct nameidata nd;
2094 
2095 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2096 	if ((error = namei(&nd)) != 0)
2097 		return (error);
2098 	NDFREE(&nd, NDF_ONLY_PNBUF);
2099 	error = setfmode(td, nd.ni_vp, mode);
2100 	vrele(nd.ni_vp);
2101 	return error;
2102 }
2103 
2104 /*
2105  * Change mode of a file given path name (don't follow links.)
2106  */
2107 #ifndef _SYS_SYSPROTO_H_
2108 struct lchmod_args {
2109 	char	*path;
2110 	int	mode;
2111 };
2112 #endif
2113 /* ARGSUSED */
2114 int
2115 lchmod(td, uap)
2116 	struct thread *td;
2117 	register struct lchmod_args /* {
2118 		char *path;
2119 		int mode;
2120 	} */ *uap;
2121 {
2122 	int error;
2123 	struct nameidata nd;
2124 
2125 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2126 	if ((error = namei(&nd)) != 0)
2127 		return (error);
2128 	NDFREE(&nd, NDF_ONLY_PNBUF);
2129 	error = setfmode(td, nd.ni_vp, uap->mode);
2130 	vrele(nd.ni_vp);
2131 	return error;
2132 }
2133 
2134 /*
2135  * Change mode of a file given a file descriptor.
2136  */
2137 #ifndef _SYS_SYSPROTO_H_
2138 struct fchmod_args {
2139 	int	fd;
2140 	int	mode;
2141 };
2142 #endif
2143 /* ARGSUSED */
2144 int
2145 fchmod(td, uap)
2146 	struct thread *td;
2147 	register struct fchmod_args /* {
2148 		int fd;
2149 		int mode;
2150 	} */ *uap;
2151 {
2152 	struct file *fp;
2153 	struct vnode *vp;
2154 	int error;
2155 
2156 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2157 		return (error);
2158 	vp = fp->f_data;
2159 	error = setfmode(td, fp->f_data, uap->mode);
2160 	fdrop(fp, td);
2161 	return (error);
2162 }
2163 
2164 /*
2165  * Common implementation for chown(), lchown(), and fchown()
2166  */
2167 static int
2168 setfown(td, vp, uid, gid)
2169 	struct thread *td;
2170 	struct vnode *vp;
2171 	uid_t uid;
2172 	gid_t gid;
2173 {
2174 	int error;
2175 	struct mount *mp;
2176 	struct vattr vattr;
2177 
2178 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2179 		return (error);
2180 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2181 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2182 	VATTR_NULL(&vattr);
2183 	vattr.va_uid = uid;
2184 	vattr.va_gid = gid;
2185 #ifdef MAC
2186 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2187 	    vattr.va_gid);
2188 	if (error == 0)
2189 #endif
2190 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2191 	VOP_UNLOCK(vp, 0, td);
2192 	vn_finished_write(mp);
2193 	return error;
2194 }
2195 
2196 /*
2197  * Set ownership given a path name.
2198  */
2199 #ifndef _SYS_SYSPROTO_H_
2200 struct chown_args {
2201 	char	*path;
2202 	int	uid;
2203 	int	gid;
2204 };
2205 #endif
2206 /* ARGSUSED */
2207 int
2208 chown(td, uap)
2209 	struct thread *td;
2210 	register struct chown_args /* {
2211 		char *path;
2212 		int uid;
2213 		int gid;
2214 	} */ *uap;
2215 {
2216 
2217 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2218 }
2219 
2220 int
2221 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2222     int gid)
2223 {
2224 	int error;
2225 	struct nameidata nd;
2226 
2227 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2228 	if ((error = namei(&nd)) != 0)
2229 		return (error);
2230 	NDFREE(&nd, NDF_ONLY_PNBUF);
2231 	error = setfown(td, nd.ni_vp, uid, gid);
2232 	vrele(nd.ni_vp);
2233 	return (error);
2234 }
2235 
2236 /*
2237  * Set ownership given a path name, do not cross symlinks.
2238  */
2239 #ifndef _SYS_SYSPROTO_H_
2240 struct lchown_args {
2241 	char	*path;
2242 	int	uid;
2243 	int	gid;
2244 };
2245 #endif
2246 /* ARGSUSED */
2247 int
2248 lchown(td, uap)
2249 	struct thread *td;
2250 	register struct lchown_args /* {
2251 		char *path;
2252 		int uid;
2253 		int gid;
2254 	} */ *uap;
2255 {
2256 
2257 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2258 }
2259 
2260 int
2261 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2262     int gid)
2263 {
2264 	int error;
2265 	struct nameidata nd;
2266 
2267 	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2268 	if ((error = namei(&nd)) != 0)
2269 		return (error);
2270 	NDFREE(&nd, NDF_ONLY_PNBUF);
2271 	error = setfown(td, nd.ni_vp, uid, gid);
2272 	vrele(nd.ni_vp);
2273 	return (error);
2274 }
2275 
2276 /*
2277  * Set ownership given a file descriptor.
2278  */
2279 #ifndef _SYS_SYSPROTO_H_
2280 struct fchown_args {
2281 	int	fd;
2282 	int	uid;
2283 	int	gid;
2284 };
2285 #endif
2286 /* ARGSUSED */
2287 int
2288 fchown(td, uap)
2289 	struct thread *td;
2290 	register struct fchown_args /* {
2291 		int fd;
2292 		int uid;
2293 		int gid;
2294 	} */ *uap;
2295 {
2296 	struct file *fp;
2297 	struct vnode *vp;
2298 	int error;
2299 
2300 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2301 		return (error);
2302 	vp = fp->f_data;
2303 	error = setfown(td, fp->f_data, uap->uid, uap->gid);
2304 	fdrop(fp, td);
2305 	return (error);
2306 }
2307 
2308 /*
2309  * Common implementation code for utimes(), lutimes(), and futimes().
2310  */
2311 static int
2312 getutimes(usrtvp, tvpseg, tsp)
2313 	const struct timeval *usrtvp;
2314 	enum uio_seg tvpseg;
2315 	struct timespec *tsp;
2316 {
2317 	struct timeval tv[2];
2318 	const struct timeval *tvp;
2319 	int error;
2320 
2321 	if (usrtvp == NULL) {
2322 		microtime(&tv[0]);
2323 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2324 		tsp[1] = tsp[0];
2325 	} else {
2326 		if (tvpseg == UIO_SYSSPACE) {
2327 			tvp = usrtvp;
2328 		} else {
2329 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2330 				return (error);
2331 			tvp = tv;
2332 		}
2333 
2334 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2335 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2336 	}
2337 	return 0;
2338 }
2339 
2340 /*
2341  * Common implementation code for utimes(), lutimes(), and futimes().
2342  */
2343 static int
2344 setutimes(td, vp, ts, numtimes, nullflag)
2345 	struct thread *td;
2346 	struct vnode *vp;
2347 	const struct timespec *ts;
2348 	int numtimes;
2349 	int nullflag;
2350 {
2351 	int error, setbirthtime;
2352 	struct mount *mp;
2353 	struct vattr vattr;
2354 
2355 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2356 		return (error);
2357 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2358 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2359 	setbirthtime = 0;
2360 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2361 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2362 		setbirthtime = 1;
2363 	VATTR_NULL(&vattr);
2364 	vattr.va_atime = ts[0];
2365 	vattr.va_mtime = ts[1];
2366 	if (setbirthtime)
2367 		vattr.va_birthtime = ts[1];
2368 	if (numtimes > 2)
2369 		vattr.va_birthtime = ts[2];
2370 	if (nullflag)
2371 		vattr.va_vaflags |= VA_UTIMES_NULL;
2372 #ifdef MAC
2373 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2374 	    vattr.va_mtime);
2375 #endif
2376 	if (error == 0)
2377 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2378 	VOP_UNLOCK(vp, 0, td);
2379 	vn_finished_write(mp);
2380 	return error;
2381 }
2382 
2383 /*
2384  * Set the access and modification times of a file.
2385  */
2386 #ifndef _SYS_SYSPROTO_H_
2387 struct utimes_args {
2388 	char	*path;
2389 	struct	timeval *tptr;
2390 };
2391 #endif
2392 /* ARGSUSED */
2393 int
2394 utimes(td, uap)
2395 	struct thread *td;
2396 	register struct utimes_args /* {
2397 		char *path;
2398 		struct timeval *tptr;
2399 	} */ *uap;
2400 {
2401 
2402 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2403 	    UIO_USERSPACE));
2404 }
2405 
2406 int
2407 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2408     struct timeval *tptr, enum uio_seg tptrseg)
2409 {
2410 	struct timespec ts[2];
2411 	int error;
2412 	struct nameidata nd;
2413 
2414 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2415 		return (error);
2416 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2417 	if ((error = namei(&nd)) != 0)
2418 		return (error);
2419 	NDFREE(&nd, NDF_ONLY_PNBUF);
2420 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2421 	vrele(nd.ni_vp);
2422 	return (error);
2423 }
2424 
2425 /*
2426  * Set the access and modification times of a file.
2427  */
2428 #ifndef _SYS_SYSPROTO_H_
2429 struct lutimes_args {
2430 	char	*path;
2431 	struct	timeval *tptr;
2432 };
2433 #endif
2434 /* ARGSUSED */
2435 int
2436 lutimes(td, uap)
2437 	struct thread *td;
2438 	register struct lutimes_args /* {
2439 		char *path;
2440 		struct timeval *tptr;
2441 	} */ *uap;
2442 {
2443 
2444 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2445 	    UIO_USERSPACE));
2446 }
2447 
2448 int
2449 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2450     struct timeval *tptr, enum uio_seg tptrseg)
2451 {
2452 	struct timespec ts[2];
2453 	int error;
2454 	struct nameidata nd;
2455 
2456 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2457 		return (error);
2458 	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2459 	if ((error = namei(&nd)) != 0)
2460 		return (error);
2461 	NDFREE(&nd, NDF_ONLY_PNBUF);
2462 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2463 	vrele(nd.ni_vp);
2464 	return (error);
2465 }
2466 
2467 /*
2468  * Set the access and modification times of a file.
2469  */
2470 #ifndef _SYS_SYSPROTO_H_
2471 struct futimes_args {
2472 	int	fd;
2473 	struct	timeval *tptr;
2474 };
2475 #endif
2476 /* ARGSUSED */
2477 int
2478 futimes(td, uap)
2479 	struct thread *td;
2480 	register struct futimes_args /* {
2481 		int  fd;
2482 		struct timeval *tptr;
2483 	} */ *uap;
2484 {
2485 
2486 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2487 }
2488 
2489 int
2490 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2491     enum uio_seg tptrseg)
2492 {
2493 	struct timespec ts[2];
2494 	struct file *fp;
2495 	int error;
2496 
2497 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2498 		return (error);
2499 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2500 		return (error);
2501 	error = setutimes(td, fp->f_data, ts, 2, tptr == NULL);
2502 	fdrop(fp, td);
2503 	return (error);
2504 }
2505 
2506 /*
2507  * Truncate a file given its path name.
2508  */
2509 #ifndef _SYS_SYSPROTO_H_
2510 struct truncate_args {
2511 	char	*path;
2512 	int	pad;
2513 	off_t	length;
2514 };
2515 #endif
2516 /* ARGSUSED */
2517 int
2518 truncate(td, uap)
2519 	struct thread *td;
2520 	register struct truncate_args /* {
2521 		char *path;
2522 		int pad;
2523 		off_t length;
2524 	} */ *uap;
2525 {
2526 
2527 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2528 }
2529 
2530 int
2531 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2532 {
2533 	struct mount *mp;
2534 	struct vnode *vp;
2535 	struct vattr vattr;
2536 	int error;
2537 	struct nameidata nd;
2538 
2539 	if (length < 0)
2540 		return(EINVAL);
2541 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2542 	if ((error = namei(&nd)) != 0)
2543 		return (error);
2544 	vp = nd.ni_vp;
2545 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2546 		vrele(vp);
2547 		return (error);
2548 	}
2549 	NDFREE(&nd, NDF_ONLY_PNBUF);
2550 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2551 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2552 	if (vp->v_type == VDIR)
2553 		error = EISDIR;
2554 #ifdef MAC
2555 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2556 	}
2557 #endif
2558 	else if ((error = vn_writechk(vp)) == 0 &&
2559 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2560 		VATTR_NULL(&vattr);
2561 		vattr.va_size = length;
2562 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2563 	}
2564 	vput(vp);
2565 	vn_finished_write(mp);
2566 	return (error);
2567 }
2568 
2569 /*
2570  * Truncate a file given a file descriptor.
2571  */
2572 #ifndef _SYS_SYSPROTO_H_
2573 struct ftruncate_args {
2574 	int	fd;
2575 	int	pad;
2576 	off_t	length;
2577 };
2578 #endif
2579 /* ARGSUSED */
2580 int
2581 ftruncate(td, uap)
2582 	struct thread *td;
2583 	register struct ftruncate_args /* {
2584 		int fd;
2585 		int pad;
2586 		off_t length;
2587 	} */ *uap;
2588 {
2589 	struct mount *mp;
2590 	struct vattr vattr;
2591 	struct vnode *vp;
2592 	struct file *fp;
2593 	int error;
2594 
2595 	if (uap->length < 0)
2596 		return(EINVAL);
2597 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2598 		return (error);
2599 	if ((fp->f_flag & FWRITE) == 0) {
2600 		fdrop(fp, td);
2601 		return (EINVAL);
2602 	}
2603 	vp = fp->f_data;
2604 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2605 		fdrop(fp, td);
2606 		return (error);
2607 	}
2608 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2609 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2610 	if (vp->v_type == VDIR)
2611 		error = EISDIR;
2612 #ifdef MAC
2613 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
2614 	    vp))) {
2615 	}
2616 #endif
2617 	else if ((error = vn_writechk(vp)) == 0) {
2618 		VATTR_NULL(&vattr);
2619 		vattr.va_size = uap->length;
2620 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2621 	}
2622 	VOP_UNLOCK(vp, 0, td);
2623 	vn_finished_write(mp);
2624 	fdrop(fp, td);
2625 	return (error);
2626 }
2627 
2628 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2629 /*
2630  * Truncate a file given its path name.
2631  */
2632 #ifndef _SYS_SYSPROTO_H_
2633 struct otruncate_args {
2634 	char	*path;
2635 	long	length;
2636 };
2637 #endif
2638 /* ARGSUSED */
2639 int
2640 otruncate(td, uap)
2641 	struct thread *td;
2642 	register struct otruncate_args /* {
2643 		char *path;
2644 		long length;
2645 	} */ *uap;
2646 {
2647 	struct truncate_args /* {
2648 		char *path;
2649 		int pad;
2650 		off_t length;
2651 	} */ nuap;
2652 
2653 	nuap.path = uap->path;
2654 	nuap.length = uap->length;
2655 	return (truncate(td, &nuap));
2656 }
2657 
2658 /*
2659  * Truncate a file given a file descriptor.
2660  */
2661 #ifndef _SYS_SYSPROTO_H_
2662 struct oftruncate_args {
2663 	int	fd;
2664 	long	length;
2665 };
2666 #endif
2667 /* ARGSUSED */
2668 int
2669 oftruncate(td, uap)
2670 	struct thread *td;
2671 	register struct oftruncate_args /* {
2672 		int fd;
2673 		long length;
2674 	} */ *uap;
2675 {
2676 	struct ftruncate_args /* {
2677 		int fd;
2678 		int pad;
2679 		off_t length;
2680 	} */ nuap;
2681 
2682 	nuap.fd = uap->fd;
2683 	nuap.length = uap->length;
2684 	return (ftruncate(td, &nuap));
2685 }
2686 #endif /* COMPAT_43 || COMPAT_SUNOS */
2687 
2688 /*
2689  * Sync an open file.
2690  */
2691 #ifndef _SYS_SYSPROTO_H_
2692 struct fsync_args {
2693 	int	fd;
2694 };
2695 #endif
2696 /* ARGSUSED */
2697 int
2698 fsync(td, uap)
2699 	struct thread *td;
2700 	struct fsync_args /* {
2701 		int fd;
2702 	} */ *uap;
2703 {
2704 	struct vnode *vp;
2705 	struct mount *mp;
2706 	struct file *fp;
2707 	vm_object_t obj;
2708 	int error;
2709 
2710 	GIANT_REQUIRED;
2711 
2712 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2713 		return (error);
2714 	vp = fp->f_data;
2715 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2716 		fdrop(fp, td);
2717 		return (error);
2718 	}
2719 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2720 	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2721 		vm_object_page_clean(obj, 0, 0, 0);
2722 	}
2723 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2724 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)
2725 	    && softdep_fsync_hook != NULL)
2726 		error = (*softdep_fsync_hook)(vp);
2727 
2728 	VOP_UNLOCK(vp, 0, td);
2729 	vn_finished_write(mp);
2730 	fdrop(fp, td);
2731 	return (error);
2732 }
2733 
2734 /*
2735  * Rename files.  Source and destination must either both be directories,
2736  * or both not be directories.  If target is a directory, it must be empty.
2737  */
2738 #ifndef _SYS_SYSPROTO_H_
2739 struct rename_args {
2740 	char	*from;
2741 	char	*to;
2742 };
2743 #endif
2744 /* ARGSUSED */
2745 int
2746 rename(td, uap)
2747 	struct thread *td;
2748 	register struct rename_args /* {
2749 		char *from;
2750 		char *to;
2751 	} */ *uap;
2752 {
2753 
2754 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
2755 }
2756 
2757 int
2758 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
2759 {
2760 	struct mount *mp = NULL;
2761 	struct vnode *tvp, *fvp, *tdvp;
2762 	struct nameidata fromnd, tond;
2763 	int error;
2764 
2765 	bwillwrite();
2766 #ifdef MAC
2767 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART, pathseg,
2768 	    from, td);
2769 #else
2770 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, pathseg, from, td);
2771 #endif
2772 	if ((error = namei(&fromnd)) != 0)
2773 		return (error);
2774 #ifdef MAC
2775 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
2776 	    fromnd.ni_vp, &fromnd.ni_cnd);
2777 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
2778 	VOP_UNLOCK(fromnd.ni_vp, 0, td);
2779 #endif
2780 	fvp = fromnd.ni_vp;
2781 	if (error == 0)
2782 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
2783 	if (error != 0) {
2784 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2785 		vrele(fromnd.ni_dvp);
2786 		vrele(fvp);
2787 		goto out1;
2788 	}
2789 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
2790 	    NOOBJ, pathseg, to, td);
2791 	if (fromnd.ni_vp->v_type == VDIR)
2792 		tond.ni_cnd.cn_flags |= WILLBEDIR;
2793 	if ((error = namei(&tond)) != 0) {
2794 		/* Translate error code for rename("dir1", "dir2/."). */
2795 		if (error == EISDIR && fvp->v_type == VDIR)
2796 			error = EINVAL;
2797 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2798 		vrele(fromnd.ni_dvp);
2799 		vrele(fvp);
2800 		goto out1;
2801 	}
2802 	tdvp = tond.ni_dvp;
2803 	tvp = tond.ni_vp;
2804 	if (tvp != NULL) {
2805 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2806 			error = ENOTDIR;
2807 			goto out;
2808 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2809 			error = EISDIR;
2810 			goto out;
2811 		}
2812 	}
2813 	if (fvp == tdvp)
2814 		error = EINVAL;
2815 	/*
2816 	 * If the source is the same as the destination (that is, if they
2817 	 * are links to the same vnode), then there is nothing to do.
2818 	 */
2819 	if (fvp == tvp)
2820 		error = -1;
2821 #ifdef MAC
2822 	else
2823 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
2824 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
2825 #endif
2826 out:
2827 	if (!error) {
2828 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
2829 		if (fromnd.ni_dvp != tdvp) {
2830 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2831 		}
2832 		if (tvp) {
2833 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
2834 		}
2835 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2836 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2837 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2838 		NDFREE(&tond, NDF_ONLY_PNBUF);
2839 	} else {
2840 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2841 		NDFREE(&tond, NDF_ONLY_PNBUF);
2842 		if (tdvp == tvp)
2843 			vrele(tdvp);
2844 		else
2845 			vput(tdvp);
2846 		if (tvp)
2847 			vput(tvp);
2848 		vrele(fromnd.ni_dvp);
2849 		vrele(fvp);
2850 	}
2851 	vrele(tond.ni_startdir);
2852 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2853 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2854 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2855 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2856 out1:
2857 	vn_finished_write(mp);
2858 	if (fromnd.ni_startdir)
2859 		vrele(fromnd.ni_startdir);
2860 	if (error == -1)
2861 		return (0);
2862 	return (error);
2863 }
2864 
2865 /*
2866  * Make a directory file.
2867  */
2868 #ifndef _SYS_SYSPROTO_H_
2869 struct mkdir_args {
2870 	char	*path;
2871 	int	mode;
2872 };
2873 #endif
2874 /* ARGSUSED */
2875 int
2876 mkdir(td, uap)
2877 	struct thread *td;
2878 	register struct mkdir_args /* {
2879 		char *path;
2880 		int mode;
2881 	} */ *uap;
2882 {
2883 
2884 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
2885 }
2886 
2887 int
2888 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
2889 {
2890 	struct mount *mp;
2891 	struct vnode *vp;
2892 	struct vattr vattr;
2893 	int error;
2894 	struct nameidata nd;
2895 
2896 restart:
2897 	bwillwrite();
2898 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, path, td);
2899 	nd.ni_cnd.cn_flags |= WILLBEDIR;
2900 	if ((error = namei(&nd)) != 0)
2901 		return (error);
2902 	vp = nd.ni_vp;
2903 	if (vp != NULL) {
2904 		NDFREE(&nd, NDF_ONLY_PNBUF);
2905 		vrele(vp);
2906 		/*
2907 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
2908 		 * the strange behaviour of leaving the vnode unlocked
2909 		 * if the target is the same vnode as the parent.
2910 		 */
2911 		if (vp == nd.ni_dvp)
2912 			vrele(nd.ni_dvp);
2913 		else
2914 			vput(nd.ni_dvp);
2915 		return (EEXIST);
2916 	}
2917 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2918 		NDFREE(&nd, NDF_ONLY_PNBUF);
2919 		vput(nd.ni_dvp);
2920 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2921 			return (error);
2922 		goto restart;
2923 	}
2924 	VATTR_NULL(&vattr);
2925 	vattr.va_type = VDIR;
2926 	FILEDESC_LOCK(td->td_proc->p_fd);
2927 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
2928 	FILEDESC_UNLOCK(td->td_proc->p_fd);
2929 #ifdef MAC
2930 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
2931 	    &vattr);
2932 	if (error)
2933 		goto out;
2934 #endif
2935 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2936 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2937 #ifdef MAC
2938 out:
2939 #endif
2940 	NDFREE(&nd, NDF_ONLY_PNBUF);
2941 	vput(nd.ni_dvp);
2942 	if (!error)
2943 		vput(nd.ni_vp);
2944 	vn_finished_write(mp);
2945 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2946 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2947 	return (error);
2948 }
2949 
2950 /*
2951  * Remove a directory file.
2952  */
2953 #ifndef _SYS_SYSPROTO_H_
2954 struct rmdir_args {
2955 	char	*path;
2956 };
2957 #endif
2958 /* ARGSUSED */
2959 int
2960 rmdir(td, uap)
2961 	struct thread *td;
2962 	struct rmdir_args /* {
2963 		char *path;
2964 	} */ *uap;
2965 {
2966 
2967 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
2968 }
2969 
2970 int
2971 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
2972 {
2973 	struct mount *mp;
2974 	struct vnode *vp;
2975 	int error;
2976 	struct nameidata nd;
2977 
2978 restart:
2979 	bwillwrite();
2980 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, pathseg, path, td);
2981 	if ((error = namei(&nd)) != 0)
2982 		return (error);
2983 	vp = nd.ni_vp;
2984 	if (vp->v_type != VDIR) {
2985 		error = ENOTDIR;
2986 		goto out;
2987 	}
2988 	/*
2989 	 * No rmdir "." please.
2990 	 */
2991 	if (nd.ni_dvp == vp) {
2992 		error = EINVAL;
2993 		goto out;
2994 	}
2995 	/*
2996 	 * The root of a mounted filesystem cannot be deleted.
2997 	 */
2998 	if (vp->v_vflag & VV_ROOT) {
2999 		error = EBUSY;
3000 		goto out;
3001 	}
3002 #ifdef MAC
3003 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3004 	    &nd.ni_cnd);
3005 	if (error)
3006 		goto out;
3007 #endif
3008 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3009 		NDFREE(&nd, NDF_ONLY_PNBUF);
3010 		if (nd.ni_dvp == vp)
3011 			vrele(nd.ni_dvp);
3012 		else
3013 			vput(nd.ni_dvp);
3014 		vput(vp);
3015 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3016 			return (error);
3017 		goto restart;
3018 	}
3019 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3020 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3021 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3022 	vn_finished_write(mp);
3023 out:
3024 	NDFREE(&nd, NDF_ONLY_PNBUF);
3025 	if (nd.ni_dvp == vp)
3026 		vrele(nd.ni_dvp);
3027 	else
3028 		vput(nd.ni_dvp);
3029 	vput(vp);
3030 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3031 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3032 	return (error);
3033 }
3034 
3035 #ifdef COMPAT_43
3036 /*
3037  * Read a block of directory entries in a filesystem independent format.
3038  */
3039 #ifndef _SYS_SYSPROTO_H_
3040 struct ogetdirentries_args {
3041 	int	fd;
3042 	char	*buf;
3043 	u_int	count;
3044 	long	*basep;
3045 };
3046 #endif
3047 int
3048 ogetdirentries(td, uap)
3049 	struct thread *td;
3050 	register struct ogetdirentries_args /* {
3051 		int fd;
3052 		char *buf;
3053 		u_int count;
3054 		long *basep;
3055 	} */ *uap;
3056 {
3057 	struct vnode *vp;
3058 	struct file *fp;
3059 	struct uio auio, kuio;
3060 	struct iovec aiov, kiov;
3061 	struct dirent *dp, *edp;
3062 	caddr_t dirbuf;
3063 	int error, eofflag, readcnt;
3064 	long loff;
3065 
3066 	/* XXX arbitrary sanity limit on `count'. */
3067 	if (uap->count > 64 * 1024)
3068 		return (EINVAL);
3069 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3070 		return (error);
3071 	if ((fp->f_flag & FREAD) == 0) {
3072 		fdrop(fp, td);
3073 		return (EBADF);
3074 	}
3075 	vp = fp->f_data;
3076 unionread:
3077 	if (vp->v_type != VDIR) {
3078 		fdrop(fp, td);
3079 		return (EINVAL);
3080 	}
3081 	aiov.iov_base = uap->buf;
3082 	aiov.iov_len = uap->count;
3083 	auio.uio_iov = &aiov;
3084 	auio.uio_iovcnt = 1;
3085 	auio.uio_rw = UIO_READ;
3086 	auio.uio_segflg = UIO_USERSPACE;
3087 	auio.uio_td = td;
3088 	auio.uio_resid = uap->count;
3089 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3090 	loff = auio.uio_offset = fp->f_offset;
3091 #ifdef MAC
3092 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3093 	if (error) {
3094 		VOP_UNLOCK(vp, 0, td);
3095 		fdrop(fp, td);
3096 		return (error);
3097 	}
3098 #endif
3099 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3100 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3101 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3102 			    NULL, NULL);
3103 			fp->f_offset = auio.uio_offset;
3104 		} else
3105 #	endif
3106 	{
3107 		kuio = auio;
3108 		kuio.uio_iov = &kiov;
3109 		kuio.uio_segflg = UIO_SYSSPACE;
3110 		kiov.iov_len = uap->count;
3111 		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3112 		kiov.iov_base = dirbuf;
3113 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3114 			    NULL, NULL);
3115 		fp->f_offset = kuio.uio_offset;
3116 		if (error == 0) {
3117 			readcnt = uap->count - kuio.uio_resid;
3118 			edp = (struct dirent *)&dirbuf[readcnt];
3119 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3120 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3121 					/*
3122 					 * The expected low byte of
3123 					 * dp->d_namlen is our dp->d_type.
3124 					 * The high MBZ byte of dp->d_namlen
3125 					 * is our dp->d_namlen.
3126 					 */
3127 					dp->d_type = dp->d_namlen;
3128 					dp->d_namlen = 0;
3129 #				else
3130 					/*
3131 					 * The dp->d_type is the high byte
3132 					 * of the expected dp->d_namlen,
3133 					 * so must be zero'ed.
3134 					 */
3135 					dp->d_type = 0;
3136 #				endif
3137 				if (dp->d_reclen > 0) {
3138 					dp = (struct dirent *)
3139 					    ((char *)dp + dp->d_reclen);
3140 				} else {
3141 					error = EIO;
3142 					break;
3143 				}
3144 			}
3145 			if (dp >= edp)
3146 				error = uiomove(dirbuf, readcnt, &auio);
3147 		}
3148 		FREE(dirbuf, M_TEMP);
3149 	}
3150 	VOP_UNLOCK(vp, 0, td);
3151 	if (error) {
3152 		fdrop(fp, td);
3153 		return (error);
3154 	}
3155 	if (uap->count == auio.uio_resid) {
3156 		if (union_dircheckp) {
3157 			error = union_dircheckp(td, &vp, fp);
3158 			if (error == -1)
3159 				goto unionread;
3160 			if (error) {
3161 				fdrop(fp, td);
3162 				return (error);
3163 			}
3164 		}
3165 		/*
3166 		 * XXX We could delay dropping the lock above but
3167 		 * union_dircheckp complicates things.
3168 		 */
3169 		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
3170 		if ((vp->v_vflag & VV_ROOT) &&
3171 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3172 			struct vnode *tvp = vp;
3173 			vp = vp->v_mount->mnt_vnodecovered;
3174 			VREF(vp);
3175 			fp->f_data = vp;
3176 			fp->f_offset = 0;
3177 			vput(tvp);
3178 			goto unionread;
3179 		}
3180 		VOP_UNLOCK(vp, 0, td);
3181 	}
3182 	error = copyout(&loff, uap->basep, sizeof(long));
3183 	fdrop(fp, td);
3184 	td->td_retval[0] = uap->count - auio.uio_resid;
3185 	return (error);
3186 }
3187 #endif /* COMPAT_43 */
3188 
3189 /*
3190  * Read a block of directory entries in a filesystem independent format.
3191  */
3192 #ifndef _SYS_SYSPROTO_H_
3193 struct getdirentries_args {
3194 	int	fd;
3195 	char	*buf;
3196 	u_int	count;
3197 	long	*basep;
3198 };
3199 #endif
3200 int
3201 getdirentries(td, uap)
3202 	struct thread *td;
3203 	register struct getdirentries_args /* {
3204 		int fd;
3205 		char *buf;
3206 		u_int count;
3207 		long *basep;
3208 	} */ *uap;
3209 {
3210 	struct vnode *vp;
3211 	struct file *fp;
3212 	struct uio auio;
3213 	struct iovec aiov;
3214 	long loff;
3215 	int error, eofflag;
3216 
3217 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3218 		return (error);
3219 	if ((fp->f_flag & FREAD) == 0) {
3220 		fdrop(fp, td);
3221 		return (EBADF);
3222 	}
3223 	vp = fp->f_data;
3224 unionread:
3225 	if (vp->v_type != VDIR) {
3226 		fdrop(fp, td);
3227 		return (EINVAL);
3228 	}
3229 	aiov.iov_base = uap->buf;
3230 	aiov.iov_len = uap->count;
3231 	auio.uio_iov = &aiov;
3232 	auio.uio_iovcnt = 1;
3233 	auio.uio_rw = UIO_READ;
3234 	auio.uio_segflg = UIO_USERSPACE;
3235 	auio.uio_td = td;
3236 	auio.uio_resid = uap->count;
3237 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3238 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3239 	loff = auio.uio_offset = fp->f_offset;
3240 #ifdef MAC
3241 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3242 	if (error == 0)
3243 #endif
3244 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3245 		    NULL);
3246 	fp->f_offset = auio.uio_offset;
3247 	VOP_UNLOCK(vp, 0, td);
3248 	if (error) {
3249 		fdrop(fp, td);
3250 		return (error);
3251 	}
3252 	if (uap->count == auio.uio_resid) {
3253 		if (union_dircheckp) {
3254 			error = union_dircheckp(td, &vp, fp);
3255 			if (error == -1)
3256 				goto unionread;
3257 			if (error) {
3258 				fdrop(fp, td);
3259 				return (error);
3260 			}
3261 		}
3262 		/*
3263 		 * XXX We could delay dropping the lock above but
3264 		 * union_dircheckp complicates things.
3265 		 */
3266 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3267 		if ((vp->v_vflag & VV_ROOT) &&
3268 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3269 			struct vnode *tvp = vp;
3270 			vp = vp->v_mount->mnt_vnodecovered;
3271 			VREF(vp);
3272 			fp->f_data = vp;
3273 			fp->f_offset = 0;
3274 			vput(tvp);
3275 			goto unionread;
3276 		}
3277 		VOP_UNLOCK(vp, 0, td);
3278 	}
3279 	if (uap->basep != NULL) {
3280 		error = copyout(&loff, uap->basep, sizeof(long));
3281 	}
3282 	td->td_retval[0] = uap->count - auio.uio_resid;
3283 	fdrop(fp, td);
3284 	return (error);
3285 }
3286 #ifndef _SYS_SYSPROTO_H_
3287 struct getdents_args {
3288 	int fd;
3289 	char *buf;
3290 	size_t count;
3291 };
3292 #endif
3293 int
3294 getdents(td, uap)
3295 	struct thread *td;
3296 	register struct getdents_args /* {
3297 		int fd;
3298 		char *buf;
3299 		u_int count;
3300 	} */ *uap;
3301 {
3302 	struct getdirentries_args ap;
3303 	ap.fd = uap->fd;
3304 	ap.buf = uap->buf;
3305 	ap.count = uap->count;
3306 	ap.basep = NULL;
3307 	return getdirentries(td, &ap);
3308 }
3309 
3310 /*
3311  * Set the mode mask for creation of filesystem nodes.
3312  *
3313  * MP SAFE
3314  */
3315 #ifndef _SYS_SYSPROTO_H_
3316 struct umask_args {
3317 	int	newmask;
3318 };
3319 #endif
3320 int
3321 umask(td, uap)
3322 	struct thread *td;
3323 	struct umask_args /* {
3324 		int newmask;
3325 	} */ *uap;
3326 {
3327 	register struct filedesc *fdp;
3328 
3329 	FILEDESC_LOCK(td->td_proc->p_fd);
3330 	fdp = td->td_proc->p_fd;
3331 	td->td_retval[0] = fdp->fd_cmask;
3332 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3333 	FILEDESC_UNLOCK(td->td_proc->p_fd);
3334 	return (0);
3335 }
3336 
3337 /*
3338  * Void all references to file by ripping underlying filesystem
3339  * away from vnode.
3340  */
3341 #ifndef _SYS_SYSPROTO_H_
3342 struct revoke_args {
3343 	char	*path;
3344 };
3345 #endif
3346 /* ARGSUSED */
3347 int
3348 revoke(td, uap)
3349 	struct thread *td;
3350 	register struct revoke_args /* {
3351 		char *path;
3352 	} */ *uap;
3353 {
3354 	struct mount *mp;
3355 	struct vnode *vp;
3356 	struct vattr vattr;
3357 	int error;
3358 	struct nameidata nd;
3359 
3360 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
3361 	if ((error = namei(&nd)) != 0)
3362 		return (error);
3363 	vp = nd.ni_vp;
3364 	NDFREE(&nd, NDF_ONLY_PNBUF);
3365 	if (vp->v_type != VCHR) {
3366 		vput(vp);
3367 		return (EINVAL);
3368 	}
3369 #ifdef MAC
3370 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3371 	if (error) {
3372 		vput(vp);
3373 		return (error);
3374 	}
3375 #endif
3376 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3377 	if (error) {
3378 		vput(vp);
3379 		return (error);
3380 	}
3381 	VOP_UNLOCK(vp, 0, td);
3382 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3383 		error = suser_cred(td->td_ucred, PRISON_ROOT);
3384 		if (error)
3385 			goto out;
3386 	}
3387 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3388 		goto out;
3389 	if (vcount(vp) > 1)
3390 		VOP_REVOKE(vp, REVOKEALL);
3391 	vn_finished_write(mp);
3392 out:
3393 	vrele(vp);
3394 	return (error);
3395 }
3396 
3397 /*
3398  * Convert a user file descriptor to a kernel file entry.
3399  * The file entry is locked upon returning.
3400  */
3401 int
3402 getvnode(fdp, fd, fpp)
3403 	struct filedesc *fdp;
3404 	int fd;
3405 	struct file **fpp;
3406 {
3407 	int error;
3408 	struct file *fp;
3409 
3410 	fp = NULL;
3411 	if (fdp == NULL)
3412 		error = EBADF;
3413 	else {
3414 		FILEDESC_LOCK(fdp);
3415 		if ((u_int)fd >= fdp->fd_nfiles ||
3416 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3417 			error = EBADF;
3418 		else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
3419 			fp = NULL;
3420 			error = EINVAL;
3421 		} else {
3422 			fhold(fp);
3423 			error = 0;
3424 		}
3425 		FILEDESC_UNLOCK(fdp);
3426 	}
3427 	*fpp = fp;
3428 	return (error);
3429 }
3430 /*
3431  * Get (NFS) file handle
3432  */
3433 #ifndef _SYS_SYSPROTO_H_
3434 struct getfh_args {
3435 	char	*fname;
3436 	fhandle_t *fhp;
3437 };
3438 #endif
3439 int
3440 getfh(td, uap)
3441 	struct thread *td;
3442 	register struct getfh_args *uap;
3443 {
3444 	struct nameidata nd;
3445 	fhandle_t fh;
3446 	register struct vnode *vp;
3447 	int error;
3448 
3449 	/*
3450 	 * Must be super user
3451 	 */
3452 	error = suser(td);
3453 	if (error)
3454 		return (error);
3455 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3456 	error = namei(&nd);
3457 	if (error)
3458 		return (error);
3459 	NDFREE(&nd, NDF_ONLY_PNBUF);
3460 	vp = nd.ni_vp;
3461 	bzero(&fh, sizeof(fh));
3462 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3463 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3464 	vput(vp);
3465 	if (error)
3466 		return (error);
3467 	error = copyout(&fh, uap->fhp, sizeof (fh));
3468 	return (error);
3469 }
3470 
3471 /*
3472  * syscall for the rpc.lockd to use to translate a NFS file handle into
3473  * an open descriptor.
3474  *
3475  * warning: do not remove the suser() call or this becomes one giant
3476  * security hole.
3477  */
3478 #ifndef _SYS_SYSPROTO_H_
3479 struct fhopen_args {
3480 	const struct fhandle *u_fhp;
3481 	int flags;
3482 };
3483 #endif
3484 int
3485 fhopen(td, uap)
3486 	struct thread *td;
3487 	struct fhopen_args /* {
3488 		const struct fhandle *u_fhp;
3489 		int flags;
3490 	} */ *uap;
3491 {
3492 	struct proc *p = td->td_proc;
3493 	struct mount *mp;
3494 	struct vnode *vp;
3495 	struct fhandle fhp;
3496 	struct vattr vat;
3497 	struct vattr *vap = &vat;
3498 	struct flock lf;
3499 	struct file *fp;
3500 	register struct filedesc *fdp = p->p_fd;
3501 	int fmode, mode, error, type;
3502 	struct file *nfp;
3503 	int indx;
3504 
3505 	/*
3506 	 * Must be super user
3507 	 */
3508 	error = suser(td);
3509 	if (error)
3510 		return (error);
3511 
3512 	fmode = FFLAGS(uap->flags);
3513 	/* why not allow a non-read/write open for our lockd? */
3514 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3515 		return (EINVAL);
3516 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
3517 	if (error)
3518 		return(error);
3519 	/* find the mount point */
3520 	mp = vfs_getvfs(&fhp.fh_fsid);
3521 	if (mp == NULL)
3522 		return (ESTALE);
3523 	/* now give me my vnode, it gets returned to me locked */
3524 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3525 	if (error)
3526 		return (error);
3527  	/*
3528 	 * from now on we have to make sure not
3529 	 * to forget about the vnode
3530 	 * any error that causes an abort must vput(vp)
3531 	 * just set error = err and 'goto bad;'.
3532 	 */
3533 
3534 	/*
3535 	 * from vn_open
3536 	 */
3537 	if (vp->v_type == VLNK) {
3538 		error = EMLINK;
3539 		goto bad;
3540 	}
3541 	if (vp->v_type == VSOCK) {
3542 		error = EOPNOTSUPP;
3543 		goto bad;
3544 	}
3545 	mode = 0;
3546 	if (fmode & (FWRITE | O_TRUNC)) {
3547 		if (vp->v_type == VDIR) {
3548 			error = EISDIR;
3549 			goto bad;
3550 		}
3551 		error = vn_writechk(vp);
3552 		if (error)
3553 			goto bad;
3554 		mode |= VWRITE;
3555 	}
3556 	if (fmode & FREAD)
3557 		mode |= VREAD;
3558 	if (fmode & O_APPEND)
3559 		mode |= VAPPEND;
3560 #ifdef MAC
3561 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
3562 	if (error)
3563 		goto bad;
3564 #endif
3565 	if (mode) {
3566 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
3567 		if (error)
3568 			goto bad;
3569 	}
3570 	if (fmode & O_TRUNC) {
3571 		VOP_UNLOCK(vp, 0, td);				/* XXX */
3572 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3573 			vrele(vp);
3574 			return (error);
3575 		}
3576 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3577 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
3578 #ifdef MAC
3579 		/*
3580 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
3581 		 * should be right.
3582 		 */
3583 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
3584 		if (error == 0) {
3585 #endif
3586 			VATTR_NULL(vap);
3587 			vap->va_size = 0;
3588 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
3589 #ifdef MAC
3590 		}
3591 #endif
3592 		vn_finished_write(mp);
3593 		if (error)
3594 			goto bad;
3595 	}
3596 	error = VOP_OPEN(vp, fmode, td->td_ucred, td);
3597 	if (error)
3598 		goto bad;
3599 	/*
3600 	 * Make sure that a VM object is created for VMIO support.
3601 	 */
3602 	if (vn_canvmio(vp) == TRUE) {
3603 		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
3604 			goto bad;
3605 	}
3606 	if (fmode & FWRITE)
3607 		vp->v_writecount++;
3608 
3609 	/*
3610 	 * end of vn_open code
3611 	 */
3612 
3613 	if ((error = falloc(td, &nfp, &indx)) != 0) {
3614 		if (fmode & FWRITE)
3615 			vp->v_writecount--;
3616 		goto bad;
3617 	}
3618 	fp = nfp;
3619 
3620 	/*
3621 	 * Hold an extra reference to avoid having fp ripped out
3622 	 * from under us while we block in the lock op
3623 	 */
3624 	fhold(fp);
3625 	nfp->f_data = vp;
3626 	nfp->f_flag = fmode & FMASK;
3627 	nfp->f_ops = &vnops;
3628 	nfp->f_type = DTYPE_VNODE;
3629 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
3630 		lf.l_whence = SEEK_SET;
3631 		lf.l_start = 0;
3632 		lf.l_len = 0;
3633 		if (fmode & O_EXLOCK)
3634 			lf.l_type = F_WRLCK;
3635 		else
3636 			lf.l_type = F_RDLCK;
3637 		type = F_FLOCK;
3638 		if ((fmode & FNONBLOCK) == 0)
3639 			type |= F_WAIT;
3640 		VOP_UNLOCK(vp, 0, td);
3641 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
3642 			    type)) != 0) {
3643 			/*
3644 			 * The lock request failed.  Normally close the
3645 			 * descriptor but handle the case where someone might
3646 			 * have dup()d or close()d it when we weren't looking.
3647 			 */
3648 			FILEDESC_LOCK(fdp);
3649 			if (fdp->fd_ofiles[indx] == fp) {
3650 				fdp->fd_ofiles[indx] = NULL;
3651 				FILEDESC_UNLOCK(fdp);
3652 				fdrop(fp, td);
3653 			} else
3654 				FILEDESC_UNLOCK(fdp);
3655 			/*
3656 			 * release our private reference
3657 			 */
3658 			fdrop(fp, td);
3659 			return(error);
3660 		}
3661 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3662 		fp->f_flag |= FHASLOCK;
3663 	}
3664 	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
3665 		vfs_object_create(vp, td, td->td_ucred);
3666 
3667 	VOP_UNLOCK(vp, 0, td);
3668 	fdrop(fp, td);
3669 	td->td_retval[0] = indx;
3670 	return (0);
3671 
3672 bad:
3673 	vput(vp);
3674 	return (error);
3675 }
3676 
3677 /*
3678  * Stat an (NFS) file handle.
3679  */
3680 #ifndef _SYS_SYSPROTO_H_
3681 struct fhstat_args {
3682 	struct fhandle *u_fhp;
3683 	struct stat *sb;
3684 };
3685 #endif
3686 int
3687 fhstat(td, uap)
3688 	struct thread *td;
3689 	register struct fhstat_args /* {
3690 		struct fhandle *u_fhp;
3691 		struct stat *sb;
3692 	} */ *uap;
3693 {
3694 	struct stat sb;
3695 	fhandle_t fh;
3696 	struct mount *mp;
3697 	struct vnode *vp;
3698 	int error;
3699 
3700 	/*
3701 	 * Must be super user
3702 	 */
3703 	error = suser(td);
3704 	if (error)
3705 		return (error);
3706 
3707 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
3708 	if (error)
3709 		return (error);
3710 
3711 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3712 		return (ESTALE);
3713 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3714 		return (error);
3715 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
3716 	vput(vp);
3717 	if (error)
3718 		return (error);
3719 	error = copyout(&sb, uap->sb, sizeof(sb));
3720 	return (error);
3721 }
3722 
3723 /*
3724  * Implement fstatfs() for (NFS) file handles.
3725  */
3726 #ifndef _SYS_SYSPROTO_H_
3727 struct fhstatfs_args {
3728 	struct fhandle *u_fhp;
3729 	struct statfs *buf;
3730 };
3731 #endif
3732 int
3733 fhstatfs(td, uap)
3734 	struct thread *td;
3735 	struct fhstatfs_args /* {
3736 		struct fhandle *u_fhp;
3737 		struct statfs *buf;
3738 	} */ *uap;
3739 {
3740 	struct statfs *sp;
3741 	struct mount *mp;
3742 	struct vnode *vp;
3743 	struct statfs sb;
3744 	fhandle_t fh;
3745 	int error;
3746 
3747 	/*
3748 	 * Must be super user
3749 	 */
3750 	error = suser(td);
3751 	if (error)
3752 		return (error);
3753 
3754 	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
3755 		return (error);
3756 
3757 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3758 		return (ESTALE);
3759 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3760 		return (error);
3761 	mp = vp->v_mount;
3762 	sp = &mp->mnt_stat;
3763 	vput(vp);
3764 #ifdef MAC
3765 	error = mac_check_mount_stat(td->td_ucred, mp);
3766 	if (error)
3767 		return (error);
3768 #endif
3769 	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3770 		return (error);
3771 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3772 	if (suser(td)) {
3773 		bcopy(sp, &sb, sizeof(sb));
3774 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3775 		sp = &sb;
3776 	}
3777 	return (copyout(sp, uap->buf, sizeof(*sp)));
3778 }
3779 
3780 /*
3781  * Syscall to push extended attribute configuration information into the
3782  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3783  * a command (int cmd), and attribute name and misc data.  For now, the
3784  * attribute name is left in userspace for consumption by the VFS_op.
3785  * It will probably be changed to be copied into sysspace by the
3786  * syscall in the future, once issues with various consumers of the
3787  * attribute code have raised their hands.
3788  *
3789  * Currently this is used only by UFS Extended Attributes.
3790  */
3791 int
3792 extattrctl(td, uap)
3793 	struct thread *td;
3794 	struct extattrctl_args /* {
3795 		const char *path;
3796 		int cmd;
3797 		const char *filename;
3798 		int attrnamespace;
3799 		const char *attrname;
3800 	} */ *uap;
3801 {
3802 	struct vnode *filename_vp;
3803 	struct nameidata nd;
3804 	struct mount *mp, *mp_writable;
3805 	char attrname[EXTATTR_MAXNAMELEN];
3806 	int error;
3807 
3808 	/*
3809 	 * uap->attrname is not always defined.  We check again later when we
3810 	 * invoke the VFS call so as to pass in NULL there if needed.
3811 	 */
3812 	if (uap->attrname != NULL) {
3813 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
3814 		    NULL);
3815 		if (error)
3816 			return (error);
3817 	}
3818 
3819 	/*
3820 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
3821 	 * which VFS_EXTATTRCTL() will later release.
3822 	 */
3823 	filename_vp = NULL;
3824 	if (uap->filename != NULL) {
3825 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3826 		    uap->filename, td);
3827 		error = namei(&nd);
3828 		if (error)
3829 			return (error);
3830 		filename_vp = nd.ni_vp;
3831 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3832 	}
3833 
3834 	/* uap->path is always defined. */
3835 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3836 	error = namei(&nd);
3837 	if (error) {
3838 		if (filename_vp != NULL)
3839 			vput(filename_vp);
3840 		return (error);
3841 	}
3842 	mp = nd.ni_vp->v_mount;
3843 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
3844 	NDFREE(&nd, 0);
3845 	if (error) {
3846 		if (filename_vp != NULL)
3847 			vput(filename_vp);
3848 		return (error);
3849 	}
3850 
3851 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
3852 	    uap->attrname != NULL ? attrname : NULL, td);
3853 
3854 	vn_finished_write(mp_writable);
3855 	/*
3856 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3857 	 * filename_vp, so vrele it if it is defined.
3858 	 */
3859 	if (filename_vp != NULL)
3860 		vrele(filename_vp);
3861 	return (error);
3862 }
3863 
3864 /*-
3865  * Set a named extended attribute on a file or directory
3866  *
3867  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3868  *            kernelspace string pointer "attrname", userspace buffer
3869  *            pointer "data", buffer length "nbytes", thread "td".
3870  * Returns: 0 on success, an error number otherwise
3871  * Locks: none
3872  * References: vp must be a valid reference for the duration of the call
3873  */
3874 static int
3875 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3876     void *data, size_t nbytes, struct thread *td)
3877 {
3878 	struct mount *mp;
3879 	struct uio auio;
3880 	struct iovec aiov;
3881 	ssize_t cnt;
3882 	int error;
3883 
3884 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
3885 	if (error)
3886 		return (error);
3887 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3888 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3889 
3890 	aiov.iov_base = data;
3891 	aiov.iov_len = nbytes;
3892 	auio.uio_iov = &aiov;
3893 	auio.uio_iovcnt = 1;
3894 	auio.uio_offset = 0;
3895 	if (nbytes > INT_MAX) {
3896 		error = EINVAL;
3897 		goto done;
3898 	}
3899 	auio.uio_resid = nbytes;
3900 	auio.uio_rw = UIO_WRITE;
3901 	auio.uio_segflg = UIO_USERSPACE;
3902 	auio.uio_td = td;
3903 	cnt = nbytes;
3904 
3905 #ifdef MAC
3906 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
3907 	    attrname, &auio);
3908 	if (error)
3909 		goto done;
3910 #endif
3911 
3912 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3913 	    td->td_ucred, td);
3914 	cnt -= auio.uio_resid;
3915 	td->td_retval[0] = cnt;
3916 
3917 done:
3918 	VOP_UNLOCK(vp, 0, td);
3919 	vn_finished_write(mp);
3920 	return (error);
3921 }
3922 
3923 int
3924 extattr_set_fd(td, uap)
3925 	struct thread *td;
3926 	struct extattr_set_fd_args /* {
3927 		int fd;
3928 		int attrnamespace;
3929 		const char *attrname;
3930 		void *data;
3931 		size_t nbytes;
3932 	} */ *uap;
3933 {
3934 	struct file *fp;
3935 	char attrname[EXTATTR_MAXNAMELEN];
3936 	int error;
3937 
3938 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3939 	if (error)
3940 		return (error);
3941 
3942 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
3943 	if (error)
3944 		return (error);
3945 
3946 	error = extattr_set_vp(fp->f_data, uap->attrnamespace,
3947 	    attrname, uap->data, uap->nbytes, td);
3948 	fdrop(fp, td);
3949 
3950 	return (error);
3951 }
3952 
3953 int
3954 extattr_set_file(td, uap)
3955 	struct thread *td;
3956 	struct extattr_set_file_args /* {
3957 		const char *path;
3958 		int attrnamespace;
3959 		const char *attrname;
3960 		void *data;
3961 		size_t nbytes;
3962 	} */ *uap;
3963 {
3964 	struct nameidata nd;
3965 	char attrname[EXTATTR_MAXNAMELEN];
3966 	int error;
3967 
3968 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3969 	if (error)
3970 		return (error);
3971 
3972 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3973 	error = namei(&nd);
3974 	if (error)
3975 		return (error);
3976 	NDFREE(&nd, NDF_ONLY_PNBUF);
3977 
3978 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
3979 	    uap->data, uap->nbytes, td);
3980 
3981 	vrele(nd.ni_vp);
3982 	return (error);
3983 }
3984 
3985 int
3986 extattr_set_link(td, uap)
3987 	struct thread *td;
3988 	struct extattr_set_link_args /* {
3989 		const char *path;
3990 		int attrnamespace;
3991 		const char *attrname;
3992 		void *data;
3993 		size_t nbytes;
3994 	} */ *uap;
3995 {
3996 	struct nameidata nd;
3997 	char attrname[EXTATTR_MAXNAMELEN];
3998 	int error;
3999 
4000 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4001 	if (error)
4002 		return (error);
4003 
4004 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4005 	error = namei(&nd);
4006 	if (error)
4007 		return (error);
4008 	NDFREE(&nd, NDF_ONLY_PNBUF);
4009 
4010 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4011 	    uap->data, uap->nbytes, td);
4012 
4013 	vrele(nd.ni_vp);
4014 	return (error);
4015 }
4016 
4017 /*-
4018  * Get a named extended attribute on a file or directory
4019  *
4020  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4021  *            kernelspace string pointer "attrname", userspace buffer
4022  *            pointer "data", buffer length "nbytes", thread "td".
4023  * Returns: 0 on success, an error number otherwise
4024  * Locks: none
4025  * References: vp must be a valid reference for the duration of the call
4026  */
4027 static int
4028 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4029     void *data, size_t nbytes, struct thread *td)
4030 {
4031 	struct uio auio, *auiop;
4032 	struct iovec aiov;
4033 	ssize_t cnt;
4034 	size_t size, *sizep;
4035 	int error;
4036 
4037 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4038 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4039 
4040 	/*
4041 	 * Slightly unusual semantics: if the user provides a NULL data
4042 	 * pointer, they don't want to receive the data, just the
4043 	 * maximum read length.
4044 	 */
4045 	auiop = NULL;
4046 	sizep = NULL;
4047 	cnt = 0;
4048 	if (data != NULL) {
4049 		aiov.iov_base = data;
4050 		aiov.iov_len = nbytes;
4051 		auio.uio_iov = &aiov;
4052 		auio.uio_offset = 0;
4053 		if (nbytes > INT_MAX) {
4054 			error = EINVAL;
4055 			goto done;
4056 		}
4057 		auio.uio_resid = nbytes;
4058 		auio.uio_rw = UIO_READ;
4059 		auio.uio_segflg = UIO_USERSPACE;
4060 		auio.uio_td = td;
4061 		auiop = &auio;
4062 		cnt = nbytes;
4063 	} else
4064 		sizep = &size;
4065 
4066 #ifdef MAC
4067 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4068 	    attrname, &auio);
4069 	if (error)
4070 		goto done;
4071 #endif
4072 
4073 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4074 	    td->td_ucred, td);
4075 
4076 	if (auiop != NULL) {
4077 		cnt -= auio.uio_resid;
4078 		td->td_retval[0] = cnt;
4079 	} else
4080 		td->td_retval[0] = size;
4081 
4082 done:
4083 	VOP_UNLOCK(vp, 0, td);
4084 	return (error);
4085 }
4086 
4087 int
4088 extattr_get_fd(td, uap)
4089 	struct thread *td;
4090 	struct extattr_get_fd_args /* {
4091 		int fd;
4092 		int attrnamespace;
4093 		const char *attrname;
4094 		void *data;
4095 		size_t nbytes;
4096 	} */ *uap;
4097 {
4098 	struct file *fp;
4099 	char attrname[EXTATTR_MAXNAMELEN];
4100 	int error;
4101 
4102 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4103 	if (error)
4104 		return (error);
4105 
4106 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4107 	if (error)
4108 		return (error);
4109 
4110 	error = extattr_get_vp(fp->f_data, uap->attrnamespace,
4111 	    attrname, uap->data, uap->nbytes, td);
4112 
4113 	fdrop(fp, td);
4114 	return (error);
4115 }
4116 
4117 int
4118 extattr_get_file(td, uap)
4119 	struct thread *td;
4120 	struct extattr_get_file_args /* {
4121 		const char *path;
4122 		int attrnamespace;
4123 		const char *attrname;
4124 		void *data;
4125 		size_t nbytes;
4126 	} */ *uap;
4127 {
4128 	struct nameidata nd;
4129 	char attrname[EXTATTR_MAXNAMELEN];
4130 	int error;
4131 
4132 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4133 	if (error)
4134 		return (error);
4135 
4136 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4137 	error = namei(&nd);
4138 	if (error)
4139 		return (error);
4140 	NDFREE(&nd, NDF_ONLY_PNBUF);
4141 
4142 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4143 	    uap->data, uap->nbytes, td);
4144 
4145 	vrele(nd.ni_vp);
4146 	return (error);
4147 }
4148 
4149 int
4150 extattr_get_link(td, uap)
4151 	struct thread *td;
4152 	struct extattr_get_link_args /* {
4153 		const char *path;
4154 		int attrnamespace;
4155 		const char *attrname;
4156 		void *data;
4157 		size_t nbytes;
4158 	} */ *uap;
4159 {
4160 	struct nameidata nd;
4161 	char attrname[EXTATTR_MAXNAMELEN];
4162 	int error;
4163 
4164 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4165 	if (error)
4166 		return (error);
4167 
4168 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4169 	error = namei(&nd);
4170 	if (error)
4171 		return (error);
4172 	NDFREE(&nd, NDF_ONLY_PNBUF);
4173 
4174 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4175 	    uap->data, uap->nbytes, td);
4176 
4177 	vrele(nd.ni_vp);
4178 	return (error);
4179 }
4180 
4181 /*
4182  * extattr_delete_vp(): Delete a named extended attribute on a file or
4183  *                      directory
4184  *
4185  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4186  *            kernelspace string pointer "attrname", proc "p"
4187  * Returns: 0 on success, an error number otherwise
4188  * Locks: none
4189  * References: vp must be a valid reference for the duration of the call
4190  */
4191 static int
4192 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4193     struct thread *td)
4194 {
4195 	struct mount *mp;
4196 	int error;
4197 
4198 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4199 	if (error)
4200 		return (error);
4201 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4202 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4203 
4204 #ifdef MAC
4205 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4206 	    attrname, NULL);
4207 	if (error)
4208 		goto done;
4209 #endif
4210 
4211 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred,
4212 	    td);
4213 #ifdef MAC
4214 done:
4215 #endif
4216 	VOP_UNLOCK(vp, 0, td);
4217 	vn_finished_write(mp);
4218 	return (error);
4219 }
4220 
4221 int
4222 extattr_delete_fd(td, uap)
4223 	struct thread *td;
4224 	struct extattr_delete_fd_args /* {
4225 		int fd;
4226 		int attrnamespace;
4227 		const char *attrname;
4228 	} */ *uap;
4229 {
4230 	struct file *fp;
4231 	struct vnode *vp;
4232 	char attrname[EXTATTR_MAXNAMELEN];
4233 	int error;
4234 
4235 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4236 	if (error)
4237 		return (error);
4238 
4239 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4240 	if (error)
4241 		return (error);
4242 	vp = fp->f_data;
4243 
4244 	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
4245 	fdrop(fp, td);
4246 	return (error);
4247 }
4248 
4249 int
4250 extattr_delete_file(td, uap)
4251 	struct thread *td;
4252 	struct extattr_delete_file_args /* {
4253 		const char *path;
4254 		int attrnamespace;
4255 		const char *attrname;
4256 	} */ *uap;
4257 {
4258 	struct nameidata nd;
4259 	char attrname[EXTATTR_MAXNAMELEN];
4260 	int error;
4261 
4262 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4263 	if (error)
4264 		return(error);
4265 
4266 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4267 	error = namei(&nd);
4268 	if (error)
4269 		return(error);
4270 	NDFREE(&nd, NDF_ONLY_PNBUF);
4271 
4272 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4273 	vrele(nd.ni_vp);
4274 	return(error);
4275 }
4276 
4277 int
4278 extattr_delete_link(td, uap)
4279 	struct thread *td;
4280 	struct extattr_delete_link_args /* {
4281 		const char *path;
4282 		int attrnamespace;
4283 		const char *attrname;
4284 	} */ *uap;
4285 {
4286 	struct nameidata nd;
4287 	char attrname[EXTATTR_MAXNAMELEN];
4288 	int error;
4289 
4290 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4291 	if (error)
4292 		return(error);
4293 
4294 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4295 	error = namei(&nd);
4296 	if (error)
4297 		return(error);
4298 	NDFREE(&nd, NDF_ONLY_PNBUF);
4299 
4300 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4301 	vrele(nd.ni_vp);
4302 	return(error);
4303 }
4304