xref: /freebsd/sys/kern/vfs_extattr.c (revision a3e8fd0b7f663db7eafff527d5c3ca3bcfa8a537)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  * $FreeBSD$
40  */
41 
42 /* For 4.3 integer FS ID compatibility */
43 #include "opt_compat.h"
44 #include "opt_mac.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/sysent.h>
51 #include <sys/mac.h>
52 #include <sys/malloc.h>
53 #include <sys/mount.h>
54 #include <sys/mutex.h>
55 #include <sys/sysproto.h>
56 #include <sys/namei.h>
57 #include <sys/filedesc.h>
58 #include <sys/kernel.h>
59 #include <sys/fcntl.h>
60 #include <sys/file.h>
61 #include <sys/linker.h>
62 #include <sys/stat.h>
63 #include <sys/sx.h>
64 #include <sys/unistd.h>
65 #include <sys/vnode.h>
66 #include <sys/proc.h>
67 #include <sys/dirent.h>
68 #include <sys/extattr.h>
69 #include <sys/jail.h>
70 #include <sys/syscallsubr.h>
71 #include <sys/sysctl.h>
72 
73 #include <machine/limits.h>
74 #include <machine/stdarg.h>
75 
76 #include <vm/vm.h>
77 #include <vm/vm_object.h>
78 #include <vm/vm_page.h>
79 #include <vm/uma.h>
80 
81 static int change_dir(struct nameidata *ndp, struct thread *td);
82 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
83 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
84 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
85 static int setfmode(struct thread *td, struct vnode *, int);
86 static int setfflags(struct thread *td, struct vnode *, int);
87 static int setutimes(struct thread *td, struct vnode *,
88     const struct timespec *, int, int);
89 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
90     struct thread *td);
91 
92 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
93 int (*softdep_fsync_hook)(struct vnode *);
94 
95 /*
96  * Sync each mounted filesystem.
97  */
98 #ifndef _SYS_SYSPROTO_H_
99 struct sync_args {
100         int     dummy;
101 };
102 #endif
103 
104 #ifdef DEBUG
105 static int syncprt = 0;
106 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
107 #endif
108 
109 /* ARGSUSED */
110 int
111 sync(td, uap)
112 	struct thread *td;
113 	struct sync_args *uap;
114 {
115 	struct mount *mp, *nmp;
116 	int asyncflag;
117 
118 	mtx_lock(&mountlist_mtx);
119 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
120 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
121 			nmp = TAILQ_NEXT(mp, mnt_list);
122 			continue;
123 		}
124 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
125 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
126 			asyncflag = mp->mnt_flag & MNT_ASYNC;
127 			mp->mnt_flag &= ~MNT_ASYNC;
128 			vfs_msync(mp, MNT_NOWAIT);
129 			VFS_SYNC(mp, MNT_NOWAIT,
130 			    ((td != NULL) ? td->td_ucred : NOCRED), td);
131 			mp->mnt_flag |= asyncflag;
132 			vn_finished_write(mp);
133 		}
134 		mtx_lock(&mountlist_mtx);
135 		nmp = TAILQ_NEXT(mp, mnt_list);
136 		vfs_unbusy(mp, td);
137 	}
138 	mtx_unlock(&mountlist_mtx);
139 #if 0
140 /*
141  * XXX don't call vfs_bufstats() yet because that routine
142  * was not imported in the Lite2 merge.
143  */
144 #ifdef DIAGNOSTIC
145 	if (syncprt)
146 		vfs_bufstats();
147 #endif /* DIAGNOSTIC */
148 #endif
149 	return (0);
150 }
151 
152 /* XXX PRISON: could be per prison flag */
153 static int prison_quotas;
154 #if 0
155 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
156 #endif
157 
158 /*
159  * Change filesystem quotas.
160  */
161 #ifndef _SYS_SYSPROTO_H_
162 struct quotactl_args {
163 	char *path;
164 	int cmd;
165 	int uid;
166 	caddr_t arg;
167 };
168 #endif
169 /* ARGSUSED */
170 int
171 quotactl(td, uap)
172 	struct thread *td;
173 	register struct quotactl_args /* {
174 		syscallarg(char *) path;
175 		syscallarg(int) cmd;
176 		syscallarg(int) uid;
177 		syscallarg(caddr_t) arg;
178 	} */ *uap;
179 {
180 	struct mount *mp;
181 	int error;
182 	struct nameidata nd;
183 
184 	if (jailed(td->td_ucred) && !prison_quotas)
185 		return (EPERM);
186 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
187 	if ((error = namei(&nd)) != 0)
188 		return (error);
189 	NDFREE(&nd, NDF_ONLY_PNBUF);
190 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
191 	vrele(nd.ni_vp);
192 	if (error)
193 		return (error);
194 	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
195 	    SCARG(uap, arg), td);
196 	vn_finished_write(mp);
197 	return (error);
198 }
199 
200 /*
201  * Get filesystem statistics.
202  */
203 #ifndef _SYS_SYSPROTO_H_
204 struct statfs_args {
205 	char *path;
206 	struct statfs *buf;
207 };
208 #endif
209 /* ARGSUSED */
210 int
211 statfs(td, uap)
212 	struct thread *td;
213 	register struct statfs_args /* {
214 		syscallarg(char *) path;
215 		syscallarg(struct statfs *) buf;
216 	} */ *uap;
217 {
218 	register struct mount *mp;
219 	register struct statfs *sp;
220 	int error;
221 	struct nameidata nd;
222 	struct statfs sb;
223 
224 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
225 	if ((error = namei(&nd)) != 0)
226 		return (error);
227 	mp = nd.ni_vp->v_mount;
228 	sp = &mp->mnt_stat;
229 	NDFREE(&nd, NDF_ONLY_PNBUF);
230 	vrele(nd.ni_vp);
231 #ifdef MAC
232 	error = mac_check_mount_stat(td->td_ucred, mp);
233 	if (error)
234 		return (error);
235 #endif
236 	error = VFS_STATFS(mp, sp, td);
237 	if (error)
238 		return (error);
239 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
240 	if (suser(td)) {
241 		bcopy(sp, &sb, sizeof(sb));
242 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
243 		sp = &sb;
244 	}
245 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
246 }
247 
248 /*
249  * Get filesystem statistics.
250  */
251 #ifndef _SYS_SYSPROTO_H_
252 struct fstatfs_args {
253 	int fd;
254 	struct statfs *buf;
255 };
256 #endif
257 /* ARGSUSED */
258 int
259 fstatfs(td, uap)
260 	struct thread *td;
261 	register struct fstatfs_args /* {
262 		syscallarg(int) fd;
263 		syscallarg(struct statfs *) buf;
264 	} */ *uap;
265 {
266 	struct file *fp;
267 	struct mount *mp;
268 	register struct statfs *sp;
269 	int error;
270 	struct statfs sb;
271 
272 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
273 		return (error);
274 	mp = ((struct vnode *)fp->f_data)->v_mount;
275 	fdrop(fp, td);
276 	if (mp == NULL)
277 		return (EBADF);
278 #ifdef MAC
279 	error = mac_check_mount_stat(td->td_ucred, mp);
280 	if (error)
281 		return (error);
282 #endif
283 	sp = &mp->mnt_stat;
284 	error = VFS_STATFS(mp, sp, td);
285 	if (error)
286 		return (error);
287 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
288 	if (suser(td)) {
289 		bcopy(sp, &sb, sizeof(sb));
290 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
291 		sp = &sb;
292 	}
293 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
294 }
295 
296 /*
297  * Get statistics on all filesystems.
298  */
299 #ifndef _SYS_SYSPROTO_H_
300 struct getfsstat_args {
301 	struct statfs *buf;
302 	long bufsize;
303 	int flags;
304 };
305 #endif
306 int
307 getfsstat(td, uap)
308 	struct thread *td;
309 	register struct getfsstat_args /* {
310 		syscallarg(struct statfs *) buf;
311 		syscallarg(long) bufsize;
312 		syscallarg(int) flags;
313 	} */ *uap;
314 {
315 	register struct mount *mp, *nmp;
316 	register struct statfs *sp;
317 	caddr_t sfsp;
318 	long count, maxcount, error;
319 
320 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
321 	sfsp = (caddr_t)SCARG(uap, buf);
322 	count = 0;
323 	mtx_lock(&mountlist_mtx);
324 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
325 #ifdef MAC
326 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
327 			nmp = TAILQ_NEXT(mp, mnt_list);
328 			continue;
329 		}
330 #endif
331 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
332 			nmp = TAILQ_NEXT(mp, mnt_list);
333 			continue;
334 		}
335 		if (sfsp && count < maxcount) {
336 			sp = &mp->mnt_stat;
337 			/*
338 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
339 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
340 			 * overrides MNT_WAIT.
341 			 */
342 			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
343 			    (SCARG(uap, flags) & MNT_WAIT)) &&
344 			    (error = VFS_STATFS(mp, sp, td))) {
345 				mtx_lock(&mountlist_mtx);
346 				nmp = TAILQ_NEXT(mp, mnt_list);
347 				vfs_unbusy(mp, td);
348 				continue;
349 			}
350 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
351 			error = copyout(sp, sfsp, sizeof(*sp));
352 			if (error) {
353 				vfs_unbusy(mp, td);
354 				return (error);
355 			}
356 			sfsp += sizeof(*sp);
357 		}
358 		count++;
359 		mtx_lock(&mountlist_mtx);
360 		nmp = TAILQ_NEXT(mp, mnt_list);
361 		vfs_unbusy(mp, td);
362 	}
363 	mtx_unlock(&mountlist_mtx);
364 	if (sfsp && count > maxcount)
365 		td->td_retval[0] = maxcount;
366 	else
367 		td->td_retval[0] = count;
368 	return (0);
369 }
370 
371 /*
372  * Change current working directory to a given file descriptor.
373  */
374 #ifndef _SYS_SYSPROTO_H_
375 struct fchdir_args {
376 	int	fd;
377 };
378 #endif
379 /* ARGSUSED */
380 int
381 fchdir(td, uap)
382 	struct thread *td;
383 	struct fchdir_args /* {
384 		syscallarg(int) fd;
385 	} */ *uap;
386 {
387 	register struct filedesc *fdp = td->td_proc->p_fd;
388 	struct vnode *vp, *tdp, *vpold;
389 	struct mount *mp;
390 	struct file *fp;
391 	int error;
392 
393 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
394 		return (error);
395 	vp = (struct vnode *)fp->f_data;
396 	VREF(vp);
397 	fdrop(fp, td);
398 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
399 	if (vp->v_type != VDIR)
400 		error = ENOTDIR;
401 #ifdef MAC
402 	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
403 	}
404 #endif
405 	else
406 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
407 	while (!error && (mp = vp->v_mountedhere) != NULL) {
408 		if (vfs_busy(mp, 0, 0, td))
409 			continue;
410 		error = VFS_ROOT(mp, &tdp);
411 		vfs_unbusy(mp, td);
412 		if (error)
413 			break;
414 		vput(vp);
415 		vp = tdp;
416 	}
417 	if (error) {
418 		vput(vp);
419 		return (error);
420 	}
421 	VOP_UNLOCK(vp, 0, td);
422 	FILEDESC_LOCK(fdp);
423 	vpold = fdp->fd_cdir;
424 	fdp->fd_cdir = vp;
425 	FILEDESC_UNLOCK(fdp);
426 	vrele(vpold);
427 	return (0);
428 }
429 
430 /*
431  * Change current working directory (``.'').
432  */
433 #ifndef _SYS_SYSPROTO_H_
434 struct chdir_args {
435 	char	*path;
436 };
437 #endif
438 /* ARGSUSED */
439 int
440 chdir(td, uap)
441 	struct thread *td;
442 	struct chdir_args /* {
443 		syscallarg(char *) path;
444 	} */ *uap;
445 {
446 
447 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
448 }
449 
450 int
451 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
452 {
453 	register struct filedesc *fdp = td->td_proc->p_fd;
454 	int error;
455 	struct nameidata nd;
456 	struct vnode *vp;
457 
458 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, pathseg, path, td);
459 	if ((error = change_dir(&nd, td)) != 0)
460 		return (error);
461 	NDFREE(&nd, NDF_ONLY_PNBUF);
462 	FILEDESC_LOCK(fdp);
463 	vp = fdp->fd_cdir;
464 	fdp->fd_cdir = nd.ni_vp;
465 	FILEDESC_UNLOCK(fdp);
466 	vrele(vp);
467 	return (0);
468 }
469 
470 /*
471  * Helper function for raised chroot(2) security function:  Refuse if
472  * any filedescriptors are open directories.
473  */
474 static int
475 chroot_refuse_vdir_fds(fdp)
476 	struct filedesc *fdp;
477 {
478 	struct vnode *vp;
479 	struct file *fp;
480 	int fd;
481 
482 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
483 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
484 		fp = fget_locked(fdp, fd);
485 		if (fp == NULL)
486 			continue;
487 		if (fp->f_type == DTYPE_VNODE) {
488 			vp = (struct vnode *)fp->f_data;
489 			if (vp->v_type == VDIR)
490 				return (EPERM);
491 		}
492 	}
493 	return (0);
494 }
495 
496 /*
497  * This sysctl determines if we will allow a process to chroot(2) if it
498  * has a directory open:
499  *	0: disallowed for all processes.
500  *	1: allowed for processes that were not already chroot(2)'ed.
501  *	2: allowed for all processes.
502  */
503 
504 static int chroot_allow_open_directories = 1;
505 
506 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
507      &chroot_allow_open_directories, 0, "");
508 
509 /*
510  * Change notion of root (``/'') directory.
511  */
512 #ifndef _SYS_SYSPROTO_H_
513 struct chroot_args {
514 	char	*path;
515 };
516 #endif
517 /* ARGSUSED */
518 int
519 chroot(td, uap)
520 	struct thread *td;
521 	struct chroot_args /* {
522 		syscallarg(char *) path;
523 	} */ *uap;
524 {
525 	register struct filedesc *fdp = td->td_proc->p_fd;
526 	int error;
527 	struct nameidata nd;
528 	struct vnode *vp;
529 
530 	error = suser_cred(td->td_ucred, PRISON_ROOT);
531 	if (error)
532 		return (error);
533 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
534 	    SCARG(uap, path), td);
535 	mtx_lock(&Giant);
536 	if ((error = change_dir(&nd, td)) != 0)
537 		goto error;
538 #ifdef MAC
539 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
540 		goto error;
541 #endif
542 	FILEDESC_LOCK(fdp);
543 	if (chroot_allow_open_directories == 0 ||
544 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
545 		error = chroot_refuse_vdir_fds(fdp);
546 		if (error)
547 			goto error_unlock;
548 	}
549 	vp = fdp->fd_rdir;
550 	fdp->fd_rdir = nd.ni_vp;
551 	if (!fdp->fd_jdir) {
552 		fdp->fd_jdir = nd.ni_vp;
553                 VREF(fdp->fd_jdir);
554 	}
555 	FILEDESC_UNLOCK(fdp);
556 	NDFREE(&nd, NDF_ONLY_PNBUF);
557 	vrele(vp);
558 	mtx_unlock(&Giant);
559 	return (0);
560 error_unlock:
561 	FILEDESC_UNLOCK(fdp);
562 error:
563 	mtx_unlock(&Giant);
564 	NDFREE(&nd, 0);
565 	return (error);
566 }
567 
568 /*
569  * Common routine for chroot and chdir.
570  */
571 static int
572 change_dir(ndp, td)
573 	register struct nameidata *ndp;
574 	struct thread *td;
575 {
576 	struct vnode *vp;
577 	int error;
578 
579 	error = namei(ndp);
580 	if (error)
581 		return (error);
582 	vp = ndp->ni_vp;
583 	if (vp->v_type != VDIR)
584 		error = ENOTDIR;
585 #ifdef MAC
586 	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
587 	}
588 #endif
589 	else
590 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
591 	if (error)
592 		vput(vp);
593 	else
594 		VOP_UNLOCK(vp, 0, td);
595 	return (error);
596 }
597 
598 /*
599  * Check permissions, allocate an open file structure,
600  * and call the device open routine if any.
601  */
602 #ifndef _SYS_SYSPROTO_H_
603 struct open_args {
604 	char	*path;
605 	int	flags;
606 	int	mode;
607 };
608 #endif
609 int
610 open(td, uap)
611 	struct thread *td;
612 	register struct open_args /* {
613 		syscallarg(char *) path;
614 		syscallarg(int) flags;
615 		syscallarg(int) mode;
616 	} */ *uap;
617 {
618 
619 	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
620 }
621 
622 int
623 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
624     int mode)
625 {
626 	struct proc *p = td->td_proc;
627 	struct filedesc *fdp = p->p_fd;
628 	struct file *fp;
629 	struct vnode *vp;
630 	struct vattr vat;
631 	struct mount *mp;
632 	int cmode, oflags;
633 	struct file *nfp;
634 	int type, indx, error;
635 	struct flock lf;
636 	struct nameidata nd;
637 
638 	if ((flags & O_ACCMODE) == O_ACCMODE)
639 		return (EINVAL);
640 	oflags = flags;
641 	flags = FFLAGS(flags);
642 	error = falloc(td, &nfp, &indx);
643 	if (error)
644 		return (error);
645 	fp = nfp;
646 	FILEDESC_LOCK(fdp);
647 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
648 	FILEDESC_UNLOCK(fdp);
649 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
650 	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
651 	/*
652 	 * Bump the ref count to prevent another process from closing
653 	 * the descriptor while we are blocked in vn_open()
654 	 */
655 	fhold(fp);
656 	error = vn_open(&nd, &flags, cmode);
657 	if (error) {
658 		/*
659 		 * release our own reference
660 		 */
661 		fdrop(fp, td);
662 
663 		/*
664 		 * handle special fdopen() case.  bleh.  dupfdopen() is
665 		 * responsible for dropping the old contents of ofiles[indx]
666 		 * if it succeeds.
667 		 */
668 		if ((error == ENODEV || error == ENXIO) &&
669 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
670 		    (error =
671 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
672 			td->td_retval[0] = indx;
673 			return (0);
674 		}
675 		/*
676 		 * Clean up the descriptor, but only if another thread hadn't
677 		 * replaced or closed it.
678 		 */
679 		FILEDESC_LOCK(fdp);
680 		if (fdp->fd_ofiles[indx] == fp) {
681 			fdp->fd_ofiles[indx] = NULL;
682 			FILEDESC_UNLOCK(fdp);
683 			fdrop(fp, td);
684 		} else
685 			FILEDESC_UNLOCK(fdp);
686 
687 		if (error == ERESTART)
688 			error = EINTR;
689 		return (error);
690 	}
691 	td->td_dupfd = 0;
692 	NDFREE(&nd, NDF_ONLY_PNBUF);
693 	vp = nd.ni_vp;
694 
695 	/*
696 	 * There should be 2 references on the file, one from the descriptor
697 	 * table, and one for us.
698 	 *
699 	 * Handle the case where someone closed the file (via its file
700 	 * descriptor) while we were blocked.  The end result should look
701 	 * like opening the file succeeded but it was immediately closed.
702 	 */
703 	FILEDESC_LOCK(fdp);
704 	FILE_LOCK(fp);
705 	if (fp->f_count == 1) {
706 		KASSERT(fdp->fd_ofiles[indx] != fp,
707 		    ("Open file descriptor lost all refs"));
708 		FILEDESC_UNLOCK(fdp);
709 		FILE_UNLOCK(fp);
710 		VOP_UNLOCK(vp, 0, td);
711 		vn_close(vp, flags & FMASK, fp->f_cred, td);
712 		fdrop(fp, td);
713 		td->td_retval[0] = indx;
714 		return 0;
715 	}
716 
717 	/* assert that vn_open created a backing object if one is needed */
718 	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
719 		("open: vmio vnode has no backing object after vn_open"));
720 
721 	fp->f_data = vp;
722 	fp->f_flag = flags & FMASK;
723 	fp->f_ops = &vnops;
724 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
725 	FILEDESC_UNLOCK(fdp);
726 	FILE_UNLOCK(fp);
727 	VOP_UNLOCK(vp, 0, td);
728 	if (flags & (O_EXLOCK | O_SHLOCK)) {
729 		lf.l_whence = SEEK_SET;
730 		lf.l_start = 0;
731 		lf.l_len = 0;
732 		if (flags & O_EXLOCK)
733 			lf.l_type = F_WRLCK;
734 		else
735 			lf.l_type = F_RDLCK;
736 		type = F_FLOCK;
737 		if ((flags & FNONBLOCK) == 0)
738 			type |= F_WAIT;
739 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
740 			    type)) != 0)
741 			goto bad;
742 		fp->f_flag |= FHASLOCK;
743 	}
744 	if (flags & O_TRUNC) {
745 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
746 			goto bad;
747 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
748 		VATTR_NULL(&vat);
749 		vat.va_size = 0;
750 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
751 #ifdef MAC
752 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
753 		if (error == 0)
754 #endif
755 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
756 		VOP_UNLOCK(vp, 0, td);
757 		vn_finished_write(mp);
758 		if (error)
759 			goto bad;
760 	}
761 	/*
762 	 * Release our private reference, leaving the one associated with
763 	 * the descriptor table intact.
764 	 */
765 	fdrop(fp, td);
766 	td->td_retval[0] = indx;
767 	return (0);
768 bad:
769 	FILEDESC_LOCK(fdp);
770 	if (fdp->fd_ofiles[indx] == fp) {
771 		fdp->fd_ofiles[indx] = NULL;
772 		FILEDESC_UNLOCK(fdp);
773 		fdrop(fp, td);
774 	} else
775 		FILEDESC_UNLOCK(fdp);
776 	fdrop(fp, td);
777 	return (error);
778 }
779 
#ifdef COMPAT_43
/*
 * Create a file.
 *
 * Compatibility entry point implemented as open() with the flags fixed
 * to O_WRONLY | O_CREAT | O_TRUNC.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	struct open_args oa;

	SCARG(&oa, path) = SCARG(uap, path);
	SCARG(&oa, mode) = SCARG(uap, mode);
	SCARG(&oa, flags) = O_WRONLY | O_CREAT | O_TRUNC;
	return (open(td, &oa));
}
#endif /* COMPAT_43 */
810 
811 /*
812  * Create a special file.
813  */
814 #ifndef _SYS_SYSPROTO_H_
815 struct mknod_args {
816 	char	*path;
817 	int	mode;
818 	int	dev;
819 };
820 #endif
821 /* ARGSUSED */
822 int
823 mknod(td, uap)
824 	struct thread *td;
825 	register struct mknod_args /* {
826 		syscallarg(char *) path;
827 		syscallarg(int) mode;
828 		syscallarg(int) dev;
829 	} */ *uap;
830 {
831 
832 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
833 }
834 
835 int
836 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
837     int dev)
838 {
839 	struct vnode *vp;
840 	struct mount *mp;
841 	struct vattr vattr;
842 	int error;
843 	int whiteout = 0;
844 	struct nameidata nd;
845 
846 	switch (mode & S_IFMT) {
847 	case S_IFCHR:
848 	case S_IFBLK:
849 		error = suser(td);
850 		break;
851 	default:
852 		error = suser_cred(td->td_ucred, PRISON_ROOT);
853 		break;
854 	}
855 	if (error)
856 		return (error);
857 restart:
858 	bwillwrite();
859 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
860 	if ((error = namei(&nd)) != 0)
861 		return (error);
862 	vp = nd.ni_vp;
863 	if (vp != NULL) {
864 		vrele(vp);
865 		error = EEXIST;
866 	} else {
867 		VATTR_NULL(&vattr);
868 		FILEDESC_LOCK(td->td_proc->p_fd);
869 		vattr.va_mode = (mode & ALLPERMS) &
870 		    ~td->td_proc->p_fd->fd_cmask;
871 		FILEDESC_UNLOCK(td->td_proc->p_fd);
872 		vattr.va_rdev = dev;
873 		whiteout = 0;
874 
875 		switch (mode & S_IFMT) {
876 		case S_IFMT:	/* used by badsect to flag bad sectors */
877 			vattr.va_type = VBAD;
878 			break;
879 		case S_IFCHR:
880 			vattr.va_type = VCHR;
881 			break;
882 		case S_IFBLK:
883 			vattr.va_type = VBLK;
884 			break;
885 		case S_IFWHT:
886 			whiteout = 1;
887 			break;
888 		default:
889 			error = EINVAL;
890 			break;
891 		}
892 	}
893 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
894 		NDFREE(&nd, NDF_ONLY_PNBUF);
895 		vput(nd.ni_dvp);
896 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
897 			return (error);
898 		goto restart;
899 	}
900 #ifdef MAC
901 	if (error == 0 && !whiteout)
902 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
903 		    &nd.ni_cnd, &vattr);
904 #endif
905 	if (!error) {
906 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
907 		if (whiteout)
908 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
909 		else {
910 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
911 						&nd.ni_cnd, &vattr);
912 			if (error == 0)
913 				vput(nd.ni_vp);
914 		}
915 	}
916 	NDFREE(&nd, NDF_ONLY_PNBUF);
917 	vput(nd.ni_dvp);
918 	vn_finished_write(mp);
919 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
920 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
921 	return (error);
922 }
923 
924 /*
925  * Create a named pipe.
926  */
927 #ifndef _SYS_SYSPROTO_H_
928 struct mkfifo_args {
929 	char	*path;
930 	int	mode;
931 };
932 #endif
933 /* ARGSUSED */
934 int
935 mkfifo(td, uap)
936 	struct thread *td;
937 	register struct mkfifo_args /* {
938 		syscallarg(char *) path;
939 		syscallarg(int) mode;
940 	} */ *uap;
941 {
942 
943 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
944 }
945 
946 int
947 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
948 {
949 	struct mount *mp;
950 	struct vattr vattr;
951 	int error;
952 	struct nameidata nd;
953 
954 restart:
955 	bwillwrite();
956 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
957 	if ((error = namei(&nd)) != 0)
958 		return (error);
959 	if (nd.ni_vp != NULL) {
960 		NDFREE(&nd, NDF_ONLY_PNBUF);
961 		vrele(nd.ni_vp);
962 		vput(nd.ni_dvp);
963 		return (EEXIST);
964 	}
965 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
966 		NDFREE(&nd, NDF_ONLY_PNBUF);
967 		vput(nd.ni_dvp);
968 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
969 			return (error);
970 		goto restart;
971 	}
972 	VATTR_NULL(&vattr);
973 	vattr.va_type = VFIFO;
974 	FILEDESC_LOCK(td->td_proc->p_fd);
975 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
976 	FILEDESC_UNLOCK(td->td_proc->p_fd);
977 #ifdef MAC
978 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
979 	    &vattr);
980 	if (error)
981 		goto out;
982 #endif
983 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
984 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
985 	if (error == 0)
986 		vput(nd.ni_vp);
987 #ifdef MAC
988 out:
989 #endif
990 	NDFREE(&nd, NDF_ONLY_PNBUF);
991 	vput(nd.ni_dvp);
992 	vn_finished_write(mp);
993 	return (error);
994 }
995 
996 /*
997  * Make a hard file link.
998  */
999 #ifndef _SYS_SYSPROTO_H_
1000 struct link_args {
1001 	char	*path;
1002 	char	*link;
1003 };
1004 #endif
1005 /* ARGSUSED */
1006 int
1007 link(td, uap)
1008 	struct thread *td;
1009 	register struct link_args /* {
1010 		syscallarg(char *) path;
1011 		syscallarg(char *) link;
1012 	} */ *uap;
1013 {
1014 
1015 	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1016 }
1017 
1018 int
1019 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1020 {
1021 	struct vnode *vp;
1022 	struct mount *mp;
1023 	struct nameidata nd;
1024 	int error;
1025 
1026 	bwillwrite();
1027 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, segflg, path, td);
1028 	if ((error = namei(&nd)) != 0)
1029 		return (error);
1030 	NDFREE(&nd, NDF_ONLY_PNBUF);
1031 	vp = nd.ni_vp;
1032 	if (vp->v_type == VDIR) {
1033 		vrele(vp);
1034 		return (EPERM);		/* POSIX */
1035 	}
1036 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1037 		vrele(vp);
1038 		return (error);
1039 	}
1040 	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1041 	if ((error = namei(&nd)) == 0) {
1042 		if (nd.ni_vp != NULL) {
1043 			vrele(nd.ni_vp);
1044 			error = EEXIST;
1045 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1046 		    == 0) {
1047 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1048 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1049 #ifdef MAC
1050 			error = mac_check_vnode_link(td->td_ucred, nd.ni_dvp,
1051 			    vp, &nd.ni_cnd);
1052 			if (error == 0)
1053 #endif
1054 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1055 			VOP_UNLOCK(vp, 0, td);
1056 		}
1057 		NDFREE(&nd, NDF_ONLY_PNBUF);
1058 		vput(nd.ni_dvp);
1059 	}
1060 	vrele(vp);
1061 	vn_finished_write(mp);
1062 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1063 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1064 	return (error);
1065 }
1066 
1067 /*
1068  * Make a symbolic link.
1069  */
1070 #ifndef _SYS_SYSPROTO_H_
1071 struct symlink_args {
1072 	char	*path;
1073 	char	*link;
1074 };
1075 #endif
1076 /* ARGSUSED */
1077 int
1078 symlink(td, uap)
1079 	struct thread *td;
1080 	register struct symlink_args /* {
1081 		syscallarg(char *) path;
1082 		syscallarg(char *) link;
1083 	} */ *uap;
1084 {
1085 
1086 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1087 }
1088 
1089 int
1090 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1091 {
1092 	struct mount *mp;
1093 	struct vattr vattr;
1094 	char *syspath;
1095 	int error;
1096 	struct nameidata nd;
1097 
1098 	if (segflg == UIO_SYSSPACE) {
1099 		syspath = path;
1100 	} else {
1101 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1102 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1103 			goto out;
1104 	}
1105 restart:
1106 	bwillwrite();
1107 	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1108 	if ((error = namei(&nd)) != 0)
1109 		goto out;
1110 	if (nd.ni_vp) {
1111 		NDFREE(&nd, NDF_ONLY_PNBUF);
1112 		vrele(nd.ni_vp);
1113 		vput(nd.ni_dvp);
1114 		error = EEXIST;
1115 		goto out;
1116 	}
1117 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1118 		NDFREE(&nd, NDF_ONLY_PNBUF);
1119 		vput(nd.ni_dvp);
1120 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1121 			return (error);
1122 		goto restart;
1123 	}
1124 	VATTR_NULL(&vattr);
1125 	FILEDESC_LOCK(td->td_proc->p_fd);
1126 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1127 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1128 #ifdef MAC
1129 	vattr.va_type = VLNK;
1130 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1131 	    &vattr);
1132 	if (error)
1133 		goto out2;
1134 #endif
1135 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1136 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1137 	if (error == 0)
1138 		vput(nd.ni_vp);
1139 #ifdef MAC
1140 out2:
1141 #endif
1142 	NDFREE(&nd, NDF_ONLY_PNBUF);
1143 	vput(nd.ni_dvp);
1144 	vn_finished_write(mp);
1145 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1146 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1147 out:
1148 	if (segflg != UIO_SYSSPACE)
1149 		uma_zfree(namei_zone, syspath);
1150 	return (error);
1151 }
1152 
1153 /*
1154  * Delete a whiteout from the filesystem.
1155  */
1156 /* ARGSUSED */
1157 int
1158 undelete(td, uap)
1159 	struct thread *td;
1160 	register struct undelete_args /* {
1161 		syscallarg(char *) path;
1162 	} */ *uap;
1163 {
1164 	int error;
1165 	struct mount *mp;
1166 	struct nameidata nd;
1167 
1168 restart:
1169 	bwillwrite();
1170 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1171 	    SCARG(uap, path), td);
1172 	error = namei(&nd);
1173 	if (error)
1174 		return (error);
1175 
1176 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1177 		NDFREE(&nd, NDF_ONLY_PNBUF);
1178 		if (nd.ni_vp)
1179 			vrele(nd.ni_vp);
1180 		vput(nd.ni_dvp);
1181 		return (EEXIST);
1182 	}
1183 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1184 		NDFREE(&nd, NDF_ONLY_PNBUF);
1185 		vput(nd.ni_dvp);
1186 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1187 			return (error);
1188 		goto restart;
1189 	}
1190 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1191 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1192 	NDFREE(&nd, NDF_ONLY_PNBUF);
1193 	vput(nd.ni_dvp);
1194 	vn_finished_write(mp);
1195 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1196 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1197 	return (error);
1198 }
1199 
1200 /*
1201  * Delete a name from the filesystem.
1202  */
1203 #ifndef _SYS_SYSPROTO_H_
1204 struct unlink_args {
1205 	char	*path;
1206 };
1207 #endif
1208 /* ARGSUSED */
1209 int
1210 unlink(td, uap)
1211 	struct thread *td;
1212 	struct unlink_args /* {
1213 		syscallarg(char *) path;
1214 	} */ *uap;
1215 {
1216 
1217 	return (kern_unlink(td, SCARG(uap, path), UIO_USERSPACE));
1218 }
1219 
1220 int
1221 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1222 {
1223 	struct mount *mp;
1224 	struct vnode *vp;
1225 	int error;
1226 	struct nameidata nd;
1227 
1228 restart:
1229 	bwillwrite();
1230 	NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, pathseg, path, td);
1231 	if ((error = namei(&nd)) != 0)
1232 		return (error);
1233 	vp = nd.ni_vp;
1234 	if (vp->v_type == VDIR)
1235 		error = EPERM;		/* POSIX */
1236 	else {
1237 		/*
1238 		 * The root of a mounted filesystem cannot be deleted.
1239 		 *
1240 		 * XXX: can this only be a VDIR case?
1241 		 */
1242 		if (vp->v_vflag & VV_ROOT)
1243 			error = EBUSY;
1244 	}
1245 	if (error == 0) {
1246 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1247 			NDFREE(&nd, NDF_ONLY_PNBUF);
1248 			if (vp == nd.ni_dvp)
1249 				vrele(vp);
1250 			else
1251 				vput(vp);
1252 			vput(nd.ni_dvp);
1253 			if ((error = vn_start_write(NULL, &mp,
1254 			    V_XSLEEP | PCATCH)) != 0)
1255 				return (error);
1256 			goto restart;
1257 		}
1258 #ifdef MAC
1259 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1260 		    &nd.ni_cnd);
1261 		if (error)
1262 			goto out;
1263 #endif
1264 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1265 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1266 #ifdef MAC
1267 out:
1268 #endif
1269 		vn_finished_write(mp);
1270 	}
1271 	NDFREE(&nd, NDF_ONLY_PNBUF);
1272 	if (vp == nd.ni_dvp)
1273 		vrele(vp);
1274 	else
1275 		vput(vp);
1276 	vput(nd.ni_dvp);
1277 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1278 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1279 	return (error);
1280 }
1281 
1282 /*
1283  * Reposition read/write file offset.
1284  */
1285 #ifndef _SYS_SYSPROTO_H_
1286 struct lseek_args {
1287 	int	fd;
1288 	int	pad;
1289 	off_t	offset;
1290 	int	whence;
1291 };
1292 #endif
1293 int
1294 lseek(td, uap)
1295 	struct thread *td;
1296 	register struct lseek_args /* {
1297 		syscallarg(int) fd;
1298 		syscallarg(int) pad;
1299 		syscallarg(off_t) offset;
1300 		syscallarg(int) whence;
1301 	} */ *uap;
1302 {
1303 	struct ucred *cred = td->td_ucred;
1304 	struct file *fp;
1305 	struct vnode *vp;
1306 	struct vattr vattr;
1307 	off_t offset;
1308 	int error, noneg;
1309 
1310 	if ((error = fget(td, uap->fd, &fp)) != 0)
1311 		return (error);
1312 	if (fp->f_type != DTYPE_VNODE) {
1313 		fdrop(fp, td);
1314 		return (ESPIPE);
1315 	}
1316 	vp = (struct vnode *)fp->f_data;
1317 	noneg = (vp->v_type != VCHR);
1318 	offset = SCARG(uap, offset);
1319 	switch (SCARG(uap, whence)) {
1320 	case L_INCR:
1321 		if (noneg &&
1322 		    (fp->f_offset < 0 ||
1323 		     (offset > 0 && fp->f_offset > OFF_MAX - offset)))
1324 			return (EOVERFLOW);
1325 		offset += fp->f_offset;
1326 		break;
1327 	case L_XTND:
1328 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1329 		error = VOP_GETATTR(vp, &vattr, cred, td);
1330 		VOP_UNLOCK(vp, 0, td);
1331 		if (error)
1332 			return (error);
1333 		if (noneg &&
1334 		    (vattr.va_size > OFF_MAX ||
1335 		     (offset > 0 && vattr.va_size > OFF_MAX - offset)))
1336 			return (EOVERFLOW);
1337 		offset += vattr.va_size;
1338 		break;
1339 	case L_SET:
1340 		break;
1341 	default:
1342 		fdrop(fp, td);
1343 		return (EINVAL);
1344 	}
1345 	if (noneg && offset < 0)
1346 		return (EINVAL);
1347 	fp->f_offset = offset;
1348 	*(off_t *)(td->td_retval) = fp->f_offset;
1349 	fdrop(fp, td);
1350 	return (0);
1351 }
1352 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Compatibility lseek taking a 'long' offset.
 *
 * Repackages the arguments into a struct lseek_args and calls lseek().
 */
#ifndef _SYS_SYSPROTO_H_
struct olseek_args {
	int	fd;
	long	offset;
	int	whence;
};
#endif
int
olseek(td, uap)
	struct thread *td;
	register struct olseek_args /* {
		syscallarg(int) fd;
		syscallarg(long) offset;
		syscallarg(int) whence;
	} */ *uap;
{
	struct lseek_args /* {
		syscallarg(int) fd;
		syscallarg(int) pad;
		syscallarg(off_t) offset;
		syscallarg(int) whence;
	} */ nuap;

	/* The 'pad' member is not consulted by lseek() and is left unset. */
	SCARG(&nuap, whence) = SCARG(uap, whence);
	SCARG(&nuap, offset) = SCARG(uap, offset);
	SCARG(&nuap, fd) = SCARG(uap, fd);
	return (lseek(td, &nuap));
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
1388 
1389 /*
1390  * Check access permissions using passed credentials.
1391  */
1392 static int
1393 vn_access(vp, user_flags, cred, td)
1394 	struct vnode	*vp;
1395 	int		user_flags;
1396 	struct ucred	*cred;
1397 	struct thread	*td;
1398 {
1399 	int error, flags;
1400 
1401 	/* Flags == 0 means only check for existence. */
1402 	error = 0;
1403 	if (user_flags) {
1404 		flags = 0;
1405 		if (user_flags & R_OK)
1406 			flags |= VREAD;
1407 		if (user_flags & W_OK)
1408 			flags |= VWRITE;
1409 		if (user_flags & X_OK)
1410 			flags |= VEXEC;
1411 #ifdef MAC
1412 		error = mac_check_vnode_access(cred, vp, flags);
1413 		if (error)
1414 			return (error);
1415 #endif
1416 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1417 			error = VOP_ACCESS(vp, flags, cred, td);
1418 	}
1419 	return (error);
1420 }
1421 
1422 /*
1423  * Check access permissions using "real" credentials.
1424  */
1425 #ifndef _SYS_SYSPROTO_H_
1426 struct access_args {
1427 	char	*path;
1428 	int	flags;
1429 };
1430 #endif
1431 int
1432 access(td, uap)
1433 	struct thread *td;
1434 	register struct access_args /* {
1435 		syscallarg(char *) path;
1436 		syscallarg(int) flags;
1437 	} */ *uap;
1438 {
1439 
1440 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1441 }
1442 
1443 int
1444 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1445 {
1446 	struct ucred *cred, *tmpcred;
1447 	register struct vnode *vp;
1448 	int error;
1449 	struct nameidata nd;
1450 
1451 	/*
1452 	 * Create and modify a temporary credential instead of one that
1453 	 * is potentially shared.  This could also mess up socket
1454 	 * buffer accounting which can run in an interrupt context.
1455 	 *
1456 	 * XXX - Depending on how "threads" are finally implemented, it
1457 	 * may be better to explicitly pass the credential to namei()
1458 	 * rather than to modify the potentially shared process structure.
1459 	 */
1460 	cred = td->td_ucred;
1461 	tmpcred = crdup(cred);
1462 	tmpcred->cr_uid = cred->cr_ruid;
1463 	tmpcred->cr_groups[0] = cred->cr_rgid;
1464 	td->td_ucred = tmpcred;
1465 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1466 	if ((error = namei(&nd)) != 0)
1467 		goto out1;
1468 	vp = nd.ni_vp;
1469 
1470 	error = vn_access(vp, flags, tmpcred, td);
1471 	NDFREE(&nd, NDF_ONLY_PNBUF);
1472 	vput(vp);
1473 out1:
1474 	td->td_ucred = cred;
1475 	crfree(tmpcred);
1476 	return (error);
1477 }
1478 
1479 /*
1480  * Check access permissions using "effective" credentials.
1481  */
1482 #ifndef _SYS_SYSPROTO_H_
1483 struct eaccess_args {
1484 	char	*path;
1485 	int	flags;
1486 };
1487 #endif
1488 int
1489 eaccess(td, uap)
1490 	struct thread *td;
1491 	register struct eaccess_args /* {
1492 		syscallarg(char *) path;
1493 		syscallarg(int) flags;
1494 	} */ *uap;
1495 {
1496 	struct nameidata nd;
1497 	struct vnode *vp;
1498 	int error;
1499 
1500 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1501 	    SCARG(uap, path), td);
1502 	if ((error = namei(&nd)) != 0)
1503 		return (error);
1504 	vp = nd.ni_vp;
1505 
1506 	error = vn_access(vp, SCARG(uap, flags), td->td_ucred, td);
1507 	NDFREE(&nd, NDF_ONLY_PNBUF);
1508 	vput(vp);
1509 	return (error);
1510 }
1511 
1512 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1513 /*
1514  * Get file status; this version follows links.
1515  */
1516 #ifndef _SYS_SYSPROTO_H_
1517 struct ostat_args {
1518 	char	*path;
1519 	struct ostat *ub;
1520 };
1521 #endif
1522 /* ARGSUSED */
1523 int
1524 ostat(td, uap)
1525 	struct thread *td;
1526 	register struct ostat_args /* {
1527 		syscallarg(char *) path;
1528 		syscallarg(struct ostat *) ub;
1529 	} */ *uap;
1530 {
1531 	struct stat sb;
1532 	struct ostat osb;
1533 	int error;
1534 	struct nameidata nd;
1535 
1536 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1537 	    SCARG(uap, path), td);
1538 	if ((error = namei(&nd)) != 0)
1539 		return (error);
1540 	NDFREE(&nd, NDF_ONLY_PNBUF);
1541 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1542 	vput(nd.ni_vp);
1543 	if (error)
1544 		return (error);
1545 	cvtstat(&sb, &osb);
1546 	error = copyout(&osb, SCARG(uap, ub), sizeof (osb));
1547 	return (error);
1548 }
1549 
1550 /*
1551  * Get file status; this version does not follow links.
1552  */
1553 #ifndef _SYS_SYSPROTO_H_
1554 struct olstat_args {
1555 	char	*path;
1556 	struct ostat *ub;
1557 };
1558 #endif
1559 /* ARGSUSED */
1560 int
1561 olstat(td, uap)
1562 	struct thread *td;
1563 	register struct olstat_args /* {
1564 		syscallarg(char *) path;
1565 		syscallarg(struct ostat *) ub;
1566 	} */ *uap;
1567 {
1568 	struct vnode *vp;
1569 	struct stat sb;
1570 	struct ostat osb;
1571 	int error;
1572 	struct nameidata nd;
1573 
1574 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1575 	    SCARG(uap, path), td);
1576 	if ((error = namei(&nd)) != 0)
1577 		return (error);
1578 	vp = nd.ni_vp;
1579 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1580 	NDFREE(&nd, NDF_ONLY_PNBUF);
1581 	vput(vp);
1582 	if (error)
1583 		return (error);
1584 	cvtstat(&sb, &osb);
1585 	error = copyout(&osb, SCARG(uap, ub), sizeof (osb));
1586 	return (error);
1587 }
1588 
1589 /*
1590  * Convert from an old to a new stat structure.
1591  */
1592 void
1593 cvtstat(st, ost)
1594 	struct stat *st;
1595 	struct ostat *ost;
1596 {
1597 
1598 	ost->st_dev = st->st_dev;
1599 	ost->st_ino = st->st_ino;
1600 	ost->st_mode = st->st_mode;
1601 	ost->st_nlink = st->st_nlink;
1602 	ost->st_uid = st->st_uid;
1603 	ost->st_gid = st->st_gid;
1604 	ost->st_rdev = st->st_rdev;
1605 	if (st->st_size < (quad_t)1 << 32)
1606 		ost->st_size = st->st_size;
1607 	else
1608 		ost->st_size = -2;
1609 	ost->st_atime = st->st_atime;
1610 	ost->st_mtime = st->st_mtime;
1611 	ost->st_ctime = st->st_ctime;
1612 	ost->st_blksize = st->st_blksize;
1613 	ost->st_blocks = st->st_blocks;
1614 	ost->st_flags = st->st_flags;
1615 	ost->st_gen = st->st_gen;
1616 }
1617 #endif /* COMPAT_43 || COMPAT_SUNOS */
1618 
1619 /*
1620  * Get file status; this version follows links.
1621  */
1622 #ifndef _SYS_SYSPROTO_H_
1623 struct stat_args {
1624 	char	*path;
1625 	struct stat *ub;
1626 };
1627 #endif
1628 /* ARGSUSED */
1629 int
1630 stat(td, uap)
1631 	struct thread *td;
1632 	register struct stat_args /* {
1633 		syscallarg(char *) path;
1634 		syscallarg(struct stat *) ub;
1635 	} */ *uap;
1636 {
1637 	struct stat sb;
1638 	int error;
1639 	struct nameidata nd;
1640 
1641 #ifdef LOOKUP_SHARED
1642 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
1643 	    UIO_USERSPACE, SCARG(uap, path), td);
1644 #else
1645 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1646 	    SCARG(uap, path), td);
1647 #endif
1648 	if ((error = namei(&nd)) != 0)
1649 		return (error);
1650 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1651 	NDFREE(&nd, NDF_ONLY_PNBUF);
1652 	vput(nd.ni_vp);
1653 	if (error)
1654 		return (error);
1655 	error = copyout(&sb, SCARG(uap, ub), sizeof (sb));
1656 	return (error);
1657 }
1658 
1659 /*
1660  * Get file status; this version does not follow links.
1661  */
1662 #ifndef _SYS_SYSPROTO_H_
1663 struct lstat_args {
1664 	char	*path;
1665 	struct stat *ub;
1666 };
1667 #endif
1668 /* ARGSUSED */
1669 int
1670 lstat(td, uap)
1671 	struct thread *td;
1672 	register struct lstat_args /* {
1673 		syscallarg(char *) path;
1674 		syscallarg(struct stat *) ub;
1675 	} */ *uap;
1676 {
1677 	int error;
1678 	struct vnode *vp;
1679 	struct stat sb;
1680 	struct nameidata nd;
1681 
1682 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1683 	    SCARG(uap, path), td);
1684 	if ((error = namei(&nd)) != 0)
1685 		return (error);
1686 	vp = nd.ni_vp;
1687 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1688 	NDFREE(&nd, NDF_ONLY_PNBUF);
1689 	vput(vp);
1690 	if (error)
1691 		return (error);
1692 	error = copyout(&sb, SCARG(uap, ub), sizeof (sb));
1693 	return (error);
1694 }
1695 
1696 /*
1697  * Implementation of the NetBSD stat() function.
1698  * XXX This should probably be collapsed with the FreeBSD version,
1699  * as the differences are only due to vn_stat() clearing spares at
1700  * the end of the structures.  vn_stat could be split to avoid this,
1701  * and thus collapse the following to close to zero code.
1702  */
1703 void
1704 cvtnstat(sb, nsb)
1705 	struct stat *sb;
1706 	struct nstat *nsb;
1707 {
1708 	bzero(nsb, sizeof *nsb);
1709 	nsb->st_dev = sb->st_dev;
1710 	nsb->st_ino = sb->st_ino;
1711 	nsb->st_mode = sb->st_mode;
1712 	nsb->st_nlink = sb->st_nlink;
1713 	nsb->st_uid = sb->st_uid;
1714 	nsb->st_gid = sb->st_gid;
1715 	nsb->st_rdev = sb->st_rdev;
1716 	nsb->st_atimespec = sb->st_atimespec;
1717 	nsb->st_mtimespec = sb->st_mtimespec;
1718 	nsb->st_ctimespec = sb->st_ctimespec;
1719 	nsb->st_size = sb->st_size;
1720 	nsb->st_blocks = sb->st_blocks;
1721 	nsb->st_blksize = sb->st_blksize;
1722 	nsb->st_flags = sb->st_flags;
1723 	nsb->st_gen = sb->st_gen;
1724 	nsb->st_birthtimespec = sb->st_birthtimespec;
1725 }
1726 
1727 #ifndef _SYS_SYSPROTO_H_
1728 struct nstat_args {
1729 	char	*path;
1730 	struct nstat *ub;
1731 };
1732 #endif
1733 /* ARGSUSED */
1734 int
1735 nstat(td, uap)
1736 	struct thread *td;
1737 	register struct nstat_args /* {
1738 		syscallarg(char *) path;
1739 		syscallarg(struct nstat *) ub;
1740 	} */ *uap;
1741 {
1742 	struct stat sb;
1743 	struct nstat nsb;
1744 	int error;
1745 	struct nameidata nd;
1746 
1747 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1748 	    SCARG(uap, path), td);
1749 	if ((error = namei(&nd)) != 0)
1750 		return (error);
1751 	NDFREE(&nd, NDF_ONLY_PNBUF);
1752 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1753 	vput(nd.ni_vp);
1754 	if (error)
1755 		return (error);
1756 	cvtnstat(&sb, &nsb);
1757 	error = copyout(&nsb, SCARG(uap, ub), sizeof (nsb));
1758 	return (error);
1759 }
1760 
1761 /*
1762  * NetBSD lstat.  Get file status; this version does not follow links.
1763  */
1764 #ifndef _SYS_SYSPROTO_H_
1765 struct lstat_args {
1766 	char	*path;
1767 	struct stat *ub;
1768 };
1769 #endif
1770 /* ARGSUSED */
1771 int
1772 nlstat(td, uap)
1773 	struct thread *td;
1774 	register struct nlstat_args /* {
1775 		syscallarg(char *) path;
1776 		syscallarg(struct nstat *) ub;
1777 	} */ *uap;
1778 {
1779 	int error;
1780 	struct vnode *vp;
1781 	struct stat sb;
1782 	struct nstat nsb;
1783 	struct nameidata nd;
1784 
1785 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1786 	    SCARG(uap, path), td);
1787 	if ((error = namei(&nd)) != 0)
1788 		return (error);
1789 	vp = nd.ni_vp;
1790 	NDFREE(&nd, NDF_ONLY_PNBUF);
1791 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1792 	vput(vp);
1793 	if (error)
1794 		return (error);
1795 	cvtnstat(&sb, &nsb);
1796 	error = copyout(&nsb, SCARG(uap, ub), sizeof (nsb));
1797 	return (error);
1798 }
1799 
1800 /*
1801  * Get configurable pathname variables.
1802  */
1803 #ifndef _SYS_SYSPROTO_H_
1804 struct pathconf_args {
1805 	char	*path;
1806 	int	name;
1807 };
1808 #endif
1809 /* ARGSUSED */
1810 int
1811 pathconf(td, uap)
1812 	struct thread *td;
1813 	register struct pathconf_args /* {
1814 		syscallarg(char *) path;
1815 		syscallarg(int) name;
1816 	} */ *uap;
1817 {
1818 	int error;
1819 	struct nameidata nd;
1820 
1821 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1822 	    SCARG(uap, path), td);
1823 	if ((error = namei(&nd)) != 0)
1824 		return (error);
1825 	NDFREE(&nd, NDF_ONLY_PNBUF);
1826 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), td->td_retval);
1827 	vput(nd.ni_vp);
1828 	return (error);
1829 }
1830 
1831 /*
1832  * Return target name of a symbolic link.
1833  */
1834 #ifndef _SYS_SYSPROTO_H_
1835 struct readlink_args {
1836 	char	*path;
1837 	char	*buf;
1838 	int	count;
1839 };
1840 #endif
1841 /* ARGSUSED */
1842 int
1843 readlink(td, uap)
1844 	struct thread *td;
1845 	register struct readlink_args /* {
1846 		syscallarg(char *) path;
1847 		syscallarg(char *) buf;
1848 		syscallarg(int) count;
1849 	} */ *uap;
1850 {
1851 
1852 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
1853 	    UIO_USERSPACE, uap->count));
1854 }
1855 
1856 int
1857 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
1858     enum uio_seg bufseg, int count)
1859 {
1860 	register struct vnode *vp;
1861 	struct iovec aiov;
1862 	struct uio auio;
1863 	int error;
1864 	struct nameidata nd;
1865 
1866 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1867 	if ((error = namei(&nd)) != 0)
1868 		return (error);
1869 	NDFREE(&nd, NDF_ONLY_PNBUF);
1870 	vp = nd.ni_vp;
1871 #ifdef MAC
1872 	error = mac_check_vnode_readlink(td->td_ucred, vp);
1873 	if (error) {
1874 		vput(vp);
1875 		return (error);
1876 	}
1877 #endif
1878 	if (vp->v_type != VLNK)
1879 		error = EINVAL;
1880 	else {
1881 		aiov.iov_base = buf;
1882 		aiov.iov_len = count;
1883 		auio.uio_iov = &aiov;
1884 		auio.uio_iovcnt = 1;
1885 		auio.uio_offset = 0;
1886 		auio.uio_rw = UIO_READ;
1887 		auio.uio_segflg = bufseg;
1888 		auio.uio_td = td;
1889 		auio.uio_resid = count;
1890 		error = VOP_READLINK(vp, &auio, td->td_ucred);
1891 	}
1892 	vput(vp);
1893 	td->td_retval[0] = count - auio.uio_resid;
1894 	return (error);
1895 }
1896 
1897 /*
1898  * Common implementation code for chflags() and fchflags().
1899  */
1900 static int
1901 setfflags(td, vp, flags)
1902 	struct thread *td;
1903 	struct vnode *vp;
1904 	int flags;
1905 {
1906 	int error;
1907 	struct mount *mp;
1908 	struct vattr vattr;
1909 
1910 	/*
1911 	 * Prevent non-root users from setting flags on devices.  When
1912 	 * a device is reused, users can retain ownership of the device
1913 	 * if they are allowed to set flags and programs assume that
1914 	 * chown can't fail when done as root.
1915 	 */
1916 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
1917 		error = suser_cred(td->td_ucred, PRISON_ROOT);
1918 		if (error)
1919 			return (error);
1920 	}
1921 
1922 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1923 		return (error);
1924 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1925 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1926 #ifdef MAC
1927 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
1928 	if (error == 0) {
1929 #endif
1930 		VATTR_NULL(&vattr);
1931 		vattr.va_flags = flags;
1932 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
1933 #ifdef MAC
1934 	}
1935 #endif
1936 	VOP_UNLOCK(vp, 0, td);
1937 	vn_finished_write(mp);
1938 	return (error);
1939 }
1940 
1941 /*
1942  * Change flags of a file given a path name.
1943  */
1944 #ifndef _SYS_SYSPROTO_H_
1945 struct chflags_args {
1946 	char	*path;
1947 	int	flags;
1948 };
1949 #endif
1950 /* ARGSUSED */
1951 int
1952 chflags(td, uap)
1953 	struct thread *td;
1954 	register struct chflags_args /* {
1955 		syscallarg(char *) path;
1956 		syscallarg(int) flags;
1957 	} */ *uap;
1958 {
1959 	int error;
1960 	struct nameidata nd;
1961 
1962 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1963 	if ((error = namei(&nd)) != 0)
1964 		return (error);
1965 	NDFREE(&nd, NDF_ONLY_PNBUF);
1966 	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
1967 	vrele(nd.ni_vp);
1968 	return error;
1969 }
1970 
1971 /*
1972  * Same as chflags() but doesn't follow symlinks.
1973  */
1974 int
1975 lchflags(td, uap)
1976 	struct thread *td;
1977 	register struct lchflags_args /* {
1978 		syscallarg(char *) path;
1979 		syscallarg(int) flags;
1980 	} */ *uap;
1981 {
1982 	int error;
1983 	struct nameidata nd;
1984 
1985 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1986 	if ((error = namei(&nd)) != 0)
1987 		return (error);
1988 	NDFREE(&nd, NDF_ONLY_PNBUF);
1989 	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
1990 	vrele(nd.ni_vp);
1991 	return error;
1992 }
1993 
1994 /*
1995  * Change flags of a file given a file descriptor.
1996  */
1997 #ifndef _SYS_SYSPROTO_H_
1998 struct fchflags_args {
1999 	int	fd;
2000 	int	flags;
2001 };
2002 #endif
2003 /* ARGSUSED */
2004 int
2005 fchflags(td, uap)
2006 	struct thread *td;
2007 	register struct fchflags_args /* {
2008 		syscallarg(int) fd;
2009 		syscallarg(int) flags;
2010 	} */ *uap;
2011 {
2012 	struct file *fp;
2013 	int error;
2014 
2015 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2016 		return (error);
2017 	error = setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags));
2018 	fdrop(fp, td);
2019 	return (error);
2020 }
2021 
2022 /*
2023  * Common implementation code for chmod(), lchmod() and fchmod().
2024  */
2025 static int
2026 setfmode(td, vp, mode)
2027 	struct thread *td;
2028 	struct vnode *vp;
2029 	int mode;
2030 {
2031 	int error;
2032 	struct mount *mp;
2033 	struct vattr vattr;
2034 
2035 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2036 		return (error);
2037 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2038 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2039 	VATTR_NULL(&vattr);
2040 	vattr.va_mode = mode & ALLPERMS;
2041 #ifdef MAC
2042 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2043 	if (error == 0)
2044 #endif
2045 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2046 	VOP_UNLOCK(vp, 0, td);
2047 	vn_finished_write(mp);
2048 	return error;
2049 }
2050 
2051 /*
2052  * Change mode of a file given path name.
2053  */
2054 #ifndef _SYS_SYSPROTO_H_
2055 struct chmod_args {
2056 	char	*path;
2057 	int	mode;
2058 };
2059 #endif
2060 /* ARGSUSED */
2061 int
2062 chmod(td, uap)
2063 	struct thread *td;
2064 	register struct chmod_args /* {
2065 		syscallarg(char *) path;
2066 		syscallarg(int) mode;
2067 	} */ *uap;
2068 {
2069 
2070 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2071 }
2072 
2073 int
2074 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2075 {
2076 	int error;
2077 	struct nameidata nd;
2078 
2079 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2080 	if ((error = namei(&nd)) != 0)
2081 		return (error);
2082 	NDFREE(&nd, NDF_ONLY_PNBUF);
2083 	error = setfmode(td, nd.ni_vp, mode);
2084 	vrele(nd.ni_vp);
2085 	return error;
2086 }
2087 
2088 /*
2089  * Change mode of a file given path name (don't follow links.)
2090  */
2091 #ifndef _SYS_SYSPROTO_H_
2092 struct lchmod_args {
2093 	char	*path;
2094 	int	mode;
2095 };
2096 #endif
2097 /* ARGSUSED */
2098 int
2099 lchmod(td, uap)
2100 	struct thread *td;
2101 	register struct lchmod_args /* {
2102 		syscallarg(char *) path;
2103 		syscallarg(int) mode;
2104 	} */ *uap;
2105 {
2106 	int error;
2107 	struct nameidata nd;
2108 
2109 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2110 	if ((error = namei(&nd)) != 0)
2111 		return (error);
2112 	NDFREE(&nd, NDF_ONLY_PNBUF);
2113 	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
2114 	vrele(nd.ni_vp);
2115 	return error;
2116 }
2117 
2118 /*
2119  * Change mode of a file given a file descriptor.
2120  */
2121 #ifndef _SYS_SYSPROTO_H_
2122 struct fchmod_args {
2123 	int	fd;
2124 	int	mode;
2125 };
2126 #endif
2127 /* ARGSUSED */
2128 int
2129 fchmod(td, uap)
2130 	struct thread *td;
2131 	register struct fchmod_args /* {
2132 		syscallarg(int) fd;
2133 		syscallarg(int) mode;
2134 	} */ *uap;
2135 {
2136 	struct file *fp;
2137 	struct vnode *vp;
2138 	int error;
2139 
2140 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2141 		return (error);
2142 	vp = (struct vnode *)fp->f_data;
2143 	error = setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode));
2144 	fdrop(fp, td);
2145 	return (error);
2146 }
2147 
2148 /*
2149  * Common implementation for chown(), lchown(), and fchown()
2150  */
2151 static int
2152 setfown(td, vp, uid, gid)
2153 	struct thread *td;
2154 	struct vnode *vp;
2155 	uid_t uid;
2156 	gid_t gid;
2157 {
2158 	int error;
2159 	struct mount *mp;
2160 	struct vattr vattr;
2161 
2162 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2163 		return (error);
2164 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2165 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2166 	VATTR_NULL(&vattr);
2167 	vattr.va_uid = uid;
2168 	vattr.va_gid = gid;
2169 #ifdef MAC
2170 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2171 	    vattr.va_gid);
2172 	if (error == 0)
2173 #endif
2174 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2175 	VOP_UNLOCK(vp, 0, td);
2176 	vn_finished_write(mp);
2177 	return error;
2178 }
2179 
2180 /*
2181  * Set ownership given a path name.
2182  */
2183 #ifndef _SYS_SYSPROTO_H_
2184 struct chown_args {
2185 	char	*path;
2186 	int	uid;
2187 	int	gid;
2188 };
2189 #endif
2190 /* ARGSUSED */
2191 int
2192 chown(td, uap)
2193 	struct thread *td;
2194 	register struct chown_args /* {
2195 		syscallarg(char *) path;
2196 		syscallarg(int) uid;
2197 		syscallarg(int) gid;
2198 	} */ *uap;
2199 {
2200 
2201 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2202 }
2203 
2204 int
2205 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2206     int gid)
2207 {
2208 	int error;
2209 	struct nameidata nd;
2210 
2211 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2212 	if ((error = namei(&nd)) != 0)
2213 		return (error);
2214 	NDFREE(&nd, NDF_ONLY_PNBUF);
2215 	error = setfown(td, nd.ni_vp, uid, gid);
2216 	vrele(nd.ni_vp);
2217 	return (error);
2218 }
2219 
2220 /*
2221  * Set ownership given a path name, do not cross symlinks.
2222  */
2223 #ifndef _SYS_SYSPROTO_H_
2224 struct lchown_args {
2225 	char	*path;
2226 	int	uid;
2227 	int	gid;
2228 };
2229 #endif
2230 /* ARGSUSED */
2231 int
2232 lchown(td, uap)
2233 	struct thread *td;
2234 	register struct lchown_args /* {
2235 		syscallarg(char *) path;
2236 		syscallarg(int) uid;
2237 		syscallarg(int) gid;
2238 	} */ *uap;
2239 {
2240 
2241 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2242 }
2243 
2244 int
2245 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2246     int gid)
2247 {
2248 	int error;
2249 	struct nameidata nd;
2250 
2251 	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2252 	if ((error = namei(&nd)) != 0)
2253 		return (error);
2254 	NDFREE(&nd, NDF_ONLY_PNBUF);
2255 	error = setfown(td, nd.ni_vp, uid, gid);
2256 	vrele(nd.ni_vp);
2257 	return (error);
2258 }
2259 
2260 /*
2261  * Set ownership given a file descriptor.
2262  */
2263 #ifndef _SYS_SYSPROTO_H_
2264 struct fchown_args {
2265 	int	fd;
2266 	int	uid;
2267 	int	gid;
2268 };
2269 #endif
2270 /* ARGSUSED */
2271 int
2272 fchown(td, uap)
2273 	struct thread *td;
2274 	register struct fchown_args /* {
2275 		syscallarg(int) fd;
2276 		syscallarg(int) uid;
2277 		syscallarg(int) gid;
2278 	} */ *uap;
2279 {
2280 	struct file *fp;
2281 	struct vnode *vp;
2282 	int error;
2283 
2284 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2285 		return (error);
2286 	vp = (struct vnode *)fp->f_data;
2287 	error = setfown(td, (struct vnode *)fp->f_data,
2288 		SCARG(uap, uid), SCARG(uap, gid));
2289 	fdrop(fp, td);
2290 	return (error);
2291 }
2292 
2293 /*
2294  * Common implementation code for utimes(), lutimes(), and futimes().
2295  */
2296 static int
2297 getutimes(usrtvp, tvpseg, tsp)
2298 	const struct timeval *usrtvp;
2299 	enum uio_seg tvpseg;
2300 	struct timespec *tsp;
2301 {
2302 	struct timeval tv[2];
2303 	const struct timeval *tvp;
2304 	int error;
2305 
2306 	if (usrtvp == NULL) {
2307 		microtime(&tv[0]);
2308 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2309 		tsp[1] = tsp[0];
2310 	} else {
2311 		if (tvpseg == UIO_SYSSPACE) {
2312 			tvp = usrtvp;
2313 		} else {
2314 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2315 				return (error);
2316 			tvp = tv;
2317 		}
2318 
2319 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2320 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2321 	}
2322 	return 0;
2323 }
2324 
2325 /*
2326  * Common implementation code for utimes(), lutimes(), and futimes().
2327  */
2328 static int
2329 setutimes(td, vp, ts, numtimes, nullflag)
2330 	struct thread *td;
2331 	struct vnode *vp;
2332 	const struct timespec *ts;
2333 	int numtimes;
2334 	int nullflag;
2335 {
2336 	int error, setbirthtime;
2337 	struct mount *mp;
2338 	struct vattr vattr;
2339 
2340 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2341 		return (error);
2342 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2343 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2344 	setbirthtime = 0;
2345 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2346 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2347 		setbirthtime = 1;
2348 	VATTR_NULL(&vattr);
2349 	vattr.va_atime = ts[0];
2350 	vattr.va_mtime = ts[1];
2351 	if (setbirthtime)
2352 		vattr.va_birthtime = ts[1];
2353 	if (numtimes > 2)
2354 		vattr.va_birthtime = ts[2];
2355 	if (nullflag)
2356 		vattr.va_vaflags |= VA_UTIMES_NULL;
2357 #ifdef MAC
2358 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2359 	    vattr.va_mtime);
2360 #endif
2361 	if (error == 0)
2362 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2363 	VOP_UNLOCK(vp, 0, td);
2364 	vn_finished_write(mp);
2365 	return error;
2366 }
2367 
2368 /*
2369  * Set the access and modification times of a file.
2370  */
2371 #ifndef _SYS_SYSPROTO_H_
2372 struct utimes_args {
2373 	char	*path;
2374 	struct	timeval *tptr;
2375 };
2376 #endif
2377 /* ARGSUSED */
2378 int
2379 utimes(td, uap)
2380 	struct thread *td;
2381 	register struct utimes_args /* {
2382 		syscallarg(char *) path;
2383 		syscallarg(struct timeval *) tptr;
2384 	} */ *uap;
2385 {
2386 
2387 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2388 	    UIO_USERSPACE));
2389 }
2390 
2391 int
2392 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2393     struct timeval *tptr, enum uio_seg tptrseg)
2394 {
2395 	struct timespec ts[2];
2396 	int error;
2397 	struct nameidata nd;
2398 
2399 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2400 		return (error);
2401 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2402 	if ((error = namei(&nd)) != 0)
2403 		return (error);
2404 	NDFREE(&nd, NDF_ONLY_PNBUF);
2405 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2406 	vrele(nd.ni_vp);
2407 	return (error);
2408 }
2409 
2410 /*
2411  * Set the access and modification times of a file.
2412  */
2413 #ifndef _SYS_SYSPROTO_H_
2414 struct lutimes_args {
2415 	char	*path;
2416 	struct	timeval *tptr;
2417 };
2418 #endif
2419 /* ARGSUSED */
2420 int
2421 lutimes(td, uap)
2422 	struct thread *td;
2423 	register struct lutimes_args /* {
2424 		syscallarg(char *) path;
2425 		syscallarg(struct timeval *) tptr;
2426 	} */ *uap;
2427 {
2428 
2429 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2430 	    UIO_USERSPACE));
2431 }
2432 
2433 int
2434 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2435     struct timeval *tptr, enum uio_seg tptrseg)
2436 {
2437 	struct timespec ts[2];
2438 	int error;
2439 	struct nameidata nd;
2440 
2441 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2442 		return (error);
2443 	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2444 	if ((error = namei(&nd)) != 0)
2445 		return (error);
2446 	NDFREE(&nd, NDF_ONLY_PNBUF);
2447 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2448 	vrele(nd.ni_vp);
2449 	return (error);
2450 }
2451 
2452 /*
2453  * Set the access and modification times of a file.
2454  */
2455 #ifndef _SYS_SYSPROTO_H_
2456 struct futimes_args {
2457 	int	fd;
2458 	struct	timeval *tptr;
2459 };
2460 #endif
2461 /* ARGSUSED */
2462 int
2463 futimes(td, uap)
2464 	struct thread *td;
2465 	register struct futimes_args /* {
2466 		syscallarg(int ) fd;
2467 		syscallarg(struct timeval *) tptr;
2468 	} */ *uap;
2469 {
2470 
2471 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2472 }
2473 
2474 int
2475 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2476     enum uio_seg tptrseg)
2477 {
2478 	struct timespec ts[2];
2479 	struct file *fp;
2480 	int error;
2481 
2482 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2483 		return (error);
2484 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2485 		return (error);
2486 	error = setutimes(td, (struct vnode *)fp->f_data, ts, 2, tptr == NULL);
2487 	fdrop(fp, td);
2488 	return (error);
2489 }
2490 
2491 /*
2492  * Truncate a file given its path name.
2493  */
2494 #ifndef _SYS_SYSPROTO_H_
2495 struct truncate_args {
2496 	char	*path;
2497 	int	pad;
2498 	off_t	length;
2499 };
2500 #endif
2501 /* ARGSUSED */
2502 int
2503 truncate(td, uap)
2504 	struct thread *td;
2505 	register struct truncate_args /* {
2506 		syscallarg(char *) path;
2507 		syscallarg(int) pad;
2508 		syscallarg(off_t) length;
2509 	} */ *uap;
2510 {
2511 
2512 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2513 }
2514 
2515 int
2516 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2517 {
2518 	struct mount *mp;
2519 	struct vnode *vp;
2520 	struct vattr vattr;
2521 	int error;
2522 	struct nameidata nd;
2523 
2524 	if (length < 0)
2525 		return(EINVAL);
2526 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2527 	if ((error = namei(&nd)) != 0)
2528 		return (error);
2529 	vp = nd.ni_vp;
2530 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2531 		vrele(vp);
2532 		return (error);
2533 	}
2534 	NDFREE(&nd, NDF_ONLY_PNBUF);
2535 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2536 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2537 	if (vp->v_type == VDIR)
2538 		error = EISDIR;
2539 #ifdef MAC
2540 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2541 	}
2542 #endif
2543 	else if ((error = vn_writechk(vp)) == 0 &&
2544 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2545 		VATTR_NULL(&vattr);
2546 		vattr.va_size = length;
2547 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2548 	}
2549 	vput(vp);
2550 	vn_finished_write(mp);
2551 	return (error);
2552 }
2553 
2554 /*
2555  * Truncate a file given a file descriptor.
2556  */
2557 #ifndef _SYS_SYSPROTO_H_
2558 struct ftruncate_args {
2559 	int	fd;
2560 	int	pad;
2561 	off_t	length;
2562 };
2563 #endif
2564 /* ARGSUSED */
2565 int
2566 ftruncate(td, uap)
2567 	struct thread *td;
2568 	register struct ftruncate_args /* {
2569 		syscallarg(int) fd;
2570 		syscallarg(int) pad;
2571 		syscallarg(off_t) length;
2572 	} */ *uap;
2573 {
2574 	struct mount *mp;
2575 	struct vattr vattr;
2576 	struct vnode *vp;
2577 	struct file *fp;
2578 	int error;
2579 
2580 	if (uap->length < 0)
2581 		return(EINVAL);
2582 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2583 		return (error);
2584 	if ((fp->f_flag & FWRITE) == 0) {
2585 		fdrop(fp, td);
2586 		return (EINVAL);
2587 	}
2588 	vp = (struct vnode *)fp->f_data;
2589 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2590 		fdrop(fp, td);
2591 		return (error);
2592 	}
2593 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2594 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2595 	if (vp->v_type == VDIR)
2596 		error = EISDIR;
2597 #ifdef MAC
2598 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
2599 	    vp))) {
2600 	}
2601 #endif
2602 	else if ((error = vn_writechk(vp)) == 0) {
2603 		VATTR_NULL(&vattr);
2604 		vattr.va_size = SCARG(uap, length);
2605 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2606 	}
2607 	VOP_UNLOCK(vp, 0, td);
2608 	vn_finished_write(mp);
2609 	fdrop(fp, td);
2610 	return (error);
2611 }
2612 
2613 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2614 /*
2615  * Truncate a file given its path name.
2616  */
2617 #ifndef _SYS_SYSPROTO_H_
2618 struct otruncate_args {
2619 	char	*path;
2620 	long	length;
2621 };
2622 #endif
2623 /* ARGSUSED */
2624 int
2625 otruncate(td, uap)
2626 	struct thread *td;
2627 	register struct otruncate_args /* {
2628 		syscallarg(char *) path;
2629 		syscallarg(long) length;
2630 	} */ *uap;
2631 {
2632 	struct truncate_args /* {
2633 		syscallarg(char *) path;
2634 		syscallarg(int) pad;
2635 		syscallarg(off_t) length;
2636 	} */ nuap;
2637 
2638 	SCARG(&nuap, path) = SCARG(uap, path);
2639 	SCARG(&nuap, length) = SCARG(uap, length);
2640 	return (truncate(td, &nuap));
2641 }
2642 
2643 /*
2644  * Truncate a file given a file descriptor.
2645  */
2646 #ifndef _SYS_SYSPROTO_H_
2647 struct oftruncate_args {
2648 	int	fd;
2649 	long	length;
2650 };
2651 #endif
2652 /* ARGSUSED */
2653 int
2654 oftruncate(td, uap)
2655 	struct thread *td;
2656 	register struct oftruncate_args /* {
2657 		syscallarg(int) fd;
2658 		syscallarg(long) length;
2659 	} */ *uap;
2660 {
2661 	struct ftruncate_args /* {
2662 		syscallarg(int) fd;
2663 		syscallarg(int) pad;
2664 		syscallarg(off_t) length;
2665 	} */ nuap;
2666 
2667 	SCARG(&nuap, fd) = SCARG(uap, fd);
2668 	SCARG(&nuap, length) = SCARG(uap, length);
2669 	return (ftruncate(td, &nuap));
2670 }
2671 #endif /* COMPAT_43 || COMPAT_SUNOS */
2672 
2673 /*
2674  * Sync an open file.
2675  */
2676 #ifndef _SYS_SYSPROTO_H_
2677 struct fsync_args {
2678 	int	fd;
2679 };
2680 #endif
2681 /* ARGSUSED */
2682 int
2683 fsync(td, uap)
2684 	struct thread *td;
2685 	struct fsync_args /* {
2686 		syscallarg(int) fd;
2687 	} */ *uap;
2688 {
2689 	struct vnode *vp;
2690 	struct mount *mp;
2691 	struct file *fp;
2692 	vm_object_t obj;
2693 	int error;
2694 
2695 	GIANT_REQUIRED;
2696 
2697 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2698 		return (error);
2699 	vp = (struct vnode *)fp->f_data;
2700 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2701 		fdrop(fp, td);
2702 		return (error);
2703 	}
2704 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2705 	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2706 		vm_object_page_clean(obj, 0, 0, 0);
2707 	}
2708 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2709 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)
2710 	    && softdep_fsync_hook != NULL)
2711 		error = (*softdep_fsync_hook)(vp);
2712 
2713 	VOP_UNLOCK(vp, 0, td);
2714 	vn_finished_write(mp);
2715 	fdrop(fp, td);
2716 	return (error);
2717 }
2718 
2719 /*
2720  * Rename files.  Source and destination must either both be directories,
2721  * or both not be directories.  If target is a directory, it must be empty.
2722  */
2723 #ifndef _SYS_SYSPROTO_H_
2724 struct rename_args {
2725 	char	*from;
2726 	char	*to;
2727 };
2728 #endif
2729 /* ARGSUSED */
2730 int
2731 rename(td, uap)
2732 	struct thread *td;
2733 	register struct rename_args /* {
2734 		syscallarg(char *) from;
2735 		syscallarg(char *) to;
2736 	} */ *uap;
2737 {
2738 
2739 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
2740 }
2741 
2742 int
2743 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
2744 {
2745 	struct mount *mp;
2746 	struct vnode *tvp, *fvp, *tdvp;
2747 	struct nameidata fromnd, tond;
2748 	int error;
2749 
2750 	bwillwrite();
2751 #ifdef MAC
2752 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART, pathseg,
2753 	    from, td);
2754 #else
2755 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, pathseg, from, td);
2756 #endif
2757 	if ((error = namei(&fromnd)) != 0)
2758 		return (error);
2759 #ifdef MAC
2760 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
2761 	    fromnd.ni_vp, &fromnd.ni_cnd);
2762 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
2763 	VOP_UNLOCK(fromnd.ni_vp, 0, td);
2764 #endif
2765 	fvp = fromnd.ni_vp;
2766 	if (error == 0)
2767 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
2768 	if (error != 0) {
2769 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2770 		vrele(fromnd.ni_dvp);
2771 		vrele(fvp);
2772 		goto out1;
2773 	}
2774 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
2775 	    NOOBJ, pathseg, to, td);
2776 	if (fromnd.ni_vp->v_type == VDIR)
2777 		tond.ni_cnd.cn_flags |= WILLBEDIR;
2778 	if ((error = namei(&tond)) != 0) {
2779 		/* Translate error code for rename("dir1", "dir2/."). */
2780 		if (error == EISDIR && fvp->v_type == VDIR)
2781 			error = EINVAL;
2782 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2783 		vrele(fromnd.ni_dvp);
2784 		vrele(fvp);
2785 		goto out1;
2786 	}
2787 	tdvp = tond.ni_dvp;
2788 	tvp = tond.ni_vp;
2789 	if (tvp != NULL) {
2790 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2791 			error = ENOTDIR;
2792 			goto out;
2793 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2794 			error = EISDIR;
2795 			goto out;
2796 		}
2797 	}
2798 	if (fvp == tdvp)
2799 		error = EINVAL;
2800 	/*
2801 	 * If the source is the same as the destination (that is, if they
2802 	 * are links to the same vnode), then there is nothing to do.
2803 	 */
2804 	if (fvp == tvp)
2805 		error = -1;
2806 #ifdef MAC
2807 	else
2808 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
2809 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
2810 #endif
2811 out:
2812 	if (!error) {
2813 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
2814 		if (fromnd.ni_dvp != tdvp) {
2815 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2816 		}
2817 		if (tvp) {
2818 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
2819 		}
2820 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2821 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2822 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2823 		NDFREE(&tond, NDF_ONLY_PNBUF);
2824 	} else {
2825 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2826 		NDFREE(&tond, NDF_ONLY_PNBUF);
2827 		if (tdvp == tvp)
2828 			vrele(tdvp);
2829 		else
2830 			vput(tdvp);
2831 		if (tvp)
2832 			vput(tvp);
2833 		vrele(fromnd.ni_dvp);
2834 		vrele(fvp);
2835 	}
2836 	vrele(tond.ni_startdir);
2837 	vn_finished_write(mp);
2838 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2839 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2840 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2841 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2842 out1:
2843 	if (fromnd.ni_startdir)
2844 		vrele(fromnd.ni_startdir);
2845 	if (error == -1)
2846 		return (0);
2847 	return (error);
2848 }
2849 
2850 /*
2851  * Make a directory file.
2852  */
2853 #ifndef _SYS_SYSPROTO_H_
2854 struct mkdir_args {
2855 	char	*path;
2856 	int	mode;
2857 };
2858 #endif
2859 /* ARGSUSED */
2860 int
2861 mkdir(td, uap)
2862 	struct thread *td;
2863 	register struct mkdir_args /* {
2864 		syscallarg(char *) path;
2865 		syscallarg(int) mode;
2866 	} */ *uap;
2867 {
2868 
2869 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
2870 }
2871 
2872 int
2873 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
2874 {
2875 	struct mount *mp;
2876 	struct vnode *vp;
2877 	struct vattr vattr;
2878 	int error;
2879 	struct nameidata nd;
2880 
2881 restart:
2882 	bwillwrite();
2883 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, path, td);
2884 	nd.ni_cnd.cn_flags |= WILLBEDIR;
2885 	if ((error = namei(&nd)) != 0)
2886 		return (error);
2887 	vp = nd.ni_vp;
2888 	if (vp != NULL) {
2889 		NDFREE(&nd, NDF_ONLY_PNBUF);
2890 		vrele(vp);
2891 		/*
2892 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
2893 		 * the strange behaviour of leaving the vnode unlocked
2894 		 * if the target is the same vnode as the parent.
2895 		 */
2896 		if (vp == nd.ni_dvp)
2897 			vrele(nd.ni_dvp);
2898 		else
2899 			vput(nd.ni_dvp);
2900 		return (EEXIST);
2901 	}
2902 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2903 		NDFREE(&nd, NDF_ONLY_PNBUF);
2904 		vput(nd.ni_dvp);
2905 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2906 			return (error);
2907 		goto restart;
2908 	}
2909 	VATTR_NULL(&vattr);
2910 	vattr.va_type = VDIR;
2911 	FILEDESC_LOCK(td->td_proc->p_fd);
2912 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
2913 	FILEDESC_UNLOCK(td->td_proc->p_fd);
2914 #ifdef MAC
2915 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
2916 	    &vattr);
2917 	if (error)
2918 		goto out;
2919 #endif
2920 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2921 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2922 #ifdef MAC
2923 out:
2924 #endif
2925 	NDFREE(&nd, NDF_ONLY_PNBUF);
2926 	vput(nd.ni_dvp);
2927 	if (!error)
2928 		vput(nd.ni_vp);
2929 	vn_finished_write(mp);
2930 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2931 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2932 	return (error);
2933 }
2934 
2935 /*
2936  * Remove a directory file.
2937  */
2938 #ifndef _SYS_SYSPROTO_H_
2939 struct rmdir_args {
2940 	char	*path;
2941 };
2942 #endif
2943 /* ARGSUSED */
2944 int
2945 rmdir(td, uap)
2946 	struct thread *td;
2947 	struct rmdir_args /* {
2948 		syscallarg(char *) path;
2949 	} */ *uap;
2950 {
2951 
2952 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
2953 }
2954 
2955 int
2956 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
2957 {
2958 	struct mount *mp;
2959 	struct vnode *vp;
2960 	int error;
2961 	struct nameidata nd;
2962 
2963 restart:
2964 	bwillwrite();
2965 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, pathseg, path, td);
2966 	if ((error = namei(&nd)) != 0)
2967 		return (error);
2968 	vp = nd.ni_vp;
2969 	if (vp->v_type != VDIR) {
2970 		error = ENOTDIR;
2971 		goto out;
2972 	}
2973 	/*
2974 	 * No rmdir "." please.
2975 	 */
2976 	if (nd.ni_dvp == vp) {
2977 		error = EINVAL;
2978 		goto out;
2979 	}
2980 	/*
2981 	 * The root of a mounted filesystem cannot be deleted.
2982 	 */
2983 	if (vp->v_vflag & VV_ROOT) {
2984 		error = EBUSY;
2985 		goto out;
2986 	}
2987 #ifdef MAC
2988 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
2989 	    &nd.ni_cnd);
2990 	if (error)
2991 		goto out;
2992 #endif
2993 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2994 		NDFREE(&nd, NDF_ONLY_PNBUF);
2995 		if (nd.ni_dvp == vp)
2996 			vrele(nd.ni_dvp);
2997 		else
2998 			vput(nd.ni_dvp);
2999 		vput(vp);
3000 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3001 			return (error);
3002 		goto restart;
3003 	}
3004 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3005 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3006 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3007 	vn_finished_write(mp);
3008 out:
3009 	NDFREE(&nd, NDF_ONLY_PNBUF);
3010 	if (nd.ni_dvp == vp)
3011 		vrele(nd.ni_dvp);
3012 	else
3013 		vput(nd.ni_dvp);
3014 	vput(vp);
3015 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3016 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3017 	return (error);
3018 }
3019 
3020 #ifdef COMPAT_43
3021 /*
3022  * Read a block of directory entries in a filesystem independent format.
3023  */
3024 #ifndef _SYS_SYSPROTO_H_
3025 struct ogetdirentries_args {
3026 	int	fd;
3027 	char	*buf;
3028 	u_int	count;
3029 	long	*basep;
3030 };
3031 #endif
3032 int
3033 ogetdirentries(td, uap)
3034 	struct thread *td;
3035 	register struct ogetdirentries_args /* {
3036 		syscallarg(int) fd;
3037 		syscallarg(char *) buf;
3038 		syscallarg(u_int) count;
3039 		syscallarg(long *) basep;
3040 	} */ *uap;
3041 {
3042 	struct vnode *vp;
3043 	struct file *fp;
3044 	struct uio auio, kuio;
3045 	struct iovec aiov, kiov;
3046 	struct dirent *dp, *edp;
3047 	caddr_t dirbuf;
3048 	int error, eofflag, readcnt;
3049 	long loff;
3050 
3051 	/* XXX arbitrary sanity limit on `count'. */
3052 	if (SCARG(uap, count) > 64 * 1024)
3053 		return (EINVAL);
3054 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3055 		return (error);
3056 	if ((fp->f_flag & FREAD) == 0) {
3057 		fdrop(fp, td);
3058 		return (EBADF);
3059 	}
3060 	vp = (struct vnode *)fp->f_data;
3061 unionread:
3062 	if (vp->v_type != VDIR) {
3063 		fdrop(fp, td);
3064 		return (EINVAL);
3065 	}
3066 	aiov.iov_base = SCARG(uap, buf);
3067 	aiov.iov_len = SCARG(uap, count);
3068 	auio.uio_iov = &aiov;
3069 	auio.uio_iovcnt = 1;
3070 	auio.uio_rw = UIO_READ;
3071 	auio.uio_segflg = UIO_USERSPACE;
3072 	auio.uio_td = td;
3073 	auio.uio_resid = SCARG(uap, count);
3074 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3075 	loff = auio.uio_offset = fp->f_offset;
3076 #ifdef MAC
3077 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3078 	if (error) {
3079 		VOP_UNLOCK(vp, 0, td);
3080 		fdrop(fp, td);
3081 		return (error);
3082 	}
3083 #endif
3084 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3085 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3086 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3087 			    NULL, NULL);
3088 			fp->f_offset = auio.uio_offset;
3089 		} else
3090 #	endif
3091 	{
3092 		kuio = auio;
3093 		kuio.uio_iov = &kiov;
3094 		kuio.uio_segflg = UIO_SYSSPACE;
3095 		kiov.iov_len = SCARG(uap, count);
3096 		MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
3097 		kiov.iov_base = dirbuf;
3098 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3099 			    NULL, NULL);
3100 		fp->f_offset = kuio.uio_offset;
3101 		if (error == 0) {
3102 			readcnt = SCARG(uap, count) - kuio.uio_resid;
3103 			edp = (struct dirent *)&dirbuf[readcnt];
3104 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3105 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3106 					/*
3107 					 * The expected low byte of
3108 					 * dp->d_namlen is our dp->d_type.
3109 					 * The high MBZ byte of dp->d_namlen
3110 					 * is our dp->d_namlen.
3111 					 */
3112 					dp->d_type = dp->d_namlen;
3113 					dp->d_namlen = 0;
3114 #				else
3115 					/*
3116 					 * The dp->d_type is the high byte
3117 					 * of the expected dp->d_namlen,
3118 					 * so must be zero'ed.
3119 					 */
3120 					dp->d_type = 0;
3121 #				endif
3122 				if (dp->d_reclen > 0) {
3123 					dp = (struct dirent *)
3124 					    ((char *)dp + dp->d_reclen);
3125 				} else {
3126 					error = EIO;
3127 					break;
3128 				}
3129 			}
3130 			if (dp >= edp)
3131 				error = uiomove(dirbuf, readcnt, &auio);
3132 		}
3133 		FREE(dirbuf, M_TEMP);
3134 	}
3135 	VOP_UNLOCK(vp, 0, td);
3136 	if (error) {
3137 		fdrop(fp, td);
3138 		return (error);
3139 	}
3140 	if (SCARG(uap, count) == auio.uio_resid) {
3141 		if (union_dircheckp) {
3142 			error = union_dircheckp(td, &vp, fp);
3143 			if (error == -1)
3144 				goto unionread;
3145 			if (error) {
3146 				fdrop(fp, td);
3147 				return (error);
3148 			}
3149 		}
3150 		/*
3151 		 * XXX We could delay dropping the lock above but
3152 		 * union_dircheckp complicates things.
3153 		 */
3154 		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
3155 		if ((vp->v_vflag & VV_ROOT) &&
3156 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3157 			struct vnode *tvp = vp;
3158 			vp = vp->v_mount->mnt_vnodecovered;
3159 			VREF(vp);
3160 			fp->f_data = vp;
3161 			fp->f_offset = 0;
3162 			vput(tvp);
3163 			goto unionread;
3164 		}
3165 		VOP_UNLOCK(vp, 0, td);
3166 	}
3167 	error = copyout(&loff, SCARG(uap, basep), sizeof(long));
3168 	fdrop(fp, td);
3169 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
3170 	return (error);
3171 }
3172 #endif /* COMPAT_43 */
3173 
3174 /*
3175  * Read a block of directory entries in a filesystem independent format.
3176  */
3177 #ifndef _SYS_SYSPROTO_H_
3178 struct getdirentries_args {
3179 	int	fd;
3180 	char	*buf;
3181 	u_int	count;
3182 	long	*basep;
3183 };
3184 #endif
3185 int
3186 getdirentries(td, uap)
3187 	struct thread *td;
3188 	register struct getdirentries_args /* {
3189 		syscallarg(int) fd;
3190 		syscallarg(char *) buf;
3191 		syscallarg(u_int) count;
3192 		syscallarg(long *) basep;
3193 	} */ *uap;
3194 {
3195 	struct vnode *vp;
3196 	struct file *fp;
3197 	struct uio auio;
3198 	struct iovec aiov;
3199 	long loff;
3200 	int error, eofflag;
3201 
3202 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3203 		return (error);
3204 	if ((fp->f_flag & FREAD) == 0) {
3205 		fdrop(fp, td);
3206 		return (EBADF);
3207 	}
3208 	vp = (struct vnode *)fp->f_data;
3209 unionread:
3210 	if (vp->v_type != VDIR) {
3211 		fdrop(fp, td);
3212 		return (EINVAL);
3213 	}
3214 	aiov.iov_base = SCARG(uap, buf);
3215 	aiov.iov_len = SCARG(uap, count);
3216 	auio.uio_iov = &aiov;
3217 	auio.uio_iovcnt = 1;
3218 	auio.uio_rw = UIO_READ;
3219 	auio.uio_segflg = UIO_USERSPACE;
3220 	auio.uio_td = td;
3221 	auio.uio_resid = SCARG(uap, count);
3222 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3223 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3224 	loff = auio.uio_offset = fp->f_offset;
3225 #ifdef MAC
3226 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3227 	if (error == 0)
3228 #endif
3229 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3230 		    NULL);
3231 	fp->f_offset = auio.uio_offset;
3232 	VOP_UNLOCK(vp, 0, td);
3233 	if (error) {
3234 		fdrop(fp, td);
3235 		return (error);
3236 	}
3237 	if (SCARG(uap, count) == auio.uio_resid) {
3238 		if (union_dircheckp) {
3239 			error = union_dircheckp(td, &vp, fp);
3240 			if (error == -1)
3241 				goto unionread;
3242 			if (error) {
3243 				fdrop(fp, td);
3244 				return (error);
3245 			}
3246 		}
3247 		/*
3248 		 * XXX We could delay dropping the lock above but
3249 		 * union_dircheckp complicates things.
3250 		 */
3251 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3252 		if ((vp->v_vflag & VV_ROOT) &&
3253 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3254 			struct vnode *tvp = vp;
3255 			vp = vp->v_mount->mnt_vnodecovered;
3256 			VREF(vp);
3257 			fp->f_data = vp;
3258 			fp->f_offset = 0;
3259 			vput(tvp);
3260 			goto unionread;
3261 		}
3262 		VOP_UNLOCK(vp, 0, td);
3263 	}
3264 	if (SCARG(uap, basep) != NULL) {
3265 		error = copyout(&loff, SCARG(uap, basep), sizeof(long));
3266 	}
3267 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
3268 	fdrop(fp, td);
3269 	return (error);
3270 }
3271 #ifndef _SYS_SYSPROTO_H_
3272 struct getdents_args {
3273 	int fd;
3274 	char *buf;
3275 	size_t count;
3276 };
3277 #endif
3278 int
3279 getdents(td, uap)
3280 	struct thread *td;
3281 	register struct getdents_args /* {
3282 		syscallarg(int) fd;
3283 		syscallarg(char *) buf;
3284 		syscallarg(u_int) count;
3285 	} */ *uap;
3286 {
3287 	struct getdirentries_args ap;
3288 	ap.fd = uap->fd;
3289 	ap.buf = uap->buf;
3290 	ap.count = uap->count;
3291 	ap.basep = NULL;
3292 	return getdirentries(td, &ap);
3293 }
3294 
3295 /*
3296  * Set the mode mask for creation of filesystem nodes.
3297  *
3298  * MP SAFE
3299  */
3300 #ifndef _SYS_SYSPROTO_H_
3301 struct umask_args {
3302 	int	newmask;
3303 };
3304 #endif
3305 int
3306 umask(td, uap)
3307 	struct thread *td;
3308 	struct umask_args /* {
3309 		syscallarg(int) newmask;
3310 	} */ *uap;
3311 {
3312 	register struct filedesc *fdp;
3313 
3314 	FILEDESC_LOCK(td->td_proc->p_fd);
3315 	fdp = td->td_proc->p_fd;
3316 	td->td_retval[0] = fdp->fd_cmask;
3317 	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
3318 	FILEDESC_UNLOCK(td->td_proc->p_fd);
3319 	return (0);
3320 }
3321 
3322 /*
3323  * Void all references to file by ripping underlying filesystem
3324  * away from vnode.
3325  */
3326 #ifndef _SYS_SYSPROTO_H_
3327 struct revoke_args {
3328 	char	*path;
3329 };
3330 #endif
3331 /* ARGSUSED */
3332 int
3333 revoke(td, uap)
3334 	struct thread *td;
3335 	register struct revoke_args /* {
3336 		syscallarg(char *) path;
3337 	} */ *uap;
3338 {
3339 	struct mount *mp;
3340 	struct vnode *vp;
3341 	struct vattr vattr;
3342 	int error;
3343 	struct nameidata nd;
3344 
3345 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path),
3346 	    td);
3347 	if ((error = namei(&nd)) != 0)
3348 		return (error);
3349 	vp = nd.ni_vp;
3350 	NDFREE(&nd, NDF_ONLY_PNBUF);
3351 	if (vp->v_type != VCHR) {
3352 		vput(vp);
3353 		return (EINVAL);
3354 	}
3355 #ifdef MAC
3356 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3357 	if (error) {
3358 		vput(vp);
3359 		return (error);
3360 	}
3361 #endif
3362 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3363 	if (error) {
3364 		vput(vp);
3365 		return (error);
3366 	}
3367 	VOP_UNLOCK(vp, 0, td);
3368 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3369 		error = suser_cred(td->td_ucred, PRISON_ROOT);
3370 		if (error)
3371 			goto out;
3372 	}
3373 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3374 		goto out;
3375 	if (vcount(vp) > 1)
3376 		VOP_REVOKE(vp, REVOKEALL);
3377 	vn_finished_write(mp);
3378 out:
3379 	vrele(vp);
3380 	return (error);
3381 }
3382 
3383 /*
3384  * Convert a user file descriptor to a kernel file entry.
3385  * The file entry is locked upon returning.
3386  */
3387 int
3388 getvnode(fdp, fd, fpp)
3389 	struct filedesc *fdp;
3390 	int fd;
3391 	struct file **fpp;
3392 {
3393 	int error;
3394 	struct file *fp;
3395 
3396 	fp = NULL;
3397 	if (fdp == NULL)
3398 		error = EBADF;
3399 	else {
3400 		FILEDESC_LOCK(fdp);
3401 		if ((u_int)fd >= fdp->fd_nfiles ||
3402 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3403 			error = EBADF;
3404 		else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
3405 			fp = NULL;
3406 			error = EINVAL;
3407 		} else {
3408 			fhold(fp);
3409 			error = 0;
3410 		}
3411 		FILEDESC_UNLOCK(fdp);
3412 	}
3413 	*fpp = fp;
3414 	return (error);
3415 }
3416 /*
3417  * Get (NFS) file handle
3418  */
3419 #ifndef _SYS_SYSPROTO_H_
3420 struct getfh_args {
3421 	char	*fname;
3422 	fhandle_t *fhp;
3423 };
3424 #endif
3425 int
3426 getfh(td, uap)
3427 	struct thread *td;
3428 	register struct getfh_args *uap;
3429 {
3430 	struct nameidata nd;
3431 	fhandle_t fh;
3432 	register struct vnode *vp;
3433 	int error;
3434 
3435 	/*
3436 	 * Must be super user
3437 	 */
3438 	error = suser(td);
3439 	if (error)
3440 		return (error);
3441 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3442 	error = namei(&nd);
3443 	if (error)
3444 		return (error);
3445 	NDFREE(&nd, NDF_ONLY_PNBUF);
3446 	vp = nd.ni_vp;
3447 	bzero(&fh, sizeof(fh));
3448 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3449 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3450 	vput(vp);
3451 	if (error)
3452 		return (error);
3453 	error = copyout(&fh, uap->fhp, sizeof (fh));
3454 	return (error);
3455 }
3456 
3457 /*
3458  * syscall for the rpc.lockd to use to translate a NFS file handle into
3459  * an open descriptor.
3460  *
3461  * warning: do not remove the suser() call or this becomes one giant
3462  * security hole.
3463  */
3464 #ifndef _SYS_SYSPROTO_H_
3465 struct fhopen_args {
3466 	const struct fhandle *u_fhp;
3467 	int flags;
3468 };
3469 #endif
3470 int
3471 fhopen(td, uap)
3472 	struct thread *td;
3473 	struct fhopen_args /* {
3474 		syscallarg(const struct fhandle *) u_fhp;
3475 		syscallarg(int) flags;
3476 	} */ *uap;
3477 {
3478 	struct proc *p = td->td_proc;
3479 	struct mount *mp;
3480 	struct vnode *vp;
3481 	struct fhandle fhp;
3482 	struct vattr vat;
3483 	struct vattr *vap = &vat;
3484 	struct flock lf;
3485 	struct file *fp;
3486 	register struct filedesc *fdp = p->p_fd;
3487 	int fmode, mode, error, type;
3488 	struct file *nfp;
3489 	int indx;
3490 
3491 	/*
3492 	 * Must be super user
3493 	 */
3494 	error = suser(td);
3495 	if (error)
3496 		return (error);
3497 
3498 	fmode = FFLAGS(SCARG(uap, flags));
3499 	/* why not allow a non-read/write open for our lockd? */
3500 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3501 		return (EINVAL);
3502 	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
3503 	if (error)
3504 		return(error);
3505 	/* find the mount point */
3506 	mp = vfs_getvfs(&fhp.fh_fsid);
3507 	if (mp == NULL)
3508 		return (ESTALE);
3509 	/* now give me my vnode, it gets returned to me locked */
3510 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3511 	if (error)
3512 		return (error);
3513  	/*
3514 	 * from now on we have to make sure not
3515 	 * to forget about the vnode
3516 	 * any error that causes an abort must vput(vp)
3517 	 * just set error = err and 'goto bad;'.
3518 	 */
3519 
3520 	/*
3521 	 * from vn_open
3522 	 */
3523 	if (vp->v_type == VLNK) {
3524 		error = EMLINK;
3525 		goto bad;
3526 	}
3527 	if (vp->v_type == VSOCK) {
3528 		error = EOPNOTSUPP;
3529 		goto bad;
3530 	}
3531 	mode = 0;
3532 	if (fmode & (FWRITE | O_TRUNC)) {
3533 		if (vp->v_type == VDIR) {
3534 			error = EISDIR;
3535 			goto bad;
3536 		}
3537 		error = vn_writechk(vp);
3538 		if (error)
3539 			goto bad;
3540 		mode |= VWRITE;
3541 	}
3542 	if (fmode & FREAD)
3543 		mode |= VREAD;
3544 	if (fmode & O_APPEND)
3545 		mode |= VAPPEND;
3546 #ifdef MAC
3547 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
3548 	if (error)
3549 		goto bad;
3550 #endif
3551 	if (mode) {
3552 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
3553 		if (error)
3554 			goto bad;
3555 	}
3556 	if (fmode & O_TRUNC) {
3557 		VOP_UNLOCK(vp, 0, td);				/* XXX */
3558 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3559 			vrele(vp);
3560 			return (error);
3561 		}
3562 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3563 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
3564 #ifdef MAC
3565 		/*
3566 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
3567 		 * should be right.
3568 		 */
3569 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
3570 		if (error == 0) {
3571 #endif
3572 			VATTR_NULL(vap);
3573 			vap->va_size = 0;
3574 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
3575 #ifdef MAC
3576 		}
3577 #endif
3578 		vn_finished_write(mp);
3579 		if (error)
3580 			goto bad;
3581 	}
3582 	error = VOP_OPEN(vp, fmode, td->td_ucred, td);
3583 	if (error)
3584 		goto bad;
3585 	/*
3586 	 * Make sure that a VM object is created for VMIO support.
3587 	 */
3588 	if (vn_canvmio(vp) == TRUE) {
3589 		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
3590 			goto bad;
3591 	}
3592 	if (fmode & FWRITE)
3593 		vp->v_writecount++;
3594 
3595 	/*
3596 	 * end of vn_open code
3597 	 */
3598 
3599 	if ((error = falloc(td, &nfp, &indx)) != 0) {
3600 		if (fmode & FWRITE)
3601 			vp->v_writecount--;
3602 		goto bad;
3603 	}
3604 	fp = nfp;
3605 
3606 	/*
3607 	 * Hold an extra reference to avoid having fp ripped out
3608 	 * from under us while we block in the lock op
3609 	 */
3610 	fhold(fp);
3611 	nfp->f_data = vp;
3612 	nfp->f_flag = fmode & FMASK;
3613 	nfp->f_ops = &vnops;
3614 	nfp->f_type = DTYPE_VNODE;
3615 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
3616 		lf.l_whence = SEEK_SET;
3617 		lf.l_start = 0;
3618 		lf.l_len = 0;
3619 		if (fmode & O_EXLOCK)
3620 			lf.l_type = F_WRLCK;
3621 		else
3622 			lf.l_type = F_RDLCK;
3623 		type = F_FLOCK;
3624 		if ((fmode & FNONBLOCK) == 0)
3625 			type |= F_WAIT;
3626 		VOP_UNLOCK(vp, 0, td);
3627 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
3628 			    type)) != 0) {
3629 			/*
3630 			 * The lock request failed.  Normally close the
3631 			 * descriptor but handle the case where someone might
3632 			 * have dup()d or close()d it when we weren't looking.
3633 			 */
3634 			FILEDESC_LOCK(fdp);
3635 			if (fdp->fd_ofiles[indx] == fp) {
3636 				fdp->fd_ofiles[indx] = NULL;
3637 				FILEDESC_UNLOCK(fdp);
3638 				fdrop(fp, td);
3639 			} else
3640 				FILEDESC_UNLOCK(fdp);
3641 			/*
3642 			 * release our private reference
3643 			 */
3644 			fdrop(fp, td);
3645 			return(error);
3646 		}
3647 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3648 		fp->f_flag |= FHASLOCK;
3649 	}
3650 	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
3651 		vfs_object_create(vp, td, td->td_ucred);
3652 
3653 	VOP_UNLOCK(vp, 0, td);
3654 	fdrop(fp, td);
3655 	td->td_retval[0] = indx;
3656 	return (0);
3657 
3658 bad:
3659 	vput(vp);
3660 	return (error);
3661 }
3662 
3663 /*
3664  * Stat an (NFS) file handle.
3665  */
3666 #ifndef _SYS_SYSPROTO_H_
3667 struct fhstat_args {
3668 	struct fhandle *u_fhp;
3669 	struct stat *sb;
3670 };
3671 #endif
3672 int
3673 fhstat(td, uap)
3674 	struct thread *td;
3675 	register struct fhstat_args /* {
3676 		syscallarg(struct fhandle *) u_fhp;
3677 		syscallarg(struct stat *) sb;
3678 	} */ *uap;
3679 {
3680 	struct stat sb;
3681 	fhandle_t fh;
3682 	struct mount *mp;
3683 	struct vnode *vp;
3684 	int error;
3685 
3686 	/*
3687 	 * Must be super user
3688 	 */
3689 	error = suser(td);
3690 	if (error)
3691 		return (error);
3692 
3693 	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
3694 	if (error)
3695 		return (error);
3696 
3697 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3698 		return (ESTALE);
3699 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3700 		return (error);
3701 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
3702 	vput(vp);
3703 	if (error)
3704 		return (error);
3705 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
3706 	return (error);
3707 }
3708 
3709 /*
3710  * Implement fstatfs() for (NFS) file handles.
3711  */
3712 #ifndef _SYS_SYSPROTO_H_
3713 struct fhstatfs_args {
3714 	struct fhandle *u_fhp;
3715 	struct statfs *buf;
3716 };
3717 #endif
3718 int
3719 fhstatfs(td, uap)
3720 	struct thread *td;
3721 	struct fhstatfs_args /* {
3722 		syscallarg(struct fhandle) *u_fhp;
3723 		syscallarg(struct statfs) *buf;
3724 	} */ *uap;
3725 {
3726 	struct statfs *sp;
3727 	struct mount *mp;
3728 	struct vnode *vp;
3729 	struct statfs sb;
3730 	fhandle_t fh;
3731 	int error;
3732 
3733 	/*
3734 	 * Must be super user
3735 	 */
3736 	error = suser(td);
3737 	if (error)
3738 		return (error);
3739 
3740 	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
3741 		return (error);
3742 
3743 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3744 		return (ESTALE);
3745 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3746 		return (error);
3747 	mp = vp->v_mount;
3748 	sp = &mp->mnt_stat;
3749 	vput(vp);
3750 #ifdef MAC
3751 	error = mac_check_mount_stat(td->td_ucred, mp);
3752 	if (error)
3753 		return (error);
3754 #endif
3755 	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3756 		return (error);
3757 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3758 	if (suser(td)) {
3759 		bcopy(sp, &sb, sizeof(sb));
3760 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3761 		sp = &sb;
3762 	}
3763 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
3764 }
3765 
3766 /*
3767  * Syscall to push extended attribute configuration information into the
3768  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3769  * a command (int cmd), and attribute name and misc data.  For now, the
3770  * attribute name is left in userspace for consumption by the VFS_op.
3771  * It will probably be changed to be copied into sysspace by the
3772  * syscall in the future, once issues with various consumers of the
3773  * attribute code have raised their hands.
3774  *
3775  * Currently this is used only by UFS Extended Attributes.
3776  */
3777 int
3778 extattrctl(td, uap)
3779 	struct thread *td;
3780 	struct extattrctl_args /* {
3781 		syscallarg(const char *) path;
3782 		syscallarg(int) cmd;
3783 		syscallarg(const char *) filename;
3784 		syscallarg(int) attrnamespace;
3785 		syscallarg(const char *) attrname;
3786 	} */ *uap;
3787 {
3788 	struct vnode *filename_vp;
3789 	struct nameidata nd;
3790 	struct mount *mp, *mp_writable;
3791 	char attrname[EXTATTR_MAXNAMELEN];
3792 	int error;
3793 
3794 	/*
3795 	 * uap->attrname is not always defined.  We check again later when we
3796 	 * invoke the VFS call so as to pass in NULL there if needed.
3797 	 */
3798 	if (uap->attrname != NULL) {
3799 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
3800 		    NULL);
3801 		if (error)
3802 			return (error);
3803 	}
3804 
3805 	/*
3806 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
3807 	 * which VFS_EXTATTRCTL() will later release.
3808 	 */
3809 	filename_vp = NULL;
3810 	if (uap->filename != NULL) {
3811 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3812 		    uap->filename, td);
3813 		error = namei(&nd);
3814 		if (error)
3815 			return (error);
3816 		filename_vp = nd.ni_vp;
3817 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3818 	}
3819 
3820 	/* uap->path is always defined. */
3821 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3822 	error = namei(&nd);
3823 	if (error) {
3824 		if (filename_vp != NULL)
3825 			vput(filename_vp);
3826 		return (error);
3827 	}
3828 	mp = nd.ni_vp->v_mount;
3829 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
3830 	NDFREE(&nd, 0);
3831 	if (error) {
3832 		if (filename_vp != NULL)
3833 			vput(filename_vp);
3834 		return (error);
3835 	}
3836 
3837 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
3838 	    uap->attrname != NULL ? attrname : NULL, td);
3839 
3840 	vn_finished_write(mp_writable);
3841 	/*
3842 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3843 	 * filename_vp, so vrele it if it is defined.
3844 	 */
3845 	if (filename_vp != NULL)
3846 		vrele(filename_vp);
3847 	return (error);
3848 }
3849 
3850 /*-
3851  * Set a named extended attribute on a file or directory
3852  *
3853  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3854  *            kernelspace string pointer "attrname", userspace buffer
3855  *            pointer "data", buffer length "nbytes", thread "td".
3856  * Returns: 0 on success, an error number otherwise
3857  * Locks: none
3858  * References: vp must be a valid reference for the duration of the call
3859  */
3860 static int
3861 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3862     void *data, size_t nbytes, struct thread *td)
3863 {
3864 	struct mount *mp;
3865 	struct uio auio;
3866 	struct iovec aiov;
3867 	ssize_t cnt;
3868 	int error;
3869 
3870 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
3871 	if (error)
3872 		return (error);
3873 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3874 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3875 
3876 	aiov.iov_base = data;
3877 	aiov.iov_len = nbytes;
3878 	auio.uio_iov = &aiov;
3879 	auio.uio_iovcnt = 1;
3880 	auio.uio_offset = 0;
3881 	if (nbytes > INT_MAX) {
3882 		error = EINVAL;
3883 		goto done;
3884 	}
3885 	auio.uio_resid = nbytes;
3886 	auio.uio_rw = UIO_WRITE;
3887 	auio.uio_segflg = UIO_USERSPACE;
3888 	auio.uio_td = td;
3889 	cnt = nbytes;
3890 
3891 #ifdef MAC
3892 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
3893 	    attrname, &auio);
3894 	if (error)
3895 		goto done;
3896 #endif
3897 
3898 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3899 	    td->td_ucred, td);
3900 	cnt -= auio.uio_resid;
3901 	td->td_retval[0] = cnt;
3902 
3903 done:
3904 	VOP_UNLOCK(vp, 0, td);
3905 	vn_finished_write(mp);
3906 	return (error);
3907 }
3908 
3909 int
3910 extattr_set_fd(td, uap)
3911 	struct thread *td;
3912 	struct extattr_set_fd_args /* {
3913 		syscallarg(int) fd;
3914 		syscallarg(int) attrnamespace;
3915 		syscallarg(const char *) attrname;
3916 		syscallarg(void *) data;
3917 		syscallarg(size_t) nbytes;
3918 	} */ *uap;
3919 {
3920 	struct file *fp;
3921 	char attrname[EXTATTR_MAXNAMELEN];
3922 	int error;
3923 
3924 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3925 	if (error)
3926 		return (error);
3927 
3928 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
3929 	if (error)
3930 		return (error);
3931 
3932 	error = extattr_set_vp((struct vnode *)fp->f_data, uap->attrnamespace,
3933 	    attrname, uap->data, uap->nbytes, td);
3934 	fdrop(fp, td);
3935 
3936 	return (error);
3937 }
3938 
3939 int
3940 extattr_set_file(td, uap)
3941 	struct thread *td;
3942 	struct extattr_set_file_args /* {
3943 		syscallarg(const char *) path;
3944 		syscallarg(int) attrnamespace;
3945 		syscallarg(const char *) attrname;
3946 		syscallarg(void *) data;
3947 		syscallarg(size_t) nbytes;
3948 	} */ *uap;
3949 {
3950 	struct nameidata nd;
3951 	char attrname[EXTATTR_MAXNAMELEN];
3952 	int error;
3953 
3954 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3955 	if (error)
3956 		return (error);
3957 
3958 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3959 	error = namei(&nd);
3960 	if (error)
3961 		return (error);
3962 	NDFREE(&nd, NDF_ONLY_PNBUF);
3963 
3964 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
3965 	    uap->data, uap->nbytes, td);
3966 
3967 	vrele(nd.ni_vp);
3968 	return (error);
3969 }
3970 
3971 int
3972 extattr_set_link(td, uap)
3973 	struct thread *td;
3974 	struct extattr_set_link_args /* {
3975 		syscallarg(const char *) path;
3976 		syscallarg(int) attrnamespace;
3977 		syscallarg(const char *) attrname;
3978 		syscallarg(void *) data;
3979 		syscallarg(size_t) nbytes;
3980 	} */ *uap;
3981 {
3982 	struct nameidata nd;
3983 	char attrname[EXTATTR_MAXNAMELEN];
3984 	int error;
3985 
3986 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3987 	if (error)
3988 		return (error);
3989 
3990 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
3991 	error = namei(&nd);
3992 	if (error)
3993 		return (error);
3994 	NDFREE(&nd, NDF_ONLY_PNBUF);
3995 
3996 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
3997 	    uap->data, uap->nbytes, td);
3998 
3999 	vrele(nd.ni_vp);
4000 	return (error);
4001 }
4002 
4003 /*-
4004  * Get a named extended attribute on a file or directory
4005  *
4006  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4007  *            kernelspace string pointer "attrname", userspace buffer
4008  *            pointer "data", buffer length "nbytes", thread "td".
4009  * Returns: 0 on success, an error number otherwise
4010  * Locks: none
4011  * References: vp must be a valid reference for the duration of the call
4012  */
4013 static int
4014 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4015     void *data, size_t nbytes, struct thread *td)
4016 {
4017 	struct uio auio, *auiop;
4018 	struct iovec aiov;
4019 	ssize_t cnt;
4020 	size_t size, *sizep;
4021 	int error;
4022 
4023 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4024 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4025 
4026 	/*
4027 	 * Slightly unusual semantics: if the user provides a NULL data
4028 	 * pointer, they don't want to receive the data, just the
4029 	 * maximum read length.
4030 	 */
4031 	auiop = NULL;
4032 	sizep = NULL;
4033 	cnt = 0;
4034 	if (data != NULL) {
4035 		aiov.iov_base = data;
4036 		aiov.iov_len = nbytes;
4037 		auio.uio_iov = &aiov;
4038 		auio.uio_offset = 0;
4039 		if (nbytes > INT_MAX) {
4040 			error = EINVAL;
4041 			goto done;
4042 		}
4043 		auio.uio_resid = nbytes;
4044 		auio.uio_rw = UIO_READ;
4045 		auio.uio_segflg = UIO_USERSPACE;
4046 		auio.uio_td = td;
4047 		auiop = &auio;
4048 		cnt = nbytes;
4049 	} else
4050 		sizep = &size;
4051 
4052 #ifdef MAC
4053 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4054 	    attrname, &auio);
4055 	if (error)
4056 		goto done;
4057 #endif
4058 
4059 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4060 	    td->td_ucred, td);
4061 
4062 	if (auiop != NULL) {
4063 		cnt -= auio.uio_resid;
4064 		td->td_retval[0] = cnt;
4065 	} else
4066 		td->td_retval[0] = size;
4067 
4068 done:
4069 	VOP_UNLOCK(vp, 0, td);
4070 	return (error);
4071 }
4072 
4073 int
4074 extattr_get_fd(td, uap)
4075 	struct thread *td;
4076 	struct extattr_get_fd_args /* {
4077 		syscallarg(int) fd;
4078 		syscallarg(int) attrnamespace;
4079 		syscallarg(const char *) attrname;
4080 		syscallarg(void *) data;
4081 		syscallarg(size_t) nbytes;
4082 	} */ *uap;
4083 {
4084 	struct file *fp;
4085 	char attrname[EXTATTR_MAXNAMELEN];
4086 	int error;
4087 
4088 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4089 	if (error)
4090 		return (error);
4091 
4092 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4093 	if (error)
4094 		return (error);
4095 
4096 	error = extattr_get_vp((struct vnode *)fp->f_data, uap->attrnamespace,
4097 	    attrname, uap->data, uap->nbytes, td);
4098 
4099 	fdrop(fp, td);
4100 	return (error);
4101 }
4102 
4103 int
4104 extattr_get_file(td, uap)
4105 	struct thread *td;
4106 	struct extattr_get_file_args /* {
4107 		syscallarg(const char *) path;
4108 		syscallarg(int) attrnamespace;
4109 		syscallarg(const char *) attrname;
4110 		syscallarg(void *) data;
4111 		syscallarg(size_t) nbytes;
4112 	} */ *uap;
4113 {
4114 	struct nameidata nd;
4115 	char attrname[EXTATTR_MAXNAMELEN];
4116 	int error;
4117 
4118 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4119 	if (error)
4120 		return (error);
4121 
4122 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4123 	error = namei(&nd);
4124 	if (error)
4125 		return (error);
4126 	NDFREE(&nd, NDF_ONLY_PNBUF);
4127 
4128 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4129 	    uap->data, uap->nbytes, td);
4130 
4131 	vrele(nd.ni_vp);
4132 	return (error);
4133 }
4134 
4135 int
4136 extattr_get_link(td, uap)
4137 	struct thread *td;
4138 	struct extattr_get_link_args /* {
4139 		syscallarg(const char *) path;
4140 		syscallarg(int) attrnamespace;
4141 		syscallarg(const char *) attrname;
4142 		syscallarg(void *) data;
4143 		syscallarg(size_t) nbytes;
4144 	} */ *uap;
4145 {
4146 	struct nameidata nd;
4147 	char attrname[EXTATTR_MAXNAMELEN];
4148 	int error;
4149 
4150 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4151 	if (error)
4152 		return (error);
4153 
4154 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4155 	error = namei(&nd);
4156 	if (error)
4157 		return (error);
4158 	NDFREE(&nd, NDF_ONLY_PNBUF);
4159 
4160 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4161 	    uap->data, uap->nbytes, td);
4162 
4163 	vrele(nd.ni_vp);
4164 	return (error);
4165 }
4166 
4167 /*
4168  * extattr_delete_vp(): Delete a named extended attribute on a file or
4169  *                      directory
4170  *
4171  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4172  *            kernelspace string pointer "attrname", proc "p"
4173  * Returns: 0 on success, an error number otherwise
4174  * Locks: none
4175  * References: vp must be a valid reference for the duration of the call
4176  */
4177 static int
4178 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4179     struct thread *td)
4180 {
4181 	struct mount *mp;
4182 	int error;
4183 
4184 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4185 	if (error)
4186 		return (error);
4187 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4188 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4189 
4190 #ifdef MAC
4191 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4192 	    attrname, NULL);
4193 	if (error)
4194 		goto done;
4195 #endif
4196 
4197 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred,
4198 	    td);
4199 #ifdef MAC
4200 done:
4201 #endif
4202 	VOP_UNLOCK(vp, 0, td);
4203 	vn_finished_write(mp);
4204 	return (error);
4205 }
4206 
4207 int
4208 extattr_delete_fd(td, uap)
4209 	struct thread *td;
4210 	struct extattr_delete_fd_args /* {
4211 		syscallarg(int) fd;
4212 		syscallarg(int) attrnamespace;
4213 		syscallarg(const char *) attrname;
4214 	} */ *uap;
4215 {
4216 	struct file *fp;
4217 	struct vnode *vp;
4218 	char attrname[EXTATTR_MAXNAMELEN];
4219 	int error;
4220 
4221 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4222 	if (error)
4223 		return (error);
4224 
4225 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4226 	if (error)
4227 		return (error);
4228 	vp = (struct vnode *)fp->f_data;
4229 
4230 	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
4231 	fdrop(fp, td);
4232 	return (error);
4233 }
4234 
4235 int
4236 extattr_delete_file(td, uap)
4237 	struct thread *td;
4238 	struct extattr_delete_file_args /* {
4239 		syscallarg(const char *) path;
4240 		syscallarg(int) attrnamespace;
4241 		syscallarg(const char *) attrname;
4242 	} */ *uap;
4243 {
4244 	struct nameidata nd;
4245 	char attrname[EXTATTR_MAXNAMELEN];
4246 	int error;
4247 
4248 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4249 	if (error)
4250 		return(error);
4251 
4252 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4253 	error = namei(&nd);
4254 	if (error)
4255 		return(error);
4256 	NDFREE(&nd, NDF_ONLY_PNBUF);
4257 
4258 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4259 	vrele(nd.ni_vp);
4260 	return(error);
4261 }
4262 
4263 int
4264 extattr_delete_link(td, uap)
4265 	struct thread *td;
4266 	struct extattr_delete_link_args /* {
4267 		syscallarg(const char *) path;
4268 		syscallarg(int) attrnamespace;
4269 		syscallarg(const char *) attrname;
4270 	} */ *uap;
4271 {
4272 	struct nameidata nd;
4273 	char attrname[EXTATTR_MAXNAMELEN];
4274 	int error;
4275 
4276 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4277 	if (error)
4278 		return(error);
4279 
4280 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4281 	error = namei(&nd);
4282 	if (error)
4283 		return(error);
4284 	NDFREE(&nd, NDF_ONLY_PNBUF);
4285 
4286 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4287 	vrele(nd.ni_vp);
4288 	return(error);
4289 }
4290