xref: /freebsd/sys/kern/vfs_extattr.c (revision 71fe318b852b8dfb3e799cb12ef184750f7f8eac)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  * $FreeBSD$
40  */
41 
42 /* For 4.3 integer FS ID compatibility */
43 #include "opt_compat.h"
44 #include "opt_mac.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/sysent.h>
51 #include <sys/mac.h>
52 #include <sys/malloc.h>
53 #include <sys/mount.h>
54 #include <sys/mutex.h>
55 #include <sys/sysproto.h>
56 #include <sys/namei.h>
57 #include <sys/filedesc.h>
58 #include <sys/kernel.h>
59 #include <sys/fcntl.h>
60 #include <sys/file.h>
61 #include <sys/linker.h>
62 #include <sys/stat.h>
63 #include <sys/sx.h>
64 #include <sys/unistd.h>
65 #include <sys/vnode.h>
66 #include <sys/proc.h>
67 #include <sys/dirent.h>
68 #include <sys/extattr.h>
69 #include <sys/jail.h>
70 #include <sys/syscallsubr.h>
71 #include <sys/sysctl.h>
72 
73 #include <machine/limits.h>
74 #include <machine/stdarg.h>
75 
76 #include <vm/vm.h>
77 #include <vm/vm_object.h>
78 #include <vm/vm_page.h>
79 #include <vm/uma.h>
80 
81 static int change_dir(struct nameidata *ndp, struct thread *td);
82 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
83 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
84 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
85 static int setfmode(struct thread *td, struct vnode *, int);
86 static int setfflags(struct thread *td, struct vnode *, int);
87 static int setutimes(struct thread *td, struct vnode *,
88     const struct timespec *, int, int);
89 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
90     struct thread *td);
91 
92 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
93 int (*softdep_fsync_hook)(struct vnode *);
94 
95 /*
96  * The module initialization routine for POSIX asynchronous I/O will
97  * set this to the version of AIO that it implements.  (Zero means
98  * that it is not implemented.)  This value is used here by pathconf()
99  * and in kern_descrip.c by fpathconf().
100  */
101 int async_io_version;
102 
103 /*
104  * Sync each mounted filesystem.
105  */
106 #ifndef _SYS_SYSPROTO_H_
107 struct sync_args {
108         int     dummy;
109 };
110 #endif
111 
112 #ifdef DEBUG
113 static int syncprt = 0;
114 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
115 #endif
116 
117 /* ARGSUSED */
118 int
119 sync(td, uap)
120 	struct thread *td;
121 	struct sync_args *uap;
122 {
123 	struct mount *mp, *nmp;
124 	int asyncflag;
125 
126 	mtx_lock(&mountlist_mtx);
127 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
128 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
129 			nmp = TAILQ_NEXT(mp, mnt_list);
130 			continue;
131 		}
132 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
133 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
134 			asyncflag = mp->mnt_flag & MNT_ASYNC;
135 			mp->mnt_flag &= ~MNT_ASYNC;
136 			vfs_msync(mp, MNT_NOWAIT);
137 			VFS_SYNC(mp, MNT_NOWAIT,
138 			    ((td != NULL) ? td->td_ucred : NOCRED), td);
139 			mp->mnt_flag |= asyncflag;
140 			vn_finished_write(mp);
141 		}
142 		mtx_lock(&mountlist_mtx);
143 		nmp = TAILQ_NEXT(mp, mnt_list);
144 		vfs_unbusy(mp, td);
145 	}
146 	mtx_unlock(&mountlist_mtx);
147 #if 0
148 /*
149  * XXX don't call vfs_bufstats() yet because that routine
150  * was not imported in the Lite2 merge.
151  */
152 #ifdef DIAGNOSTIC
153 	if (syncprt)
154 		vfs_bufstats();
155 #endif /* DIAGNOSTIC */
156 #endif
157 	return (0);
158 }
159 
160 /* XXX PRISON: could be per prison flag */
161 static int prison_quotas;
162 #if 0
163 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
164 #endif
165 
166 /*
167  * Change filesystem quotas.
168  */
169 #ifndef _SYS_SYSPROTO_H_
170 struct quotactl_args {
171 	char *path;
172 	int cmd;
173 	int uid;
174 	caddr_t arg;
175 };
176 #endif
177 /* ARGSUSED */
178 int
179 quotactl(td, uap)
180 	struct thread *td;
181 	register struct quotactl_args /* {
182 		syscallarg(char *) path;
183 		syscallarg(int) cmd;
184 		syscallarg(int) uid;
185 		syscallarg(caddr_t) arg;
186 	} */ *uap;
187 {
188 	struct mount *mp;
189 	int error;
190 	struct nameidata nd;
191 
192 	if (jailed(td->td_ucred) && !prison_quotas)
193 		return (EPERM);
194 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
195 	if ((error = namei(&nd)) != 0)
196 		return (error);
197 	NDFREE(&nd, NDF_ONLY_PNBUF);
198 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
199 	vrele(nd.ni_vp);
200 	if (error)
201 		return (error);
202 	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
203 	    SCARG(uap, arg), td);
204 	vn_finished_write(mp);
205 	return (error);
206 }
207 
208 /*
209  * Get filesystem statistics.
210  */
211 #ifndef _SYS_SYSPROTO_H_
212 struct statfs_args {
213 	char *path;
214 	struct statfs *buf;
215 };
216 #endif
217 /* ARGSUSED */
218 int
219 statfs(td, uap)
220 	struct thread *td;
221 	register struct statfs_args /* {
222 		syscallarg(char *) path;
223 		syscallarg(struct statfs *) buf;
224 	} */ *uap;
225 {
226 	register struct mount *mp;
227 	register struct statfs *sp;
228 	int error;
229 	struct nameidata nd;
230 	struct statfs sb;
231 
232 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
233 	if ((error = namei(&nd)) != 0)
234 		return (error);
235 	mp = nd.ni_vp->v_mount;
236 	sp = &mp->mnt_stat;
237 	NDFREE(&nd, NDF_ONLY_PNBUF);
238 	vrele(nd.ni_vp);
239 #ifdef MAC
240 	error = mac_check_mount_stat(td->td_ucred, mp);
241 	if (error)
242 		return (error);
243 #endif
244 	error = VFS_STATFS(mp, sp, td);
245 	if (error)
246 		return (error);
247 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
248 	if (suser(td)) {
249 		bcopy(sp, &sb, sizeof(sb));
250 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
251 		sp = &sb;
252 	}
253 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
254 }
255 
256 /*
257  * Get filesystem statistics.
258  */
259 #ifndef _SYS_SYSPROTO_H_
260 struct fstatfs_args {
261 	int fd;
262 	struct statfs *buf;
263 };
264 #endif
265 /* ARGSUSED */
266 int
267 fstatfs(td, uap)
268 	struct thread *td;
269 	register struct fstatfs_args /* {
270 		syscallarg(int) fd;
271 		syscallarg(struct statfs *) buf;
272 	} */ *uap;
273 {
274 	struct file *fp;
275 	struct mount *mp;
276 	register struct statfs *sp;
277 	int error;
278 	struct statfs sb;
279 
280 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
281 		return (error);
282 	mp = ((struct vnode *)fp->f_data)->v_mount;
283 	fdrop(fp, td);
284 	if (mp == NULL)
285 		return (EBADF);
286 #ifdef MAC
287 	error = mac_check_mount_stat(td->td_ucred, mp);
288 	if (error)
289 		return (error);
290 #endif
291 	sp = &mp->mnt_stat;
292 	error = VFS_STATFS(mp, sp, td);
293 	if (error)
294 		return (error);
295 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
296 	if (suser(td)) {
297 		bcopy(sp, &sb, sizeof(sb));
298 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
299 		sp = &sb;
300 	}
301 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
302 }
303 
304 /*
305  * Get statistics on all filesystems.
306  */
307 #ifndef _SYS_SYSPROTO_H_
308 struct getfsstat_args {
309 	struct statfs *buf;
310 	long bufsize;
311 	int flags;
312 };
313 #endif
314 int
315 getfsstat(td, uap)
316 	struct thread *td;
317 	register struct getfsstat_args /* {
318 		syscallarg(struct statfs *) buf;
319 		syscallarg(long) bufsize;
320 		syscallarg(int) flags;
321 	} */ *uap;
322 {
323 	register struct mount *mp, *nmp;
324 	register struct statfs *sp;
325 	caddr_t sfsp;
326 	long count, maxcount, error;
327 
328 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
329 	sfsp = (caddr_t)SCARG(uap, buf);
330 	count = 0;
331 	mtx_lock(&mountlist_mtx);
332 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
333 #ifdef MAC
334 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
335 			nmp = TAILQ_NEXT(mp, mnt_list);
336 			continue;
337 		}
338 #endif
339 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
340 			nmp = TAILQ_NEXT(mp, mnt_list);
341 			continue;
342 		}
343 		if (sfsp && count < maxcount) {
344 			sp = &mp->mnt_stat;
345 			/*
346 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
347 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
348 			 * overrides MNT_WAIT.
349 			 */
350 			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
351 			    (SCARG(uap, flags) & MNT_WAIT)) &&
352 			    (error = VFS_STATFS(mp, sp, td))) {
353 				mtx_lock(&mountlist_mtx);
354 				nmp = TAILQ_NEXT(mp, mnt_list);
355 				vfs_unbusy(mp, td);
356 				continue;
357 			}
358 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
359 			error = copyout(sp, sfsp, sizeof(*sp));
360 			if (error) {
361 				vfs_unbusy(mp, td);
362 				return (error);
363 			}
364 			sfsp += sizeof(*sp);
365 		}
366 		count++;
367 		mtx_lock(&mountlist_mtx);
368 		nmp = TAILQ_NEXT(mp, mnt_list);
369 		vfs_unbusy(mp, td);
370 	}
371 	mtx_unlock(&mountlist_mtx);
372 	if (sfsp && count > maxcount)
373 		td->td_retval[0] = maxcount;
374 	else
375 		td->td_retval[0] = count;
376 	return (0);
377 }
378 
379 /*
380  * Change current working directory to a given file descriptor.
381  */
382 #ifndef _SYS_SYSPROTO_H_
383 struct fchdir_args {
384 	int	fd;
385 };
386 #endif
387 /* ARGSUSED */
388 int
389 fchdir(td, uap)
390 	struct thread *td;
391 	struct fchdir_args /* {
392 		syscallarg(int) fd;
393 	} */ *uap;
394 {
395 	register struct filedesc *fdp = td->td_proc->p_fd;
396 	struct vnode *vp, *tdp, *vpold;
397 	struct mount *mp;
398 	struct file *fp;
399 	int error;
400 
401 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
402 		return (error);
403 	vp = (struct vnode *)fp->f_data;
404 	VREF(vp);
405 	fdrop(fp, td);
406 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
407 	if (vp->v_type != VDIR)
408 		error = ENOTDIR;
409 #ifdef MAC
410 	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
411 	}
412 #endif
413 	else
414 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
415 	while (!error && (mp = vp->v_mountedhere) != NULL) {
416 		if (vfs_busy(mp, 0, 0, td))
417 			continue;
418 		error = VFS_ROOT(mp, &tdp);
419 		vfs_unbusy(mp, td);
420 		if (error)
421 			break;
422 		vput(vp);
423 		vp = tdp;
424 	}
425 	if (error) {
426 		vput(vp);
427 		return (error);
428 	}
429 	VOP_UNLOCK(vp, 0, td);
430 	FILEDESC_LOCK(fdp);
431 	vpold = fdp->fd_cdir;
432 	fdp->fd_cdir = vp;
433 	FILEDESC_UNLOCK(fdp);
434 	vrele(vpold);
435 	return (0);
436 }
437 
438 /*
439  * Change current working directory (``.'').
440  */
441 #ifndef _SYS_SYSPROTO_H_
442 struct chdir_args {
443 	char	*path;
444 };
445 #endif
446 /* ARGSUSED */
447 int
448 chdir(td, uap)
449 	struct thread *td;
450 	struct chdir_args /* {
451 		syscallarg(char *) path;
452 	} */ *uap;
453 {
454 
455 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
456 }
457 
458 int
459 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
460 {
461 	register struct filedesc *fdp = td->td_proc->p_fd;
462 	int error;
463 	struct nameidata nd;
464 	struct vnode *vp;
465 
466 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, pathseg, path, td);
467 	if ((error = change_dir(&nd, td)) != 0)
468 		return (error);
469 	NDFREE(&nd, NDF_ONLY_PNBUF);
470 	FILEDESC_LOCK(fdp);
471 	vp = fdp->fd_cdir;
472 	fdp->fd_cdir = nd.ni_vp;
473 	FILEDESC_UNLOCK(fdp);
474 	vrele(vp);
475 	return (0);
476 }
477 
478 /*
479  * Helper function for raised chroot(2) security function:  Refuse if
480  * any filedescriptors are open directories.
481  */
482 static int
483 chroot_refuse_vdir_fds(fdp)
484 	struct filedesc *fdp;
485 {
486 	struct vnode *vp;
487 	struct file *fp;
488 	int fd;
489 
490 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
491 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
492 		fp = fget_locked(fdp, fd);
493 		if (fp == NULL)
494 			continue;
495 		if (fp->f_type == DTYPE_VNODE) {
496 			vp = (struct vnode *)fp->f_data;
497 			if (vp->v_type == VDIR)
498 				return (EPERM);
499 		}
500 	}
501 	return (0);
502 }
503 
504 /*
505  * This sysctl determines if we will allow a process to chroot(2) if it
506  * has a directory open:
507  *	0: disallowed for all processes.
508  *	1: allowed for processes that were not already chroot(2)'ed.
509  *	2: allowed for all processes.
510  */
511 
512 static int chroot_allow_open_directories = 1;
513 
514 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
515      &chroot_allow_open_directories, 0, "");
516 
517 /*
518  * Change notion of root (``/'') directory.
519  */
520 #ifndef _SYS_SYSPROTO_H_
521 struct chroot_args {
522 	char	*path;
523 };
524 #endif
525 /* ARGSUSED */
526 int
527 chroot(td, uap)
528 	struct thread *td;
529 	struct chroot_args /* {
530 		syscallarg(char *) path;
531 	} */ *uap;
532 {
533 	register struct filedesc *fdp = td->td_proc->p_fd;
534 	int error;
535 	struct nameidata nd;
536 	struct vnode *vp;
537 
538 	error = suser_cred(td->td_ucred, PRISON_ROOT);
539 	if (error)
540 		return (error);
541 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
542 	    SCARG(uap, path), td);
543 	mtx_lock(&Giant);
544 	if ((error = change_dir(&nd, td)) != 0)
545 		goto error;
546 #ifdef MAC
547 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
548 		goto error;
549 #endif
550 	FILEDESC_LOCK(fdp);
551 	if (chroot_allow_open_directories == 0 ||
552 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
553 		error = chroot_refuse_vdir_fds(fdp);
554 		if (error)
555 			goto error_unlock;
556 	}
557 	vp = fdp->fd_rdir;
558 	fdp->fd_rdir = nd.ni_vp;
559 	if (!fdp->fd_jdir) {
560 		fdp->fd_jdir = nd.ni_vp;
561                 VREF(fdp->fd_jdir);
562 	}
563 	FILEDESC_UNLOCK(fdp);
564 	NDFREE(&nd, NDF_ONLY_PNBUF);
565 	vrele(vp);
566 	mtx_unlock(&Giant);
567 	return (0);
568 error_unlock:
569 	FILEDESC_UNLOCK(fdp);
570 error:
571 	mtx_unlock(&Giant);
572 	NDFREE(&nd, 0);
573 	return (error);
574 }
575 
576 /*
577  * Common routine for chroot and chdir.
578  */
579 static int
580 change_dir(ndp, td)
581 	register struct nameidata *ndp;
582 	struct thread *td;
583 {
584 	struct vnode *vp;
585 	int error;
586 
587 	error = namei(ndp);
588 	if (error)
589 		return (error);
590 	vp = ndp->ni_vp;
591 	if (vp->v_type != VDIR)
592 		error = ENOTDIR;
593 #ifdef MAC
594 	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
595 	}
596 #endif
597 	else
598 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
599 	if (error)
600 		vput(vp);
601 	else
602 		VOP_UNLOCK(vp, 0, td);
603 	return (error);
604 }
605 
606 /*
607  * Check permissions, allocate an open file structure,
608  * and call the device open routine if any.
609  */
610 #ifndef _SYS_SYSPROTO_H_
611 struct open_args {
612 	char	*path;
613 	int	flags;
614 	int	mode;
615 };
616 #endif
617 int
618 open(td, uap)
619 	struct thread *td;
620 	register struct open_args /* {
621 		syscallarg(char *) path;
622 		syscallarg(int) flags;
623 		syscallarg(int) mode;
624 	} */ *uap;
625 {
626 
627 	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
628 }
629 
630 int
631 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
632     int mode)
633 {
634 	struct proc *p = td->td_proc;
635 	struct filedesc *fdp = p->p_fd;
636 	struct file *fp;
637 	struct vnode *vp;
638 	struct vattr vat;
639 	struct mount *mp;
640 	int cmode, oflags;
641 	struct file *nfp;
642 	int type, indx, error;
643 	struct flock lf;
644 	struct nameidata nd;
645 
646 	if ((flags & O_ACCMODE) == O_ACCMODE)
647 		return (EINVAL);
648 	oflags = flags;
649 	flags = FFLAGS(flags);
650 	error = falloc(td, &nfp, &indx);
651 	if (error)
652 		return (error);
653 	fp = nfp;
654 	FILEDESC_LOCK(fdp);
655 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
656 	FILEDESC_UNLOCK(fdp);
657 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
658 	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
659 	/*
660 	 * Bump the ref count to prevent another process from closing
661 	 * the descriptor while we are blocked in vn_open()
662 	 */
663 	fhold(fp);
664 	error = vn_open(&nd, &flags, cmode);
665 	if (error) {
666 		/*
667 		 * release our own reference
668 		 */
669 		fdrop(fp, td);
670 
671 		/*
672 		 * handle special fdopen() case.  bleh.  dupfdopen() is
673 		 * responsible for dropping the old contents of ofiles[indx]
674 		 * if it succeeds.
675 		 */
676 		if ((error == ENODEV || error == ENXIO) &&
677 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
678 		    (error =
679 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
680 			td->td_retval[0] = indx;
681 			return (0);
682 		}
683 		/*
684 		 * Clean up the descriptor, but only if another thread hadn't
685 		 * replaced or closed it.
686 		 */
687 		FILEDESC_LOCK(fdp);
688 		if (fdp->fd_ofiles[indx] == fp) {
689 			fdp->fd_ofiles[indx] = NULL;
690 			FILEDESC_UNLOCK(fdp);
691 			fdrop(fp, td);
692 		} else
693 			FILEDESC_UNLOCK(fdp);
694 
695 		if (error == ERESTART)
696 			error = EINTR;
697 		return (error);
698 	}
699 	td->td_dupfd = 0;
700 	NDFREE(&nd, NDF_ONLY_PNBUF);
701 	vp = nd.ni_vp;
702 
703 	/*
704 	 * There should be 2 references on the file, one from the descriptor
705 	 * table, and one for us.
706 	 *
707 	 * Handle the case where someone closed the file (via its file
708 	 * descriptor) while we were blocked.  The end result should look
709 	 * like opening the file succeeded but it was immediately closed.
710 	 */
711 	FILEDESC_LOCK(fdp);
712 	FILE_LOCK(fp);
713 	if (fp->f_count == 1) {
714 		KASSERT(fdp->fd_ofiles[indx] != fp,
715 		    ("Open file descriptor lost all refs"));
716 		FILEDESC_UNLOCK(fdp);
717 		FILE_UNLOCK(fp);
718 		VOP_UNLOCK(vp, 0, td);
719 		vn_close(vp, flags & FMASK, fp->f_cred, td);
720 		fdrop(fp, td);
721 		td->td_retval[0] = indx;
722 		return 0;
723 	}
724 
725 	/* assert that vn_open created a backing object if one is needed */
726 	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
727 		("open: vmio vnode has no backing object after vn_open"));
728 
729 	fp->f_data = vp;
730 	fp->f_flag = flags & FMASK;
731 	fp->f_ops = &vnops;
732 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
733 	FILEDESC_UNLOCK(fdp);
734 	FILE_UNLOCK(fp);
735 	VOP_UNLOCK(vp, 0, td);
736 	if (flags & (O_EXLOCK | O_SHLOCK)) {
737 		lf.l_whence = SEEK_SET;
738 		lf.l_start = 0;
739 		lf.l_len = 0;
740 		if (flags & O_EXLOCK)
741 			lf.l_type = F_WRLCK;
742 		else
743 			lf.l_type = F_RDLCK;
744 		type = F_FLOCK;
745 		if ((flags & FNONBLOCK) == 0)
746 			type |= F_WAIT;
747 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
748 			    type)) != 0)
749 			goto bad;
750 		fp->f_flag |= FHASLOCK;
751 	}
752 	if (flags & O_TRUNC) {
753 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
754 			goto bad;
755 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
756 		VATTR_NULL(&vat);
757 		vat.va_size = 0;
758 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
759 #ifdef MAC
760 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
761 		if (error == 0)
762 #endif
763 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
764 		VOP_UNLOCK(vp, 0, td);
765 		vn_finished_write(mp);
766 		if (error)
767 			goto bad;
768 	}
769 	/*
770 	 * Release our private reference, leaving the one associated with
771 	 * the descriptor table intact.
772 	 */
773 	fdrop(fp, td);
774 	td->td_retval[0] = indx;
775 	return (0);
776 bad:
777 	FILEDESC_LOCK(fdp);
778 	if (fdp->fd_ofiles[indx] == fp) {
779 		fdp->fd_ofiles[indx] = NULL;
780 		FILEDESC_UNLOCK(fdp);
781 		fdrop(fp, td);
782 	} else
783 		FILEDESC_UNLOCK(fdp);
784 	fdrop(fp, td);
785 	return (error);
786 }
787 
#ifdef COMPAT_43
/*
 * Old creat(2): create a file.  Expressed as open() with the flags
 * O_WRONLY | O_CREAT | O_TRUNC and the caller's path and mode.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	struct open_args oa;

	oa.path = uap->path;
	oa.flags = O_WRONLY | O_CREAT | O_TRUNC;
	oa.mode = uap->mode;
	return (open(td, &oa));
}
#endif /* COMPAT_43 */
818 
819 /*
820  * Create a special file.
821  */
822 #ifndef _SYS_SYSPROTO_H_
823 struct mknod_args {
824 	char	*path;
825 	int	mode;
826 	int	dev;
827 };
828 #endif
829 /* ARGSUSED */
830 int
831 mknod(td, uap)
832 	struct thread *td;
833 	register struct mknod_args /* {
834 		syscallarg(char *) path;
835 		syscallarg(int) mode;
836 		syscallarg(int) dev;
837 	} */ *uap;
838 {
839 
840 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
841 }
842 
843 int
844 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
845     int dev)
846 {
847 	struct vnode *vp;
848 	struct mount *mp;
849 	struct vattr vattr;
850 	int error;
851 	int whiteout = 0;
852 	struct nameidata nd;
853 
854 	switch (mode & S_IFMT) {
855 	case S_IFCHR:
856 	case S_IFBLK:
857 		error = suser(td);
858 		break;
859 	default:
860 		error = suser_cred(td->td_ucred, PRISON_ROOT);
861 		break;
862 	}
863 	if (error)
864 		return (error);
865 restart:
866 	bwillwrite();
867 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
868 	if ((error = namei(&nd)) != 0)
869 		return (error);
870 	vp = nd.ni_vp;
871 	if (vp != NULL) {
872 		vrele(vp);
873 		error = EEXIST;
874 	} else {
875 		VATTR_NULL(&vattr);
876 		FILEDESC_LOCK(td->td_proc->p_fd);
877 		vattr.va_mode = (mode & ALLPERMS) &
878 		    ~td->td_proc->p_fd->fd_cmask;
879 		FILEDESC_UNLOCK(td->td_proc->p_fd);
880 		vattr.va_rdev = dev;
881 		whiteout = 0;
882 
883 		switch (mode & S_IFMT) {
884 		case S_IFMT:	/* used by badsect to flag bad sectors */
885 			vattr.va_type = VBAD;
886 			break;
887 		case S_IFCHR:
888 			vattr.va_type = VCHR;
889 			break;
890 		case S_IFBLK:
891 			vattr.va_type = VBLK;
892 			break;
893 		case S_IFWHT:
894 			whiteout = 1;
895 			break;
896 		default:
897 			error = EINVAL;
898 			break;
899 		}
900 	}
901 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
902 		NDFREE(&nd, NDF_ONLY_PNBUF);
903 		vput(nd.ni_dvp);
904 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
905 			return (error);
906 		goto restart;
907 	}
908 #ifdef MAC
909 	if (error == 0 && !whiteout)
910 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
911 		    &nd.ni_cnd, &vattr);
912 #endif
913 	if (!error) {
914 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
915 		if (whiteout)
916 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
917 		else {
918 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
919 						&nd.ni_cnd, &vattr);
920 			if (error == 0)
921 				vput(nd.ni_vp);
922 		}
923 	}
924 	NDFREE(&nd, NDF_ONLY_PNBUF);
925 	vput(nd.ni_dvp);
926 	vn_finished_write(mp);
927 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
928 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
929 	return (error);
930 }
931 
932 /*
933  * Create a named pipe.
934  */
935 #ifndef _SYS_SYSPROTO_H_
936 struct mkfifo_args {
937 	char	*path;
938 	int	mode;
939 };
940 #endif
941 /* ARGSUSED */
942 int
943 mkfifo(td, uap)
944 	struct thread *td;
945 	register struct mkfifo_args /* {
946 		syscallarg(char *) path;
947 		syscallarg(int) mode;
948 	} */ *uap;
949 {
950 
951 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
952 }
953 
954 int
955 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
956 {
957 	struct mount *mp;
958 	struct vattr vattr;
959 	int error;
960 	struct nameidata nd;
961 
962 restart:
963 	bwillwrite();
964 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
965 	if ((error = namei(&nd)) != 0)
966 		return (error);
967 	if (nd.ni_vp != NULL) {
968 		NDFREE(&nd, NDF_ONLY_PNBUF);
969 		vrele(nd.ni_vp);
970 		vput(nd.ni_dvp);
971 		return (EEXIST);
972 	}
973 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
974 		NDFREE(&nd, NDF_ONLY_PNBUF);
975 		vput(nd.ni_dvp);
976 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
977 			return (error);
978 		goto restart;
979 	}
980 	VATTR_NULL(&vattr);
981 	vattr.va_type = VFIFO;
982 	FILEDESC_LOCK(td->td_proc->p_fd);
983 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
984 	FILEDESC_UNLOCK(td->td_proc->p_fd);
985 #ifdef MAC
986 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
987 	    &vattr);
988 	if (error)
989 		goto out;
990 #endif
991 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
992 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
993 	if (error == 0)
994 		vput(nd.ni_vp);
995 #ifdef MAC
996 out:
997 #endif
998 	NDFREE(&nd, NDF_ONLY_PNBUF);
999 	vput(nd.ni_dvp);
1000 	vn_finished_write(mp);
1001 	return (error);
1002 }
1003 
1004 /*
1005  * Make a hard file link.
1006  */
1007 #ifndef _SYS_SYSPROTO_H_
1008 struct link_args {
1009 	char	*path;
1010 	char	*link;
1011 };
1012 #endif
1013 /* ARGSUSED */
1014 int
1015 link(td, uap)
1016 	struct thread *td;
1017 	register struct link_args /* {
1018 		syscallarg(char *) path;
1019 		syscallarg(char *) link;
1020 	} */ *uap;
1021 {
1022 
1023 	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1024 }
1025 
1026 int
1027 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1028 {
1029 	struct vnode *vp;
1030 	struct mount *mp;
1031 	struct nameidata nd;
1032 	int error;
1033 
1034 	bwillwrite();
1035 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, segflg, path, td);
1036 	if ((error = namei(&nd)) != 0)
1037 		return (error);
1038 	NDFREE(&nd, NDF_ONLY_PNBUF);
1039 	vp = nd.ni_vp;
1040 	if (vp->v_type == VDIR) {
1041 		vrele(vp);
1042 		return (EPERM);		/* POSIX */
1043 	}
1044 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1045 		vrele(vp);
1046 		return (error);
1047 	}
1048 	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1049 	if ((error = namei(&nd)) == 0) {
1050 		if (nd.ni_vp != NULL) {
1051 			vrele(nd.ni_vp);
1052 			error = EEXIST;
1053 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1054 		    == 0) {
1055 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1056 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1057 #ifdef MAC
1058 			error = mac_check_vnode_link(td->td_ucred, nd.ni_dvp,
1059 			    vp, &nd.ni_cnd);
1060 			if (error == 0)
1061 #endif
1062 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1063 			VOP_UNLOCK(vp, 0, td);
1064 		}
1065 		NDFREE(&nd, NDF_ONLY_PNBUF);
1066 		vput(nd.ni_dvp);
1067 	}
1068 	vrele(vp);
1069 	vn_finished_write(mp);
1070 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1071 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1072 	return (error);
1073 }
1074 
1075 /*
1076  * Make a symbolic link.
1077  */
1078 #ifndef _SYS_SYSPROTO_H_
1079 struct symlink_args {
1080 	char	*path;
1081 	char	*link;
1082 };
1083 #endif
1084 /* ARGSUSED */
1085 int
1086 symlink(td, uap)
1087 	struct thread *td;
1088 	register struct symlink_args /* {
1089 		syscallarg(char *) path;
1090 		syscallarg(char *) link;
1091 	} */ *uap;
1092 {
1093 
1094 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1095 }
1096 
1097 int
1098 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1099 {
1100 	struct mount *mp;
1101 	struct vattr vattr;
1102 	char *syspath;
1103 	int error;
1104 	struct nameidata nd;
1105 
1106 	if (segflg == UIO_SYSSPACE) {
1107 		syspath = path;
1108 	} else {
1109 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1110 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1111 			goto out;
1112 	}
1113 restart:
1114 	bwillwrite();
1115 	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1116 	if ((error = namei(&nd)) != 0)
1117 		goto out;
1118 	if (nd.ni_vp) {
1119 		NDFREE(&nd, NDF_ONLY_PNBUF);
1120 		vrele(nd.ni_vp);
1121 		vput(nd.ni_dvp);
1122 		error = EEXIST;
1123 		goto out;
1124 	}
1125 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1126 		NDFREE(&nd, NDF_ONLY_PNBUF);
1127 		vput(nd.ni_dvp);
1128 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1129 			return (error);
1130 		goto restart;
1131 	}
1132 	VATTR_NULL(&vattr);
1133 	FILEDESC_LOCK(td->td_proc->p_fd);
1134 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1135 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1136 #ifdef MAC
1137 	vattr.va_type = VLNK;
1138 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1139 	    &vattr);
1140 	if (error)
1141 		goto out2;
1142 #endif
1143 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1144 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1145 	if (error == 0)
1146 		vput(nd.ni_vp);
1147 #ifdef MAC
1148 out2:
1149 #endif
1150 	NDFREE(&nd, NDF_ONLY_PNBUF);
1151 	vput(nd.ni_dvp);
1152 	vn_finished_write(mp);
1153 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1154 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1155 out:
1156 	if (segflg != UIO_SYSSPACE)
1157 		uma_zfree(namei_zone, syspath);
1158 	return (error);
1159 }
1160 
1161 /*
1162  * Delete a whiteout from the filesystem.
1163  */
1164 /* ARGSUSED */
1165 int
1166 undelete(td, uap)
1167 	struct thread *td;
1168 	register struct undelete_args /* {
1169 		syscallarg(char *) path;
1170 	} */ *uap;
1171 {
1172 	int error;
1173 	struct mount *mp;
1174 	struct nameidata nd;
1175 
1176 restart:
1177 	bwillwrite();
1178 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1179 	    SCARG(uap, path), td);
1180 	error = namei(&nd);
1181 	if (error)
1182 		return (error);
1183 
1184 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1185 		NDFREE(&nd, NDF_ONLY_PNBUF);
1186 		if (nd.ni_vp)
1187 			vrele(nd.ni_vp);
1188 		vput(nd.ni_dvp);
1189 		return (EEXIST);
1190 	}
1191 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1192 		NDFREE(&nd, NDF_ONLY_PNBUF);
1193 		vput(nd.ni_dvp);
1194 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1195 			return (error);
1196 		goto restart;
1197 	}
1198 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1199 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1200 	NDFREE(&nd, NDF_ONLY_PNBUF);
1201 	vput(nd.ni_dvp);
1202 	vn_finished_write(mp);
1203 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1204 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1205 	return (error);
1206 }
1207 
1208 /*
1209  * Delete a name from the filesystem.
1210  */
1211 #ifndef _SYS_SYSPROTO_H_
1212 struct unlink_args {
1213 	char	*path;
1214 };
1215 #endif
1216 /* ARGSUSED */
1217 int
1218 unlink(td, uap)
1219 	struct thread *td;
1220 	struct unlink_args /* {
1221 		syscallarg(char *) path;
1222 	} */ *uap;
1223 {
1224 
1225 	return (kern_unlink(td, SCARG(uap, path), UIO_USERSPACE));
1226 }
1227 
1228 int
1229 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1230 {
1231 	struct mount *mp;
1232 	struct vnode *vp;
1233 	int error;
1234 	struct nameidata nd;
1235 
1236 restart:
1237 	bwillwrite();
1238 	NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, pathseg, path, td);
1239 	if ((error = namei(&nd)) != 0)
1240 		return (error);
1241 	vp = nd.ni_vp;
1242 	if (vp->v_type == VDIR)
1243 		error = EPERM;		/* POSIX */
1244 	else {
1245 		/*
1246 		 * The root of a mounted filesystem cannot be deleted.
1247 		 *
1248 		 * XXX: can this only be a VDIR case?
1249 		 */
1250 		if (vp->v_vflag & VV_ROOT)
1251 			error = EBUSY;
1252 	}
1253 	if (error == 0) {
1254 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1255 			NDFREE(&nd, NDF_ONLY_PNBUF);
1256 			if (vp == nd.ni_dvp)
1257 				vrele(vp);
1258 			else
1259 				vput(vp);
1260 			vput(nd.ni_dvp);
1261 			if ((error = vn_start_write(NULL, &mp,
1262 			    V_XSLEEP | PCATCH)) != 0)
1263 				return (error);
1264 			goto restart;
1265 		}
1266 #ifdef MAC
1267 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1268 		    &nd.ni_cnd);
1269 		if (error)
1270 			goto out;
1271 #endif
1272 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1273 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1274 #ifdef MAC
1275 out:
1276 #endif
1277 		vn_finished_write(mp);
1278 	}
1279 	NDFREE(&nd, NDF_ONLY_PNBUF);
1280 	if (vp == nd.ni_dvp)
1281 		vrele(vp);
1282 	else
1283 		vput(vp);
1284 	vput(nd.ni_dvp);
1285 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1286 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1287 	return (error);
1288 }
1289 
1290 /*
1291  * Reposition read/write file offset.
1292  */
1293 #ifndef _SYS_SYSPROTO_H_
1294 struct lseek_args {
1295 	int	fd;
1296 	int	pad;
1297 	off_t	offset;
1298 	int	whence;
1299 };
1300 #endif
1301 int
1302 lseek(td, uap)
1303 	struct thread *td;
1304 	register struct lseek_args /* {
1305 		syscallarg(int) fd;
1306 		syscallarg(int) pad;
1307 		syscallarg(off_t) offset;
1308 		syscallarg(int) whence;
1309 	} */ *uap;
1310 {
1311 	struct ucred *cred = td->td_ucred;
1312 	struct file *fp;
1313 	struct vnode *vp;
1314 	struct vattr vattr;
1315 	off_t offset;
1316 	int error, noneg;
1317 
1318 	if ((error = fget(td, uap->fd, &fp)) != 0)
1319 		return (error);
1320 	if (fp->f_type != DTYPE_VNODE) {
1321 		fdrop(fp, td);
1322 		return (ESPIPE);
1323 	}
1324 	vp = (struct vnode *)fp->f_data;
1325 	noneg = (vp->v_type != VCHR);
1326 	offset = SCARG(uap, offset);
1327 	switch (SCARG(uap, whence)) {
1328 	case L_INCR:
1329 		if (noneg &&
1330 		    (fp->f_offset < 0 ||
1331 		     (offset > 0 && fp->f_offset > OFF_MAX - offset)))
1332 			return (EOVERFLOW);
1333 		offset += fp->f_offset;
1334 		break;
1335 	case L_XTND:
1336 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1337 		error = VOP_GETATTR(vp, &vattr, cred, td);
1338 		VOP_UNLOCK(vp, 0, td);
1339 		if (error)
1340 			return (error);
1341 		if (noneg &&
1342 		    (vattr.va_size > OFF_MAX ||
1343 		     (offset > 0 && vattr.va_size > OFF_MAX - offset)))
1344 			return (EOVERFLOW);
1345 		offset += vattr.va_size;
1346 		break;
1347 	case L_SET:
1348 		break;
1349 	default:
1350 		fdrop(fp, td);
1351 		return (EINVAL);
1352 	}
1353 	if (noneg && offset < 0)
1354 		return (EINVAL);
1355 	fp->f_offset = offset;
1356 	*(off_t *)(td->td_retval) = fp->f_offset;
1357 	fdrop(fp, td);
1358 	return (0);
1359 }
1360 
1361 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1362 /*
1363  * Reposition read/write file offset.
1364  */
1365 #ifndef _SYS_SYSPROTO_H_
1366 struct olseek_args {
1367 	int	fd;
1368 	long	offset;
1369 	int	whence;
1370 };
1371 #endif
1372 int
1373 olseek(td, uap)
1374 	struct thread *td;
1375 	register struct olseek_args /* {
1376 		syscallarg(int) fd;
1377 		syscallarg(long) offset;
1378 		syscallarg(int) whence;
1379 	} */ *uap;
1380 {
1381 	struct lseek_args /* {
1382 		syscallarg(int) fd;
1383 		syscallarg(int) pad;
1384 		syscallarg(off_t) offset;
1385 		syscallarg(int) whence;
1386 	} */ nuap;
1387 	int error;
1388 
1389 	SCARG(&nuap, fd) = SCARG(uap, fd);
1390 	SCARG(&nuap, offset) = SCARG(uap, offset);
1391 	SCARG(&nuap, whence) = SCARG(uap, whence);
1392 	error = lseek(td, &nuap);
1393 	return (error);
1394 }
1395 #endif /* COMPAT_43 */
1396 
1397 /*
1398  * Check access permissions using passed credentials.
1399  */
1400 static int
1401 vn_access(vp, user_flags, cred, td)
1402 	struct vnode	*vp;
1403 	int		user_flags;
1404 	struct ucred	*cred;
1405 	struct thread	*td;
1406 {
1407 	int error, flags;
1408 
1409 	/* Flags == 0 means only check for existence. */
1410 	error = 0;
1411 	if (user_flags) {
1412 		flags = 0;
1413 		if (user_flags & R_OK)
1414 			flags |= VREAD;
1415 		if (user_flags & W_OK)
1416 			flags |= VWRITE;
1417 		if (user_flags & X_OK)
1418 			flags |= VEXEC;
1419 #ifdef MAC
1420 		error = mac_check_vnode_access(cred, vp, flags);
1421 		if (error)
1422 			return (error);
1423 #endif
1424 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1425 			error = VOP_ACCESS(vp, flags, cred, td);
1426 	}
1427 	return (error);
1428 }
1429 
1430 /*
1431  * Check access permissions using "real" credentials.
1432  */
1433 #ifndef _SYS_SYSPROTO_H_
1434 struct access_args {
1435 	char	*path;
1436 	int	flags;
1437 };
1438 #endif
1439 int
1440 access(td, uap)
1441 	struct thread *td;
1442 	register struct access_args /* {
1443 		syscallarg(char *) path;
1444 		syscallarg(int) flags;
1445 	} */ *uap;
1446 {
1447 
1448 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1449 }
1450 
1451 int
1452 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1453 {
1454 	struct ucred *cred, *tmpcred;
1455 	register struct vnode *vp;
1456 	int error;
1457 	struct nameidata nd;
1458 
1459 	/*
1460 	 * Create and modify a temporary credential instead of one that
1461 	 * is potentially shared.  This could also mess up socket
1462 	 * buffer accounting which can run in an interrupt context.
1463 	 *
1464 	 * XXX - Depending on how "threads" are finally implemented, it
1465 	 * may be better to explicitly pass the credential to namei()
1466 	 * rather than to modify the potentially shared process structure.
1467 	 */
1468 	cred = td->td_ucred;
1469 	tmpcred = crdup(cred);
1470 	tmpcred->cr_uid = cred->cr_ruid;
1471 	tmpcred->cr_groups[0] = cred->cr_rgid;
1472 	td->td_ucred = tmpcred;
1473 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1474 	if ((error = namei(&nd)) != 0)
1475 		goto out1;
1476 	vp = nd.ni_vp;
1477 
1478 	error = vn_access(vp, flags, tmpcred, td);
1479 	NDFREE(&nd, NDF_ONLY_PNBUF);
1480 	vput(vp);
1481 out1:
1482 	td->td_ucred = cred;
1483 	crfree(tmpcred);
1484 	return (error);
1485 }
1486 
1487 /*
1488  * Check access permissions using "effective" credentials.
1489  */
1490 #ifndef _SYS_SYSPROTO_H_
1491 struct eaccess_args {
1492 	char	*path;
1493 	int	flags;
1494 };
1495 #endif
1496 int
1497 eaccess(td, uap)
1498 	struct thread *td;
1499 	register struct eaccess_args /* {
1500 		syscallarg(char *) path;
1501 		syscallarg(int) flags;
1502 	} */ *uap;
1503 {
1504 	struct nameidata nd;
1505 	struct vnode *vp;
1506 	int error;
1507 
1508 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1509 	    SCARG(uap, path), td);
1510 	if ((error = namei(&nd)) != 0)
1511 		return (error);
1512 	vp = nd.ni_vp;
1513 
1514 	error = vn_access(vp, SCARG(uap, flags), td->td_ucred, td);
1515 	NDFREE(&nd, NDF_ONLY_PNBUF);
1516 	vput(vp);
1517 	return (error);
1518 }
1519 
1520 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1521 /*
1522  * Get file status; this version follows links.
1523  */
1524 #ifndef _SYS_SYSPROTO_H_
1525 struct ostat_args {
1526 	char	*path;
1527 	struct ostat *ub;
1528 };
1529 #endif
1530 /* ARGSUSED */
1531 int
1532 ostat(td, uap)
1533 	struct thread *td;
1534 	register struct ostat_args /* {
1535 		syscallarg(char *) path;
1536 		syscallarg(struct ostat *) ub;
1537 	} */ *uap;
1538 {
1539 	struct stat sb;
1540 	struct ostat osb;
1541 	int error;
1542 	struct nameidata nd;
1543 
1544 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1545 	    SCARG(uap, path), td);
1546 	if ((error = namei(&nd)) != 0)
1547 		return (error);
1548 	NDFREE(&nd, NDF_ONLY_PNBUF);
1549 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1550 	vput(nd.ni_vp);
1551 	if (error)
1552 		return (error);
1553 	cvtstat(&sb, &osb);
1554 	error = copyout(&osb, SCARG(uap, ub), sizeof (osb));
1555 	return (error);
1556 }
1557 
1558 /*
1559  * Get file status; this version does not follow links.
1560  */
1561 #ifndef _SYS_SYSPROTO_H_
1562 struct olstat_args {
1563 	char	*path;
1564 	struct ostat *ub;
1565 };
1566 #endif
1567 /* ARGSUSED */
1568 int
1569 olstat(td, uap)
1570 	struct thread *td;
1571 	register struct olstat_args /* {
1572 		syscallarg(char *) path;
1573 		syscallarg(struct ostat *) ub;
1574 	} */ *uap;
1575 {
1576 	struct vnode *vp;
1577 	struct stat sb;
1578 	struct ostat osb;
1579 	int error;
1580 	struct nameidata nd;
1581 
1582 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1583 	    SCARG(uap, path), td);
1584 	if ((error = namei(&nd)) != 0)
1585 		return (error);
1586 	vp = nd.ni_vp;
1587 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1588 	NDFREE(&nd, NDF_ONLY_PNBUF);
1589 	vput(vp);
1590 	if (error)
1591 		return (error);
1592 	cvtstat(&sb, &osb);
1593 	error = copyout(&osb, SCARG(uap, ub), sizeof (osb));
1594 	return (error);
1595 }
1596 
/*
 * Convert from a new (struct stat) to an old (struct ostat) stat structure.
 */
1600 void
1601 cvtstat(st, ost)
1602 	struct stat *st;
1603 	struct ostat *ost;
1604 {
1605 
1606 	ost->st_dev = st->st_dev;
1607 	ost->st_ino = st->st_ino;
1608 	ost->st_mode = st->st_mode;
1609 	ost->st_nlink = st->st_nlink;
1610 	ost->st_uid = st->st_uid;
1611 	ost->st_gid = st->st_gid;
1612 	ost->st_rdev = st->st_rdev;
1613 	if (st->st_size < (quad_t)1 << 32)
1614 		ost->st_size = st->st_size;
1615 	else
1616 		ost->st_size = -2;
1617 	ost->st_atime = st->st_atime;
1618 	ost->st_mtime = st->st_mtime;
1619 	ost->st_ctime = st->st_ctime;
1620 	ost->st_blksize = st->st_blksize;
1621 	ost->st_blocks = st->st_blocks;
1622 	ost->st_flags = st->st_flags;
1623 	ost->st_gen = st->st_gen;
1624 }
1625 #endif /* COMPAT_43 || COMPAT_SUNOS */
1626 
1627 /*
1628  * Get file status; this version follows links.
1629  */
1630 #ifndef _SYS_SYSPROTO_H_
1631 struct stat_args {
1632 	char	*path;
1633 	struct stat *ub;
1634 };
1635 #endif
1636 /* ARGSUSED */
1637 int
1638 stat(td, uap)
1639 	struct thread *td;
1640 	register struct stat_args /* {
1641 		syscallarg(char *) path;
1642 		syscallarg(struct stat *) ub;
1643 	} */ *uap;
1644 {
1645 	struct stat sb;
1646 	int error;
1647 	struct nameidata nd;
1648 
1649 #ifdef LOOKUP_SHARED
1650 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
1651 	    UIO_USERSPACE, SCARG(uap, path), td);
1652 #else
1653 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1654 	    SCARG(uap, path), td);
1655 #endif
1656 	if ((error = namei(&nd)) != 0)
1657 		return (error);
1658 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1659 	NDFREE(&nd, NDF_ONLY_PNBUF);
1660 	vput(nd.ni_vp);
1661 	if (error)
1662 		return (error);
1663 	error = copyout(&sb, SCARG(uap, ub), sizeof (sb));
1664 	return (error);
1665 }
1666 
1667 /*
1668  * Get file status; this version does not follow links.
1669  */
1670 #ifndef _SYS_SYSPROTO_H_
1671 struct lstat_args {
1672 	char	*path;
1673 	struct stat *ub;
1674 };
1675 #endif
1676 /* ARGSUSED */
1677 int
1678 lstat(td, uap)
1679 	struct thread *td;
1680 	register struct lstat_args /* {
1681 		syscallarg(char *) path;
1682 		syscallarg(struct stat *) ub;
1683 	} */ *uap;
1684 {
1685 	int error;
1686 	struct vnode *vp;
1687 	struct stat sb;
1688 	struct nameidata nd;
1689 
1690 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1691 	    SCARG(uap, path), td);
1692 	if ((error = namei(&nd)) != 0)
1693 		return (error);
1694 	vp = nd.ni_vp;
1695 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1696 	NDFREE(&nd, NDF_ONLY_PNBUF);
1697 	vput(vp);
1698 	if (error)
1699 		return (error);
1700 	error = copyout(&sb, SCARG(uap, ub), sizeof (sb));
1701 	return (error);
1702 }
1703 
1704 /*
1705  * Implementation of the NetBSD stat() function.
1706  * XXX This should probably be collapsed with the FreeBSD version,
1707  * as the differences are only due to vn_stat() clearing spares at
1708  * the end of the structures.  vn_stat could be split to avoid this,
1709  * and thus collapse the following to close to zero code.
1710  */
1711 void
1712 cvtnstat(sb, nsb)
1713 	struct stat *sb;
1714 	struct nstat *nsb;
1715 {
1716 	bzero(nsb, sizeof *nsb);
1717 	nsb->st_dev = sb->st_dev;
1718 	nsb->st_ino = sb->st_ino;
1719 	nsb->st_mode = sb->st_mode;
1720 	nsb->st_nlink = sb->st_nlink;
1721 	nsb->st_uid = sb->st_uid;
1722 	nsb->st_gid = sb->st_gid;
1723 	nsb->st_rdev = sb->st_rdev;
1724 	nsb->st_atimespec = sb->st_atimespec;
1725 	nsb->st_mtimespec = sb->st_mtimespec;
1726 	nsb->st_ctimespec = sb->st_ctimespec;
1727 	nsb->st_size = sb->st_size;
1728 	nsb->st_blocks = sb->st_blocks;
1729 	nsb->st_blksize = sb->st_blksize;
1730 	nsb->st_flags = sb->st_flags;
1731 	nsb->st_gen = sb->st_gen;
1732 	nsb->st_birthtimespec = sb->st_birthtimespec;
1733 }
1734 
1735 #ifndef _SYS_SYSPROTO_H_
1736 struct nstat_args {
1737 	char	*path;
1738 	struct nstat *ub;
1739 };
1740 #endif
1741 /* ARGSUSED */
1742 int
1743 nstat(td, uap)
1744 	struct thread *td;
1745 	register struct nstat_args /* {
1746 		syscallarg(char *) path;
1747 		syscallarg(struct nstat *) ub;
1748 	} */ *uap;
1749 {
1750 	struct stat sb;
1751 	struct nstat nsb;
1752 	int error;
1753 	struct nameidata nd;
1754 
1755 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1756 	    SCARG(uap, path), td);
1757 	if ((error = namei(&nd)) != 0)
1758 		return (error);
1759 	NDFREE(&nd, NDF_ONLY_PNBUF);
1760 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1761 	vput(nd.ni_vp);
1762 	if (error)
1763 		return (error);
1764 	cvtnstat(&sb, &nsb);
1765 	error = copyout(&nsb, SCARG(uap, ub), sizeof (nsb));
1766 	return (error);
1767 }
1768 
1769 /*
1770  * NetBSD lstat.  Get file status; this version does not follow links.
1771  */
#ifndef _SYS_SYSPROTO_H_
/*
 * Fallback declaration of the nlstat() argument structure: a pathname
 * and the struct nstat buffer that receives the result.
 */
struct nlstat_args {
	char	*path;
	struct nstat *ub;
};
#endif
1778 /* ARGSUSED */
1779 int
1780 nlstat(td, uap)
1781 	struct thread *td;
1782 	register struct nlstat_args /* {
1783 		syscallarg(char *) path;
1784 		syscallarg(struct nstat *) ub;
1785 	} */ *uap;
1786 {
1787 	int error;
1788 	struct vnode *vp;
1789 	struct stat sb;
1790 	struct nstat nsb;
1791 	struct nameidata nd;
1792 
1793 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1794 	    SCARG(uap, path), td);
1795 	if ((error = namei(&nd)) != 0)
1796 		return (error);
1797 	vp = nd.ni_vp;
1798 	NDFREE(&nd, NDF_ONLY_PNBUF);
1799 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1800 	vput(vp);
1801 	if (error)
1802 		return (error);
1803 	cvtnstat(&sb, &nsb);
1804 	error = copyout(&nsb, SCARG(uap, ub), sizeof (nsb));
1805 	return (error);
1806 }
1807 
1808 /*
1809  * Get configurable pathname variables.
1810  */
1811 #ifndef _SYS_SYSPROTO_H_
1812 struct pathconf_args {
1813 	char	*path;
1814 	int	name;
1815 };
1816 #endif
1817 /* ARGSUSED */
1818 int
1819 pathconf(td, uap)
1820 	struct thread *td;
1821 	register struct pathconf_args /* {
1822 		syscallarg(char *) path;
1823 		syscallarg(int) name;
1824 	} */ *uap;
1825 {
1826 	int error;
1827 	struct nameidata nd;
1828 
1829 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1830 	    SCARG(uap, path), td);
1831 	if ((error = namei(&nd)) != 0)
1832 		return (error);
1833 	NDFREE(&nd, NDF_ONLY_PNBUF);
1834 
1835 	/* If asynchronous I/O is available, it works for all files. */
1836 	if (uap->name == _PC_ASYNC_IO)
1837 		td->td_retval[0] = async_io_version;
1838 	else
1839 		error = VOP_PATHCONF(nd.ni_vp, uap->name, td->td_retval);
1840 	vput(nd.ni_vp);
1841 	return (error);
1842 }
1843 
1844 /*
1845  * Return target name of a symbolic link.
1846  */
1847 #ifndef _SYS_SYSPROTO_H_
1848 struct readlink_args {
1849 	char	*path;
1850 	char	*buf;
1851 	int	count;
1852 };
1853 #endif
1854 /* ARGSUSED */
1855 int
1856 readlink(td, uap)
1857 	struct thread *td;
1858 	register struct readlink_args /* {
1859 		syscallarg(char *) path;
1860 		syscallarg(char *) buf;
1861 		syscallarg(int) count;
1862 	} */ *uap;
1863 {
1864 
1865 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
1866 	    UIO_USERSPACE, uap->count));
1867 }
1868 
1869 int
1870 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
1871     enum uio_seg bufseg, int count)
1872 {
1873 	register struct vnode *vp;
1874 	struct iovec aiov;
1875 	struct uio auio;
1876 	int error;
1877 	struct nameidata nd;
1878 
1879 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1880 	if ((error = namei(&nd)) != 0)
1881 		return (error);
1882 	NDFREE(&nd, NDF_ONLY_PNBUF);
1883 	vp = nd.ni_vp;
1884 #ifdef MAC
1885 	error = mac_check_vnode_readlink(td->td_ucred, vp);
1886 	if (error) {
1887 		vput(vp);
1888 		return (error);
1889 	}
1890 #endif
1891 	if (vp->v_type != VLNK)
1892 		error = EINVAL;
1893 	else {
1894 		aiov.iov_base = buf;
1895 		aiov.iov_len = count;
1896 		auio.uio_iov = &aiov;
1897 		auio.uio_iovcnt = 1;
1898 		auio.uio_offset = 0;
1899 		auio.uio_rw = UIO_READ;
1900 		auio.uio_segflg = bufseg;
1901 		auio.uio_td = td;
1902 		auio.uio_resid = count;
1903 		error = VOP_READLINK(vp, &auio, td->td_ucred);
1904 	}
1905 	vput(vp);
1906 	td->td_retval[0] = count - auio.uio_resid;
1907 	return (error);
1908 }
1909 
1910 /*
1911  * Common implementation code for chflags() and fchflags().
1912  */
1913 static int
1914 setfflags(td, vp, flags)
1915 	struct thread *td;
1916 	struct vnode *vp;
1917 	int flags;
1918 {
1919 	int error;
1920 	struct mount *mp;
1921 	struct vattr vattr;
1922 
1923 	/*
1924 	 * Prevent non-root users from setting flags on devices.  When
1925 	 * a device is reused, users can retain ownership of the device
1926 	 * if they are allowed to set flags and programs assume that
1927 	 * chown can't fail when done as root.
1928 	 */
1929 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
1930 		error = suser_cred(td->td_ucred, PRISON_ROOT);
1931 		if (error)
1932 			return (error);
1933 	}
1934 
1935 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1936 		return (error);
1937 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1938 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1939 #ifdef MAC
1940 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
1941 	if (error == 0) {
1942 #endif
1943 		VATTR_NULL(&vattr);
1944 		vattr.va_flags = flags;
1945 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
1946 #ifdef MAC
1947 	}
1948 #endif
1949 	VOP_UNLOCK(vp, 0, td);
1950 	vn_finished_write(mp);
1951 	return (error);
1952 }
1953 
1954 /*
1955  * Change flags of a file given a path name.
1956  */
1957 #ifndef _SYS_SYSPROTO_H_
1958 struct chflags_args {
1959 	char	*path;
1960 	int	flags;
1961 };
1962 #endif
1963 /* ARGSUSED */
1964 int
1965 chflags(td, uap)
1966 	struct thread *td;
1967 	register struct chflags_args /* {
1968 		syscallarg(char *) path;
1969 		syscallarg(int) flags;
1970 	} */ *uap;
1971 {
1972 	int error;
1973 	struct nameidata nd;
1974 
1975 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1976 	if ((error = namei(&nd)) != 0)
1977 		return (error);
1978 	NDFREE(&nd, NDF_ONLY_PNBUF);
1979 	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
1980 	vrele(nd.ni_vp);
1981 	return error;
1982 }
1983 
1984 /*
1985  * Same as chflags() but doesn't follow symlinks.
1986  */
1987 int
1988 lchflags(td, uap)
1989 	struct thread *td;
1990 	register struct lchflags_args /* {
1991 		syscallarg(char *) path;
1992 		syscallarg(int) flags;
1993 	} */ *uap;
1994 {
1995 	int error;
1996 	struct nameidata nd;
1997 
1998 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1999 	if ((error = namei(&nd)) != 0)
2000 		return (error);
2001 	NDFREE(&nd, NDF_ONLY_PNBUF);
2002 	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
2003 	vrele(nd.ni_vp);
2004 	return error;
2005 }
2006 
2007 /*
2008  * Change flags of a file given a file descriptor.
2009  */
2010 #ifndef _SYS_SYSPROTO_H_
2011 struct fchflags_args {
2012 	int	fd;
2013 	int	flags;
2014 };
2015 #endif
2016 /* ARGSUSED */
2017 int
2018 fchflags(td, uap)
2019 	struct thread *td;
2020 	register struct fchflags_args /* {
2021 		syscallarg(int) fd;
2022 		syscallarg(int) flags;
2023 	} */ *uap;
2024 {
2025 	struct file *fp;
2026 	int error;
2027 
2028 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2029 		return (error);
2030 	error = setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags));
2031 	fdrop(fp, td);
2032 	return (error);
2033 }
2034 
2035 /*
2036  * Common implementation code for chmod(), lchmod() and fchmod().
2037  */
2038 static int
2039 setfmode(td, vp, mode)
2040 	struct thread *td;
2041 	struct vnode *vp;
2042 	int mode;
2043 {
2044 	int error;
2045 	struct mount *mp;
2046 	struct vattr vattr;
2047 
2048 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2049 		return (error);
2050 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2051 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2052 	VATTR_NULL(&vattr);
2053 	vattr.va_mode = mode & ALLPERMS;
2054 #ifdef MAC
2055 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2056 	if (error == 0)
2057 #endif
2058 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2059 	VOP_UNLOCK(vp, 0, td);
2060 	vn_finished_write(mp);
2061 	return error;
2062 }
2063 
2064 /*
2065  * Change mode of a file given path name.
2066  */
2067 #ifndef _SYS_SYSPROTO_H_
2068 struct chmod_args {
2069 	char	*path;
2070 	int	mode;
2071 };
2072 #endif
2073 /* ARGSUSED */
2074 int
2075 chmod(td, uap)
2076 	struct thread *td;
2077 	register struct chmod_args /* {
2078 		syscallarg(char *) path;
2079 		syscallarg(int) mode;
2080 	} */ *uap;
2081 {
2082 
2083 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2084 }
2085 
2086 int
2087 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2088 {
2089 	int error;
2090 	struct nameidata nd;
2091 
2092 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2093 	if ((error = namei(&nd)) != 0)
2094 		return (error);
2095 	NDFREE(&nd, NDF_ONLY_PNBUF);
2096 	error = setfmode(td, nd.ni_vp, mode);
2097 	vrele(nd.ni_vp);
2098 	return error;
2099 }
2100 
2101 /*
2102  * Change mode of a file given path name (don't follow links.)
2103  */
2104 #ifndef _SYS_SYSPROTO_H_
2105 struct lchmod_args {
2106 	char	*path;
2107 	int	mode;
2108 };
2109 #endif
2110 /* ARGSUSED */
2111 int
2112 lchmod(td, uap)
2113 	struct thread *td;
2114 	register struct lchmod_args /* {
2115 		syscallarg(char *) path;
2116 		syscallarg(int) mode;
2117 	} */ *uap;
2118 {
2119 	int error;
2120 	struct nameidata nd;
2121 
2122 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2123 	if ((error = namei(&nd)) != 0)
2124 		return (error);
2125 	NDFREE(&nd, NDF_ONLY_PNBUF);
2126 	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
2127 	vrele(nd.ni_vp);
2128 	return error;
2129 }
2130 
2131 /*
2132  * Change mode of a file given a file descriptor.
2133  */
2134 #ifndef _SYS_SYSPROTO_H_
2135 struct fchmod_args {
2136 	int	fd;
2137 	int	mode;
2138 };
2139 #endif
2140 /* ARGSUSED */
2141 int
2142 fchmod(td, uap)
2143 	struct thread *td;
2144 	register struct fchmod_args /* {
2145 		syscallarg(int) fd;
2146 		syscallarg(int) mode;
2147 	} */ *uap;
2148 {
2149 	struct file *fp;
2150 	struct vnode *vp;
2151 	int error;
2152 
2153 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2154 		return (error);
2155 	vp = (struct vnode *)fp->f_data;
2156 	error = setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode));
2157 	fdrop(fp, td);
2158 	return (error);
2159 }
2160 
2161 /*
2162  * Common implementation for chown(), lchown(), and fchown()
2163  */
2164 static int
2165 setfown(td, vp, uid, gid)
2166 	struct thread *td;
2167 	struct vnode *vp;
2168 	uid_t uid;
2169 	gid_t gid;
2170 {
2171 	int error;
2172 	struct mount *mp;
2173 	struct vattr vattr;
2174 
2175 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2176 		return (error);
2177 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2178 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2179 	VATTR_NULL(&vattr);
2180 	vattr.va_uid = uid;
2181 	vattr.va_gid = gid;
2182 #ifdef MAC
2183 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2184 	    vattr.va_gid);
2185 	if (error == 0)
2186 #endif
2187 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2188 	VOP_UNLOCK(vp, 0, td);
2189 	vn_finished_write(mp);
2190 	return error;
2191 }
2192 
2193 /*
2194  * Set ownership given a path name.
2195  */
2196 #ifndef _SYS_SYSPROTO_H_
2197 struct chown_args {
2198 	char	*path;
2199 	int	uid;
2200 	int	gid;
2201 };
2202 #endif
2203 /* ARGSUSED */
2204 int
2205 chown(td, uap)
2206 	struct thread *td;
2207 	register struct chown_args /* {
2208 		syscallarg(char *) path;
2209 		syscallarg(int) uid;
2210 		syscallarg(int) gid;
2211 	} */ *uap;
2212 {
2213 
2214 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2215 }
2216 
2217 int
2218 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2219     int gid)
2220 {
2221 	int error;
2222 	struct nameidata nd;
2223 
2224 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2225 	if ((error = namei(&nd)) != 0)
2226 		return (error);
2227 	NDFREE(&nd, NDF_ONLY_PNBUF);
2228 	error = setfown(td, nd.ni_vp, uid, gid);
2229 	vrele(nd.ni_vp);
2230 	return (error);
2231 }
2232 
2233 /*
2234  * Set ownership given a path name, do not cross symlinks.
2235  */
2236 #ifndef _SYS_SYSPROTO_H_
2237 struct lchown_args {
2238 	char	*path;
2239 	int	uid;
2240 	int	gid;
2241 };
2242 #endif
2243 /* ARGSUSED */
2244 int
2245 lchown(td, uap)
2246 	struct thread *td;
2247 	register struct lchown_args /* {
2248 		syscallarg(char *) path;
2249 		syscallarg(int) uid;
2250 		syscallarg(int) gid;
2251 	} */ *uap;
2252 {
2253 
2254 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2255 }
2256 
2257 int
2258 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2259     int gid)
2260 {
2261 	int error;
2262 	struct nameidata nd;
2263 
2264 	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2265 	if ((error = namei(&nd)) != 0)
2266 		return (error);
2267 	NDFREE(&nd, NDF_ONLY_PNBUF);
2268 	error = setfown(td, nd.ni_vp, uid, gid);
2269 	vrele(nd.ni_vp);
2270 	return (error);
2271 }
2272 
2273 /*
2274  * Set ownership given a file descriptor.
2275  */
2276 #ifndef _SYS_SYSPROTO_H_
2277 struct fchown_args {
2278 	int	fd;
2279 	int	uid;
2280 	int	gid;
2281 };
2282 #endif
2283 /* ARGSUSED */
2284 int
2285 fchown(td, uap)
2286 	struct thread *td;
2287 	register struct fchown_args /* {
2288 		syscallarg(int) fd;
2289 		syscallarg(int) uid;
2290 		syscallarg(int) gid;
2291 	} */ *uap;
2292 {
2293 	struct file *fp;
2294 	struct vnode *vp;
2295 	int error;
2296 
2297 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2298 		return (error);
2299 	vp = (struct vnode *)fp->f_data;
2300 	error = setfown(td, (struct vnode *)fp->f_data,
2301 		SCARG(uap, uid), SCARG(uap, gid));
2302 	fdrop(fp, td);
2303 	return (error);
2304 }
2305 
2306 /*
2307  * Common implementation code for utimes(), lutimes(), and futimes().
2308  */
2309 static int
2310 getutimes(usrtvp, tvpseg, tsp)
2311 	const struct timeval *usrtvp;
2312 	enum uio_seg tvpseg;
2313 	struct timespec *tsp;
2314 {
2315 	struct timeval tv[2];
2316 	const struct timeval *tvp;
2317 	int error;
2318 
2319 	if (usrtvp == NULL) {
2320 		microtime(&tv[0]);
2321 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2322 		tsp[1] = tsp[0];
2323 	} else {
2324 		if (tvpseg == UIO_SYSSPACE) {
2325 			tvp = usrtvp;
2326 		} else {
2327 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2328 				return (error);
2329 			tvp = tv;
2330 		}
2331 
2332 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2333 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2334 	}
2335 	return 0;
2336 }
2337 
2338 /*
2339  * Common implementation code for utimes(), lutimes(), and futimes().
2340  */
2341 static int
2342 setutimes(td, vp, ts, numtimes, nullflag)
2343 	struct thread *td;
2344 	struct vnode *vp;
2345 	const struct timespec *ts;
2346 	int numtimes;
2347 	int nullflag;
2348 {
2349 	int error, setbirthtime;
2350 	struct mount *mp;
2351 	struct vattr vattr;
2352 
2353 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2354 		return (error);
2355 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2356 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2357 	setbirthtime = 0;
2358 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2359 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2360 		setbirthtime = 1;
2361 	VATTR_NULL(&vattr);
2362 	vattr.va_atime = ts[0];
2363 	vattr.va_mtime = ts[1];
2364 	if (setbirthtime)
2365 		vattr.va_birthtime = ts[1];
2366 	if (numtimes > 2)
2367 		vattr.va_birthtime = ts[2];
2368 	if (nullflag)
2369 		vattr.va_vaflags |= VA_UTIMES_NULL;
2370 #ifdef MAC
2371 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2372 	    vattr.va_mtime);
2373 #endif
2374 	if (error == 0)
2375 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2376 	VOP_UNLOCK(vp, 0, td);
2377 	vn_finished_write(mp);
2378 	return error;
2379 }
2380 
2381 /*
2382  * Set the access and modification times of a file.
2383  */
2384 #ifndef _SYS_SYSPROTO_H_
2385 struct utimes_args {
2386 	char	*path;
2387 	struct	timeval *tptr;
2388 };
2389 #endif
2390 /* ARGSUSED */
2391 int
2392 utimes(td, uap)
2393 	struct thread *td;
2394 	register struct utimes_args /* {
2395 		syscallarg(char *) path;
2396 		syscallarg(struct timeval *) tptr;
2397 	} */ *uap;
2398 {
2399 
2400 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2401 	    UIO_USERSPACE));
2402 }
2403 
2404 int
2405 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2406     struct timeval *tptr, enum uio_seg tptrseg)
2407 {
2408 	struct timespec ts[2];
2409 	int error;
2410 	struct nameidata nd;
2411 
2412 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2413 		return (error);
2414 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2415 	if ((error = namei(&nd)) != 0)
2416 		return (error);
2417 	NDFREE(&nd, NDF_ONLY_PNBUF);
2418 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2419 	vrele(nd.ni_vp);
2420 	return (error);
2421 }
2422 
2423 /*
2424  * Set the access and modification times of a file.
2425  */
2426 #ifndef _SYS_SYSPROTO_H_
2427 struct lutimes_args {
2428 	char	*path;
2429 	struct	timeval *tptr;
2430 };
2431 #endif
2432 /* ARGSUSED */
2433 int
2434 lutimes(td, uap)
2435 	struct thread *td;
2436 	register struct lutimes_args /* {
2437 		syscallarg(char *) path;
2438 		syscallarg(struct timeval *) tptr;
2439 	} */ *uap;
2440 {
2441 
2442 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2443 	    UIO_USERSPACE));
2444 }
2445 
2446 int
2447 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2448     struct timeval *tptr, enum uio_seg tptrseg)
2449 {
2450 	struct timespec ts[2];
2451 	int error;
2452 	struct nameidata nd;
2453 
2454 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2455 		return (error);
2456 	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2457 	if ((error = namei(&nd)) != 0)
2458 		return (error);
2459 	NDFREE(&nd, NDF_ONLY_PNBUF);
2460 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2461 	vrele(nd.ni_vp);
2462 	return (error);
2463 }
2464 
2465 /*
2466  * Set the access and modification times of a file.
2467  */
2468 #ifndef _SYS_SYSPROTO_H_
2469 struct futimes_args {
2470 	int	fd;
2471 	struct	timeval *tptr;
2472 };
2473 #endif
2474 /* ARGSUSED */
2475 int
2476 futimes(td, uap)
2477 	struct thread *td;
2478 	register struct futimes_args /* {
2479 		syscallarg(int ) fd;
2480 		syscallarg(struct timeval *) tptr;
2481 	} */ *uap;
2482 {
2483 
2484 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2485 }
2486 
2487 int
2488 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2489     enum uio_seg tptrseg)
2490 {
2491 	struct timespec ts[2];
2492 	struct file *fp;
2493 	int error;
2494 
2495 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2496 		return (error);
2497 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2498 		return (error);
2499 	error = setutimes(td, (struct vnode *)fp->f_data, ts, 2, tptr == NULL);
2500 	fdrop(fp, td);
2501 	return (error);
2502 }
2503 
2504 /*
2505  * Truncate a file given its path name.
2506  */
2507 #ifndef _SYS_SYSPROTO_H_
2508 struct truncate_args {
2509 	char	*path;
2510 	int	pad;
2511 	off_t	length;
2512 };
2513 #endif
2514 /* ARGSUSED */
2515 int
2516 truncate(td, uap)
2517 	struct thread *td;
2518 	register struct truncate_args /* {
2519 		syscallarg(char *) path;
2520 		syscallarg(int) pad;
2521 		syscallarg(off_t) length;
2522 	} */ *uap;
2523 {
2524 
2525 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2526 }
2527 
2528 int
2529 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2530 {
2531 	struct mount *mp;
2532 	struct vnode *vp;
2533 	struct vattr vattr;
2534 	int error;
2535 	struct nameidata nd;
2536 
2537 	if (length < 0)
2538 		return(EINVAL);
2539 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2540 	if ((error = namei(&nd)) != 0)
2541 		return (error);
2542 	vp = nd.ni_vp;
2543 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2544 		vrele(vp);
2545 		return (error);
2546 	}
2547 	NDFREE(&nd, NDF_ONLY_PNBUF);
2548 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2549 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2550 	if (vp->v_type == VDIR)
2551 		error = EISDIR;
2552 #ifdef MAC
2553 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2554 	}
2555 #endif
2556 	else if ((error = vn_writechk(vp)) == 0 &&
2557 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2558 		VATTR_NULL(&vattr);
2559 		vattr.va_size = length;
2560 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2561 	}
2562 	vput(vp);
2563 	vn_finished_write(mp);
2564 	return (error);
2565 }
2566 
2567 /*
2568  * Truncate a file given a file descriptor.
2569  */
2570 #ifndef _SYS_SYSPROTO_H_
2571 struct ftruncate_args {
2572 	int	fd;
2573 	int	pad;
2574 	off_t	length;
2575 };
2576 #endif
2577 /* ARGSUSED */
2578 int
2579 ftruncate(td, uap)
2580 	struct thread *td;
2581 	register struct ftruncate_args /* {
2582 		syscallarg(int) fd;
2583 		syscallarg(int) pad;
2584 		syscallarg(off_t) length;
2585 	} */ *uap;
2586 {
2587 	struct mount *mp;
2588 	struct vattr vattr;
2589 	struct vnode *vp;
2590 	struct file *fp;
2591 	int error;
2592 
2593 	if (uap->length < 0)
2594 		return(EINVAL);
2595 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2596 		return (error);
2597 	if ((fp->f_flag & FWRITE) == 0) {
2598 		fdrop(fp, td);
2599 		return (EINVAL);
2600 	}
2601 	vp = (struct vnode *)fp->f_data;
2602 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2603 		fdrop(fp, td);
2604 		return (error);
2605 	}
2606 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2607 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2608 	if (vp->v_type == VDIR)
2609 		error = EISDIR;
2610 #ifdef MAC
2611 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
2612 	    vp))) {
2613 	}
2614 #endif
2615 	else if ((error = vn_writechk(vp)) == 0) {
2616 		VATTR_NULL(&vattr);
2617 		vattr.va_size = SCARG(uap, length);
2618 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2619 	}
2620 	VOP_UNLOCK(vp, 0, td);
2621 	vn_finished_write(mp);
2622 	fdrop(fp, td);
2623 	return (error);
2624 }
2625 
2626 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2627 /*
2628  * Truncate a file given its path name.
2629  */
2630 #ifndef _SYS_SYSPROTO_H_
2631 struct otruncate_args {
2632 	char	*path;
2633 	long	length;
2634 };
2635 #endif
2636 /* ARGSUSED */
2637 int
2638 otruncate(td, uap)
2639 	struct thread *td;
2640 	register struct otruncate_args /* {
2641 		syscallarg(char *) path;
2642 		syscallarg(long) length;
2643 	} */ *uap;
2644 {
2645 	struct truncate_args /* {
2646 		syscallarg(char *) path;
2647 		syscallarg(int) pad;
2648 		syscallarg(off_t) length;
2649 	} */ nuap;
2650 
2651 	SCARG(&nuap, path) = SCARG(uap, path);
2652 	SCARG(&nuap, length) = SCARG(uap, length);
2653 	return (truncate(td, &nuap));
2654 }
2655 
2656 /*
2657  * Truncate a file given a file descriptor.
2658  */
2659 #ifndef _SYS_SYSPROTO_H_
2660 struct oftruncate_args {
2661 	int	fd;
2662 	long	length;
2663 };
2664 #endif
2665 /* ARGSUSED */
2666 int
2667 oftruncate(td, uap)
2668 	struct thread *td;
2669 	register struct oftruncate_args /* {
2670 		syscallarg(int) fd;
2671 		syscallarg(long) length;
2672 	} */ *uap;
2673 {
2674 	struct ftruncate_args /* {
2675 		syscallarg(int) fd;
2676 		syscallarg(int) pad;
2677 		syscallarg(off_t) length;
2678 	} */ nuap;
2679 
2680 	SCARG(&nuap, fd) = SCARG(uap, fd);
2681 	SCARG(&nuap, length) = SCARG(uap, length);
2682 	return (ftruncate(td, &nuap));
2683 }
2684 #endif /* COMPAT_43 || COMPAT_SUNOS */
2685 
2686 /*
2687  * Sync an open file.
2688  */
2689 #ifndef _SYS_SYSPROTO_H_
2690 struct fsync_args {
2691 	int	fd;
2692 };
2693 #endif
2694 /* ARGSUSED */
2695 int
2696 fsync(td, uap)
2697 	struct thread *td;
2698 	struct fsync_args /* {
2699 		syscallarg(int) fd;
2700 	} */ *uap;
2701 {
2702 	struct vnode *vp;
2703 	struct mount *mp;
2704 	struct file *fp;
2705 	vm_object_t obj;
2706 	int error;
2707 
2708 	GIANT_REQUIRED;
2709 
2710 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2711 		return (error);
2712 	vp = (struct vnode *)fp->f_data;
2713 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2714 		fdrop(fp, td);
2715 		return (error);
2716 	}
2717 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2718 	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2719 		vm_object_page_clean(obj, 0, 0, 0);
2720 	}
2721 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2722 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)
2723 	    && softdep_fsync_hook != NULL)
2724 		error = (*softdep_fsync_hook)(vp);
2725 
2726 	VOP_UNLOCK(vp, 0, td);
2727 	vn_finished_write(mp);
2728 	fdrop(fp, td);
2729 	return (error);
2730 }
2731 
2732 /*
2733  * Rename files.  Source and destination must either both be directories,
2734  * or both not be directories.  If target is a directory, it must be empty.
2735  */
2736 #ifndef _SYS_SYSPROTO_H_
2737 struct rename_args {
2738 	char	*from;
2739 	char	*to;
2740 };
2741 #endif
2742 /* ARGSUSED */
2743 int
2744 rename(td, uap)
2745 	struct thread *td;
2746 	register struct rename_args /* {
2747 		syscallarg(char *) from;
2748 		syscallarg(char *) to;
2749 	} */ *uap;
2750 {
2751 
2752 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
2753 }
2754 
2755 int
2756 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
2757 {
2758 	struct mount *mp = NULL;
2759 	struct vnode *tvp, *fvp, *tdvp;
2760 	struct nameidata fromnd, tond;
2761 	int error;
2762 
2763 	bwillwrite();
2764 #ifdef MAC
2765 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART, pathseg,
2766 	    from, td);
2767 #else
2768 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, pathseg, from, td);
2769 #endif
2770 	if ((error = namei(&fromnd)) != 0)
2771 		return (error);
2772 #ifdef MAC
2773 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
2774 	    fromnd.ni_vp, &fromnd.ni_cnd);
2775 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
2776 	VOP_UNLOCK(fromnd.ni_vp, 0, td);
2777 #endif
2778 	fvp = fromnd.ni_vp;
2779 	if (error == 0)
2780 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
2781 	if (error != 0) {
2782 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2783 		vrele(fromnd.ni_dvp);
2784 		vrele(fvp);
2785 		goto out1;
2786 	}
2787 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
2788 	    NOOBJ, pathseg, to, td);
2789 	if (fromnd.ni_vp->v_type == VDIR)
2790 		tond.ni_cnd.cn_flags |= WILLBEDIR;
2791 	if ((error = namei(&tond)) != 0) {
2792 		/* Translate error code for rename("dir1", "dir2/."). */
2793 		if (error == EISDIR && fvp->v_type == VDIR)
2794 			error = EINVAL;
2795 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2796 		vrele(fromnd.ni_dvp);
2797 		vrele(fvp);
2798 		goto out1;
2799 	}
2800 	tdvp = tond.ni_dvp;
2801 	tvp = tond.ni_vp;
2802 	if (tvp != NULL) {
2803 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2804 			error = ENOTDIR;
2805 			goto out;
2806 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2807 			error = EISDIR;
2808 			goto out;
2809 		}
2810 	}
2811 	if (fvp == tdvp)
2812 		error = EINVAL;
2813 	/*
2814 	 * If the source is the same as the destination (that is, if they
2815 	 * are links to the same vnode), then there is nothing to do.
2816 	 */
2817 	if (fvp == tvp)
2818 		error = -1;
2819 #ifdef MAC
2820 	else
2821 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
2822 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
2823 #endif
2824 out:
2825 	if (!error) {
2826 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
2827 		if (fromnd.ni_dvp != tdvp) {
2828 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2829 		}
2830 		if (tvp) {
2831 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
2832 		}
2833 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2834 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2835 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2836 		NDFREE(&tond, NDF_ONLY_PNBUF);
2837 	} else {
2838 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2839 		NDFREE(&tond, NDF_ONLY_PNBUF);
2840 		if (tdvp == tvp)
2841 			vrele(tdvp);
2842 		else
2843 			vput(tdvp);
2844 		if (tvp)
2845 			vput(tvp);
2846 		vrele(fromnd.ni_dvp);
2847 		vrele(fvp);
2848 	}
2849 	vrele(tond.ni_startdir);
2850 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2851 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2852 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2853 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2854 out1:
2855 	vn_finished_write(mp);
2856 	if (fromnd.ni_startdir)
2857 		vrele(fromnd.ni_startdir);
2858 	if (error == -1)
2859 		return (0);
2860 	return (error);
2861 }
2862 
2863 /*
2864  * Make a directory file.
2865  */
2866 #ifndef _SYS_SYSPROTO_H_
2867 struct mkdir_args {
2868 	char	*path;
2869 	int	mode;
2870 };
2871 #endif
2872 /* ARGSUSED */
2873 int
2874 mkdir(td, uap)
2875 	struct thread *td;
2876 	register struct mkdir_args /* {
2877 		syscallarg(char *) path;
2878 		syscallarg(int) mode;
2879 	} */ *uap;
2880 {
2881 
2882 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
2883 }
2884 
2885 int
2886 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
2887 {
2888 	struct mount *mp;
2889 	struct vnode *vp;
2890 	struct vattr vattr;
2891 	int error;
2892 	struct nameidata nd;
2893 
2894 restart:
2895 	bwillwrite();
2896 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, path, td);
2897 	nd.ni_cnd.cn_flags |= WILLBEDIR;
2898 	if ((error = namei(&nd)) != 0)
2899 		return (error);
2900 	vp = nd.ni_vp;
2901 	if (vp != NULL) {
2902 		NDFREE(&nd, NDF_ONLY_PNBUF);
2903 		vrele(vp);
2904 		/*
2905 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
2906 		 * the strange behaviour of leaving the vnode unlocked
2907 		 * if the target is the same vnode as the parent.
2908 		 */
2909 		if (vp == nd.ni_dvp)
2910 			vrele(nd.ni_dvp);
2911 		else
2912 			vput(nd.ni_dvp);
2913 		return (EEXIST);
2914 	}
2915 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2916 		NDFREE(&nd, NDF_ONLY_PNBUF);
2917 		vput(nd.ni_dvp);
2918 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2919 			return (error);
2920 		goto restart;
2921 	}
2922 	VATTR_NULL(&vattr);
2923 	vattr.va_type = VDIR;
2924 	FILEDESC_LOCK(td->td_proc->p_fd);
2925 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
2926 	FILEDESC_UNLOCK(td->td_proc->p_fd);
2927 #ifdef MAC
2928 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
2929 	    &vattr);
2930 	if (error)
2931 		goto out;
2932 #endif
2933 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2934 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2935 #ifdef MAC
2936 out:
2937 #endif
2938 	NDFREE(&nd, NDF_ONLY_PNBUF);
2939 	vput(nd.ni_dvp);
2940 	if (!error)
2941 		vput(nd.ni_vp);
2942 	vn_finished_write(mp);
2943 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2944 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2945 	return (error);
2946 }
2947 
2948 /*
2949  * Remove a directory file.
2950  */
2951 #ifndef _SYS_SYSPROTO_H_
2952 struct rmdir_args {
2953 	char	*path;
2954 };
2955 #endif
2956 /* ARGSUSED */
2957 int
2958 rmdir(td, uap)
2959 	struct thread *td;
2960 	struct rmdir_args /* {
2961 		syscallarg(char *) path;
2962 	} */ *uap;
2963 {
2964 
2965 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
2966 }
2967 
2968 int
2969 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
2970 {
2971 	struct mount *mp;
2972 	struct vnode *vp;
2973 	int error;
2974 	struct nameidata nd;
2975 
2976 restart:
2977 	bwillwrite();
2978 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, pathseg, path, td);
2979 	if ((error = namei(&nd)) != 0)
2980 		return (error);
2981 	vp = nd.ni_vp;
2982 	if (vp->v_type != VDIR) {
2983 		error = ENOTDIR;
2984 		goto out;
2985 	}
2986 	/*
2987 	 * No rmdir "." please.
2988 	 */
2989 	if (nd.ni_dvp == vp) {
2990 		error = EINVAL;
2991 		goto out;
2992 	}
2993 	/*
2994 	 * The root of a mounted filesystem cannot be deleted.
2995 	 */
2996 	if (vp->v_vflag & VV_ROOT) {
2997 		error = EBUSY;
2998 		goto out;
2999 	}
3000 #ifdef MAC
3001 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3002 	    &nd.ni_cnd);
3003 	if (error)
3004 		goto out;
3005 #endif
3006 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3007 		NDFREE(&nd, NDF_ONLY_PNBUF);
3008 		if (nd.ni_dvp == vp)
3009 			vrele(nd.ni_dvp);
3010 		else
3011 			vput(nd.ni_dvp);
3012 		vput(vp);
3013 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3014 			return (error);
3015 		goto restart;
3016 	}
3017 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3018 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3019 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3020 	vn_finished_write(mp);
3021 out:
3022 	NDFREE(&nd, NDF_ONLY_PNBUF);
3023 	if (nd.ni_dvp == vp)
3024 		vrele(nd.ni_dvp);
3025 	else
3026 		vput(nd.ni_dvp);
3027 	vput(vp);
3028 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3029 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3030 	return (error);
3031 }
3032 
3033 #ifdef COMPAT_43
/*
 * Read a block of directory entries in a filesystem independent format.
 *
 * COMPAT_43 variant of getdirentries().  Unless the entries can be read
 * straight into the user buffer (see the BYTE_ORDER conditional below),
 * they are read into a temporary kernel buffer, the d_type/d_namlen
 * bytes of every entry are rewritten for the old layout, and the result
 * is copied out.
 *
 * Returns EINVAL if `count' exceeds 64KB or the descriptor is not a
 * directory, EBADF if it is not open for reading, EIO if an entry with
 * a zero record length is found, or the error from the underlying
 * operations.  On success td_retval[0] holds the number of bytes
 * transferred and the file offset at which the read started is copied
 * out to `basep'.
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(td, uap)
	struct thread *td;
	register struct ogetdirentries_args /* {
		syscallarg(int) fd;
		syscallarg(char *) buf;
		syscallarg(u_int) count;
		syscallarg(long *) basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (SCARG(uap, count) > 64 * 1024)
		return (EINVAL);
	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = (struct vnode *)fp->f_data;
	/* Re-entered whenever the union handling below switches vnodes. */
unionread:
	if (vp->v_type != VDIR) {
		fdrop(fp, td);
		return (EINVAL);
	}
	/* Describe the caller's buffer. */
	aiov.iov_base = SCARG(uap, buf);
	aiov.iov_len = SCARG(uap, count);
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = SCARG(uap, count);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember where this read started; reported through basep. */
	loff = auio.uio_offset = fp->f_offset;
#ifdef MAC
	error = mac_check_vnode_readdir(td->td_ucred, vp);
	if (error) {
		VOP_UNLOCK(vp, 0, td);
		fdrop(fp, td);
		return (error);
	}
#endif
	/*
	 * On non-little-endian machines a mount with
	 * mnt_maxsymlinklen <= 0 is read directly into the user buffer
	 * with no conversion; every other case goes through the
	 * conversion block.
	 */
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/* Read into a kernel buffer of the same size. */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = SCARG(uap, count);
		MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			readcnt = SCARG(uap, count) - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			/* Rewrite each entry in place for the old layout. */
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					/* A zero-length record would loop forever. */
					error = EIO;
					break;
				}
			}
			/* Copy out only if every entry was converted. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, td);
	if (error) {
		fdrop(fp, td);
		return (error);
	}
	/* Nothing was transferred: see whether another layer should be read. */
	if (SCARG(uap, count) == auio.uio_resid) {
		if (union_dircheckp) {
			error = union_dircheckp(td, &vp, fp);
			/* -1 asks for the read to be retried on the new vp. */
			if (error == -1)
				goto unionread;
			if (error) {
				fdrop(fp, td);
				return (error);
			}
		}
		/*
		 * XXX We could delay dropping the lock above but
		 * union_dircheckp complicates things.
		 */
		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
		/*
		 * At the root of a MNT_UNION mount, switch the file to the
		 * covered vnode and read that directory from offset 0.
		 */
		if ((vp->v_vflag & VV_ROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->f_data = vp;
			fp->f_offset = 0;
			vput(tvp);
			goto unionread;
		}
		VOP_UNLOCK(vp, 0, td);
	}
	/* Unlike getdirentries(), basep is written without a NULL check. */
	error = copyout(&loff, SCARG(uap, basep), sizeof(long));
	fdrop(fp, td);
	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
	return (error);
}
3185 #endif /* COMPAT_43 */
3186 
3187 /*
3188  * Read a block of directory entries in a filesystem independent format.
3189  */
3190 #ifndef _SYS_SYSPROTO_H_
3191 struct getdirentries_args {
3192 	int	fd;
3193 	char	*buf;
3194 	u_int	count;
3195 	long	*basep;
3196 };
3197 #endif
3198 int
3199 getdirentries(td, uap)
3200 	struct thread *td;
3201 	register struct getdirentries_args /* {
3202 		syscallarg(int) fd;
3203 		syscallarg(char *) buf;
3204 		syscallarg(u_int) count;
3205 		syscallarg(long *) basep;
3206 	} */ *uap;
3207 {
3208 	struct vnode *vp;
3209 	struct file *fp;
3210 	struct uio auio;
3211 	struct iovec aiov;
3212 	long loff;
3213 	int error, eofflag;
3214 
3215 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3216 		return (error);
3217 	if ((fp->f_flag & FREAD) == 0) {
3218 		fdrop(fp, td);
3219 		return (EBADF);
3220 	}
3221 	vp = (struct vnode *)fp->f_data;
3222 unionread:
3223 	if (vp->v_type != VDIR) {
3224 		fdrop(fp, td);
3225 		return (EINVAL);
3226 	}
3227 	aiov.iov_base = SCARG(uap, buf);
3228 	aiov.iov_len = SCARG(uap, count);
3229 	auio.uio_iov = &aiov;
3230 	auio.uio_iovcnt = 1;
3231 	auio.uio_rw = UIO_READ;
3232 	auio.uio_segflg = UIO_USERSPACE;
3233 	auio.uio_td = td;
3234 	auio.uio_resid = SCARG(uap, count);
3235 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3236 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3237 	loff = auio.uio_offset = fp->f_offset;
3238 #ifdef MAC
3239 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3240 	if (error == 0)
3241 #endif
3242 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3243 		    NULL);
3244 	fp->f_offset = auio.uio_offset;
3245 	VOP_UNLOCK(vp, 0, td);
3246 	if (error) {
3247 		fdrop(fp, td);
3248 		return (error);
3249 	}
3250 	if (SCARG(uap, count) == auio.uio_resid) {
3251 		if (union_dircheckp) {
3252 			error = union_dircheckp(td, &vp, fp);
3253 			if (error == -1)
3254 				goto unionread;
3255 			if (error) {
3256 				fdrop(fp, td);
3257 				return (error);
3258 			}
3259 		}
3260 		/*
3261 		 * XXX We could delay dropping the lock above but
3262 		 * union_dircheckp complicates things.
3263 		 */
3264 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3265 		if ((vp->v_vflag & VV_ROOT) &&
3266 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3267 			struct vnode *tvp = vp;
3268 			vp = vp->v_mount->mnt_vnodecovered;
3269 			VREF(vp);
3270 			fp->f_data = vp;
3271 			fp->f_offset = 0;
3272 			vput(tvp);
3273 			goto unionread;
3274 		}
3275 		VOP_UNLOCK(vp, 0, td);
3276 	}
3277 	if (SCARG(uap, basep) != NULL) {
3278 		error = copyout(&loff, SCARG(uap, basep), sizeof(long));
3279 	}
3280 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
3281 	fdrop(fp, td);
3282 	return (error);
3283 }
3284 #ifndef _SYS_SYSPROTO_H_
3285 struct getdents_args {
3286 	int fd;
3287 	char *buf;
3288 	size_t count;
3289 };
3290 #endif
3291 int
3292 getdents(td, uap)
3293 	struct thread *td;
3294 	register struct getdents_args /* {
3295 		syscallarg(int) fd;
3296 		syscallarg(char *) buf;
3297 		syscallarg(u_int) count;
3298 	} */ *uap;
3299 {
3300 	struct getdirentries_args ap;
3301 	ap.fd = uap->fd;
3302 	ap.buf = uap->buf;
3303 	ap.count = uap->count;
3304 	ap.basep = NULL;
3305 	return getdirentries(td, &ap);
3306 }
3307 
3308 /*
3309  * Set the mode mask for creation of filesystem nodes.
3310  *
3311  * MP SAFE
3312  */
3313 #ifndef _SYS_SYSPROTO_H_
3314 struct umask_args {
3315 	int	newmask;
3316 };
3317 #endif
3318 int
3319 umask(td, uap)
3320 	struct thread *td;
3321 	struct umask_args /* {
3322 		syscallarg(int) newmask;
3323 	} */ *uap;
3324 {
3325 	register struct filedesc *fdp;
3326 
3327 	FILEDESC_LOCK(td->td_proc->p_fd);
3328 	fdp = td->td_proc->p_fd;
3329 	td->td_retval[0] = fdp->fd_cmask;
3330 	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
3331 	FILEDESC_UNLOCK(td->td_proc->p_fd);
3332 	return (0);
3333 }
3334 
3335 /*
3336  * Void all references to file by ripping underlying filesystem
3337  * away from vnode.
3338  */
3339 #ifndef _SYS_SYSPROTO_H_
3340 struct revoke_args {
3341 	char	*path;
3342 };
3343 #endif
3344 /* ARGSUSED */
3345 int
3346 revoke(td, uap)
3347 	struct thread *td;
3348 	register struct revoke_args /* {
3349 		syscallarg(char *) path;
3350 	} */ *uap;
3351 {
3352 	struct mount *mp;
3353 	struct vnode *vp;
3354 	struct vattr vattr;
3355 	int error;
3356 	struct nameidata nd;
3357 
3358 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path),
3359 	    td);
3360 	if ((error = namei(&nd)) != 0)
3361 		return (error);
3362 	vp = nd.ni_vp;
3363 	NDFREE(&nd, NDF_ONLY_PNBUF);
3364 	if (vp->v_type != VCHR) {
3365 		vput(vp);
3366 		return (EINVAL);
3367 	}
3368 #ifdef MAC
3369 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3370 	if (error) {
3371 		vput(vp);
3372 		return (error);
3373 	}
3374 #endif
3375 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3376 	if (error) {
3377 		vput(vp);
3378 		return (error);
3379 	}
3380 	VOP_UNLOCK(vp, 0, td);
3381 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3382 		error = suser_cred(td->td_ucred, PRISON_ROOT);
3383 		if (error)
3384 			goto out;
3385 	}
3386 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3387 		goto out;
3388 	if (vcount(vp) > 1)
3389 		VOP_REVOKE(vp, REVOKEALL);
3390 	vn_finished_write(mp);
3391 out:
3392 	vrele(vp);
3393 	return (error);
3394 }
3395 
3396 /*
3397  * Convert a user file descriptor to a kernel file entry.
3398  * The file entry is locked upon returning.
3399  */
3400 int
3401 getvnode(fdp, fd, fpp)
3402 	struct filedesc *fdp;
3403 	int fd;
3404 	struct file **fpp;
3405 {
3406 	int error;
3407 	struct file *fp;
3408 
3409 	fp = NULL;
3410 	if (fdp == NULL)
3411 		error = EBADF;
3412 	else {
3413 		FILEDESC_LOCK(fdp);
3414 		if ((u_int)fd >= fdp->fd_nfiles ||
3415 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3416 			error = EBADF;
3417 		else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
3418 			fp = NULL;
3419 			error = EINVAL;
3420 		} else {
3421 			fhold(fp);
3422 			error = 0;
3423 		}
3424 		FILEDESC_UNLOCK(fdp);
3425 	}
3426 	*fpp = fp;
3427 	return (error);
3428 }
3429 /*
3430  * Get (NFS) file handle
3431  */
3432 #ifndef _SYS_SYSPROTO_H_
3433 struct getfh_args {
3434 	char	*fname;
3435 	fhandle_t *fhp;
3436 };
3437 #endif
3438 int
3439 getfh(td, uap)
3440 	struct thread *td;
3441 	register struct getfh_args *uap;
3442 {
3443 	struct nameidata nd;
3444 	fhandle_t fh;
3445 	register struct vnode *vp;
3446 	int error;
3447 
3448 	/*
3449 	 * Must be super user
3450 	 */
3451 	error = suser(td);
3452 	if (error)
3453 		return (error);
3454 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3455 	error = namei(&nd);
3456 	if (error)
3457 		return (error);
3458 	NDFREE(&nd, NDF_ONLY_PNBUF);
3459 	vp = nd.ni_vp;
3460 	bzero(&fh, sizeof(fh));
3461 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3462 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3463 	vput(vp);
3464 	if (error)
3465 		return (error);
3466 	error = copyout(&fh, uap->fhp, sizeof (fh));
3467 	return (error);
3468 }
3469 
/*
 * syscall for the rpc.lockd to use to translate a NFS file handle into
 * an open descriptor.
 *
 * warning: do not remove the suser() call or this becomes one giant
 * security hole.
 *
 * The handle is copied in from user space, mapped to a mount with
 * vfs_getvfs() and to a vnode with VFS_FHTOVP(); the rest of the
 * function repeats the open-time checks ("from vn_open" below) and then
 * allocates a file and descriptor for the vnode.
 *
 * Returns EINVAL if the flags request neither read nor write access or
 * include O_CREAT, ESTALE if the mount cannot be found, EMLINK for a
 * symbolic link, EOPNOTSUPP for a socket, EISDIR for a write/truncate
 * open of a directory, or the error from the underlying operations.
 * On success td_retval[0] holds the new descriptor index.
 */
#ifndef _SYS_SYSPROTO_H_
struct fhopen_args {
	const struct fhandle *u_fhp;
	int flags;
};
#endif
int
fhopen(td, uap)
	struct thread *td;
	struct fhopen_args /* {
		syscallarg(const struct fhandle *) u_fhp;
		syscallarg(int) flags;
	} */ *uap;
{
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	int fmode, mode, error, type;
	struct file *nfp;
	int indx;

	/*
	 * Must be super user
	 */
	error = suser(td);
	if (error)
		return (error);

	fmode = FFLAGS(SCARG(uap, flags));
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
	if (error)
		return(error);
	/* find the mount point */
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL)
		return (ESTALE);
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
	if (error)
		return (error);
	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	/* Build the access mode to check from the open flags. */
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (fmode & O_APPEND)
		mode |= VAPPEND;
#ifdef MAC
	error = mac_check_vnode_open(td->td_ucred, vp, mode);
	if (error)
		goto bad;
#endif
	if (mode) {
		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		/*
		 * The vnode is unlocked around vn_start_write(), so the
		 * failure path below uses vrele() rather than 'goto bad'.
		 */
		VOP_UNLOCK(vp, 0, td);				/* XXX */
		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
			vrele(vp);
			return (error);
		}
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
#ifdef MAC
		/*
		 * We don't yet have fp->f_cred, so use td->td_ucred, which
		 * should be right.
		 */
		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
		if (error == 0) {
#endif
			/* Truncate to zero length. */
			VATTR_NULL(vap);
			vap->va_size = 0;
			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
#ifdef MAC
		}
#endif
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	error = VOP_OPEN(vp, fmode, td->td_ucred, td);
	if (error)
		goto bad;
	/*
	 * Make sure that a VM object is created for VMIO support.
	 */
	/*
	 * NOTE(review): the 'goto bad' paths from here on only vput() the
	 * vnode; no call undoing the successful VOP_OPEN() is visible in
	 * this function -- confirm whether that is intended.
	 */
	if (vn_canvmio(vp) == TRUE) {
		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
			goto bad;
	}
	if (fmode & FWRITE)
		vp->v_writecount++;

	/*
	 * end of vn_open code
	 */

	if ((error = falloc(td, &nfp, &indx)) != 0) {
		/* Undo the writer count taken just above. */
		if (fmode & FWRITE)
			vp->v_writecount--;
		goto bad;
	}
	fp = nfp;

	/*
	 * Hold an extra reference to avoid having fp ripped out
	 * from under us while we block in the lock op
	 */
	fhold(fp);
	nfp->f_data = vp;
	nfp->f_flag = fmode & FMASK;
	nfp->f_ops = &vnops;
	nfp->f_type = DTYPE_VNODE;
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file flock-style lock: start 0, length 0. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode is not kept locked across the advisory lock call. */
		VOP_UNLOCK(vp, 0, td);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0) {
			/*
			 * The lock request failed.  Normally close the
			 * descriptor but handle the case where someone might
			 * have dup()d or close()d it when we weren't looking.
			 */
			FILEDESC_LOCK(fdp);
			if (fdp->fd_ofiles[indx] == fp) {
				fdp->fd_ofiles[indx] = NULL;
				FILEDESC_UNLOCK(fdp);
				fdrop(fp, td);
			} else
				FILEDESC_UNLOCK(fdp);
			/*
			 * release our private reference
			 */
			fdrop(fp, td);
			/*
			 * NOTE(review): vp is not released directly here;
			 * presumably dropping the last file reference does
			 * that through f_ops -- confirm.
			 */
			return(error);
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		fp->f_flag |= FHASLOCK;
	}
	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
		vfs_object_create(vp, td, td->td_ucred);

	VOP_UNLOCK(vp, 0, td);
	/* Drop the extra reference taken with fhold() above. */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);

bad:
	/* Common error exit: unlock and release the vnode. */
	vput(vp);
	return (error);
}
3675 
3676 /*
3677  * Stat an (NFS) file handle.
3678  */
3679 #ifndef _SYS_SYSPROTO_H_
3680 struct fhstat_args {
3681 	struct fhandle *u_fhp;
3682 	struct stat *sb;
3683 };
3684 #endif
3685 int
3686 fhstat(td, uap)
3687 	struct thread *td;
3688 	register struct fhstat_args /* {
3689 		syscallarg(struct fhandle *) u_fhp;
3690 		syscallarg(struct stat *) sb;
3691 	} */ *uap;
3692 {
3693 	struct stat sb;
3694 	fhandle_t fh;
3695 	struct mount *mp;
3696 	struct vnode *vp;
3697 	int error;
3698 
3699 	/*
3700 	 * Must be super user
3701 	 */
3702 	error = suser(td);
3703 	if (error)
3704 		return (error);
3705 
3706 	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
3707 	if (error)
3708 		return (error);
3709 
3710 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3711 		return (ESTALE);
3712 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3713 		return (error);
3714 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
3715 	vput(vp);
3716 	if (error)
3717 		return (error);
3718 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
3719 	return (error);
3720 }
3721 
3722 /*
3723  * Implement fstatfs() for (NFS) file handles.
3724  */
3725 #ifndef _SYS_SYSPROTO_H_
3726 struct fhstatfs_args {
3727 	struct fhandle *u_fhp;
3728 	struct statfs *buf;
3729 };
3730 #endif
3731 int
3732 fhstatfs(td, uap)
3733 	struct thread *td;
3734 	struct fhstatfs_args /* {
3735 		syscallarg(struct fhandle) *u_fhp;
3736 		syscallarg(struct statfs) *buf;
3737 	} */ *uap;
3738 {
3739 	struct statfs *sp;
3740 	struct mount *mp;
3741 	struct vnode *vp;
3742 	struct statfs sb;
3743 	fhandle_t fh;
3744 	int error;
3745 
3746 	/*
3747 	 * Must be super user
3748 	 */
3749 	error = suser(td);
3750 	if (error)
3751 		return (error);
3752 
3753 	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
3754 		return (error);
3755 
3756 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3757 		return (ESTALE);
3758 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3759 		return (error);
3760 	mp = vp->v_mount;
3761 	sp = &mp->mnt_stat;
3762 	vput(vp);
3763 #ifdef MAC
3764 	error = mac_check_mount_stat(td->td_ucred, mp);
3765 	if (error)
3766 		return (error);
3767 #endif
3768 	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3769 		return (error);
3770 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3771 	if (suser(td)) {
3772 		bcopy(sp, &sb, sizeof(sb));
3773 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3774 		sp = &sb;
3775 	}
3776 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
3777 }
3778 
3779 /*
3780  * Syscall to push extended attribute configuration information into the
3781  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3782  * a command (int cmd), and attribute name and misc data.  For now, the
3783  * attribute name is left in userspace for consumption by the VFS_op.
3784  * It will probably be changed to be copied into sysspace by the
3785  * syscall in the future, once issues with various consumers of the
3786  * attribute code have raised their hands.
3787  *
3788  * Currently this is used only by UFS Extended Attributes.
3789  */
3790 int
3791 extattrctl(td, uap)
3792 	struct thread *td;
3793 	struct extattrctl_args /* {
3794 		syscallarg(const char *) path;
3795 		syscallarg(int) cmd;
3796 		syscallarg(const char *) filename;
3797 		syscallarg(int) attrnamespace;
3798 		syscallarg(const char *) attrname;
3799 	} */ *uap;
3800 {
3801 	struct vnode *filename_vp;
3802 	struct nameidata nd;
3803 	struct mount *mp, *mp_writable;
3804 	char attrname[EXTATTR_MAXNAMELEN];
3805 	int error;
3806 
3807 	/*
3808 	 * uap->attrname is not always defined.  We check again later when we
3809 	 * invoke the VFS call so as to pass in NULL there if needed.
3810 	 */
3811 	if (uap->attrname != NULL) {
3812 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
3813 		    NULL);
3814 		if (error)
3815 			return (error);
3816 	}
3817 
3818 	/*
3819 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
3820 	 * which VFS_EXTATTRCTL() will later release.
3821 	 */
3822 	filename_vp = NULL;
3823 	if (uap->filename != NULL) {
3824 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3825 		    uap->filename, td);
3826 		error = namei(&nd);
3827 		if (error)
3828 			return (error);
3829 		filename_vp = nd.ni_vp;
3830 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3831 	}
3832 
3833 	/* uap->path is always defined. */
3834 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3835 	error = namei(&nd);
3836 	if (error) {
3837 		if (filename_vp != NULL)
3838 			vput(filename_vp);
3839 		return (error);
3840 	}
3841 	mp = nd.ni_vp->v_mount;
3842 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
3843 	NDFREE(&nd, 0);
3844 	if (error) {
3845 		if (filename_vp != NULL)
3846 			vput(filename_vp);
3847 		return (error);
3848 	}
3849 
3850 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
3851 	    uap->attrname != NULL ? attrname : NULL, td);
3852 
3853 	vn_finished_write(mp_writable);
3854 	/*
3855 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3856 	 * filename_vp, so vrele it if it is defined.
3857 	 */
3858 	if (filename_vp != NULL)
3859 		vrele(filename_vp);
3860 	return (error);
3861 }
3862 
3863 /*-
3864  * Set a named extended attribute on a file or directory
3865  *
3866  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3867  *            kernelspace string pointer "attrname", userspace buffer
3868  *            pointer "data", buffer length "nbytes", thread "td".
3869  * Returns: 0 on success, an error number otherwise
3870  * Locks: none
3871  * References: vp must be a valid reference for the duration of the call
3872  */
3873 static int
3874 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3875     void *data, size_t nbytes, struct thread *td)
3876 {
3877 	struct mount *mp;
3878 	struct uio auio;
3879 	struct iovec aiov;
3880 	ssize_t cnt;
3881 	int error;
3882 
3883 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
3884 	if (error)
3885 		return (error);
3886 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3887 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3888 
3889 	aiov.iov_base = data;
3890 	aiov.iov_len = nbytes;
3891 	auio.uio_iov = &aiov;
3892 	auio.uio_iovcnt = 1;
3893 	auio.uio_offset = 0;
3894 	if (nbytes > INT_MAX) {
3895 		error = EINVAL;
3896 		goto done;
3897 	}
3898 	auio.uio_resid = nbytes;
3899 	auio.uio_rw = UIO_WRITE;
3900 	auio.uio_segflg = UIO_USERSPACE;
3901 	auio.uio_td = td;
3902 	cnt = nbytes;
3903 
3904 #ifdef MAC
3905 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
3906 	    attrname, &auio);
3907 	if (error)
3908 		goto done;
3909 #endif
3910 
3911 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3912 	    td->td_ucred, td);
3913 	cnt -= auio.uio_resid;
3914 	td->td_retval[0] = cnt;
3915 
3916 done:
3917 	VOP_UNLOCK(vp, 0, td);
3918 	vn_finished_write(mp);
3919 	return (error);
3920 }
3921 
3922 int
3923 extattr_set_fd(td, uap)
3924 	struct thread *td;
3925 	struct extattr_set_fd_args /* {
3926 		syscallarg(int) fd;
3927 		syscallarg(int) attrnamespace;
3928 		syscallarg(const char *) attrname;
3929 		syscallarg(void *) data;
3930 		syscallarg(size_t) nbytes;
3931 	} */ *uap;
3932 {
3933 	struct file *fp;
3934 	char attrname[EXTATTR_MAXNAMELEN];
3935 	int error;
3936 
3937 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3938 	if (error)
3939 		return (error);
3940 
3941 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
3942 	if (error)
3943 		return (error);
3944 
3945 	error = extattr_set_vp((struct vnode *)fp->f_data, uap->attrnamespace,
3946 	    attrname, uap->data, uap->nbytes, td);
3947 	fdrop(fp, td);
3948 
3949 	return (error);
3950 }
3951 
3952 int
3953 extattr_set_file(td, uap)
3954 	struct thread *td;
3955 	struct extattr_set_file_args /* {
3956 		syscallarg(const char *) path;
3957 		syscallarg(int) attrnamespace;
3958 		syscallarg(const char *) attrname;
3959 		syscallarg(void *) data;
3960 		syscallarg(size_t) nbytes;
3961 	} */ *uap;
3962 {
3963 	struct nameidata nd;
3964 	char attrname[EXTATTR_MAXNAMELEN];
3965 	int error;
3966 
3967 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3968 	if (error)
3969 		return (error);
3970 
3971 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3972 	error = namei(&nd);
3973 	if (error)
3974 		return (error);
3975 	NDFREE(&nd, NDF_ONLY_PNBUF);
3976 
3977 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
3978 	    uap->data, uap->nbytes, td);
3979 
3980 	vrele(nd.ni_vp);
3981 	return (error);
3982 }
3983 
3984 int
3985 extattr_set_link(td, uap)
3986 	struct thread *td;
3987 	struct extattr_set_link_args /* {
3988 		syscallarg(const char *) path;
3989 		syscallarg(int) attrnamespace;
3990 		syscallarg(const char *) attrname;
3991 		syscallarg(void *) data;
3992 		syscallarg(size_t) nbytes;
3993 	} */ *uap;
3994 {
3995 	struct nameidata nd;
3996 	char attrname[EXTATTR_MAXNAMELEN];
3997 	int error;
3998 
3999 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4000 	if (error)
4001 		return (error);
4002 
4003 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4004 	error = namei(&nd);
4005 	if (error)
4006 		return (error);
4007 	NDFREE(&nd, NDF_ONLY_PNBUF);
4008 
4009 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4010 	    uap->data, uap->nbytes, td);
4011 
4012 	vrele(nd.ni_vp);
4013 	return (error);
4014 }
4015 
4016 /*-
4017  * Get a named extended attribute on a file or directory
4018  *
4019  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4020  *            kernelspace string pointer "attrname", userspace buffer
4021  *            pointer "data", buffer length "nbytes", thread "td".
4022  * Returns: 0 on success, an error number otherwise
4023  * Locks: none
4024  * References: vp must be a valid reference for the duration of the call
4025  */
4026 static int
4027 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4028     void *data, size_t nbytes, struct thread *td)
4029 {
4030 	struct uio auio, *auiop;
4031 	struct iovec aiov;
4032 	ssize_t cnt;
4033 	size_t size, *sizep;
4034 	int error;
4035 
4036 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4037 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4038 
4039 	/*
4040 	 * Slightly unusual semantics: if the user provides a NULL data
4041 	 * pointer, they don't want to receive the data, just the
4042 	 * maximum read length.
4043 	 */
4044 	auiop = NULL;
4045 	sizep = NULL;
4046 	cnt = 0;
4047 	if (data != NULL) {
4048 		aiov.iov_base = data;
4049 		aiov.iov_len = nbytes;
4050 		auio.uio_iov = &aiov;
4051 		auio.uio_offset = 0;
4052 		if (nbytes > INT_MAX) {
4053 			error = EINVAL;
4054 			goto done;
4055 		}
4056 		auio.uio_resid = nbytes;
4057 		auio.uio_rw = UIO_READ;
4058 		auio.uio_segflg = UIO_USERSPACE;
4059 		auio.uio_td = td;
4060 		auiop = &auio;
4061 		cnt = nbytes;
4062 	} else
4063 		sizep = &size;
4064 
4065 #ifdef MAC
4066 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4067 	    attrname, &auio);
4068 	if (error)
4069 		goto done;
4070 #endif
4071 
4072 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4073 	    td->td_ucred, td);
4074 
4075 	if (auiop != NULL) {
4076 		cnt -= auio.uio_resid;
4077 		td->td_retval[0] = cnt;
4078 	} else
4079 		td->td_retval[0] = size;
4080 
4081 done:
4082 	VOP_UNLOCK(vp, 0, td);
4083 	return (error);
4084 }
4085 
4086 int
4087 extattr_get_fd(td, uap)
4088 	struct thread *td;
4089 	struct extattr_get_fd_args /* {
4090 		syscallarg(int) fd;
4091 		syscallarg(int) attrnamespace;
4092 		syscallarg(const char *) attrname;
4093 		syscallarg(void *) data;
4094 		syscallarg(size_t) nbytes;
4095 	} */ *uap;
4096 {
4097 	struct file *fp;
4098 	char attrname[EXTATTR_MAXNAMELEN];
4099 	int error;
4100 
4101 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4102 	if (error)
4103 		return (error);
4104 
4105 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4106 	if (error)
4107 		return (error);
4108 
4109 	error = extattr_get_vp((struct vnode *)fp->f_data, uap->attrnamespace,
4110 	    attrname, uap->data, uap->nbytes, td);
4111 
4112 	fdrop(fp, td);
4113 	return (error);
4114 }
4115 
4116 int
4117 extattr_get_file(td, uap)
4118 	struct thread *td;
4119 	struct extattr_get_file_args /* {
4120 		syscallarg(const char *) path;
4121 		syscallarg(int) attrnamespace;
4122 		syscallarg(const char *) attrname;
4123 		syscallarg(void *) data;
4124 		syscallarg(size_t) nbytes;
4125 	} */ *uap;
4126 {
4127 	struct nameidata nd;
4128 	char attrname[EXTATTR_MAXNAMELEN];
4129 	int error;
4130 
4131 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4132 	if (error)
4133 		return (error);
4134 
4135 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4136 	error = namei(&nd);
4137 	if (error)
4138 		return (error);
4139 	NDFREE(&nd, NDF_ONLY_PNBUF);
4140 
4141 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4142 	    uap->data, uap->nbytes, td);
4143 
4144 	vrele(nd.ni_vp);
4145 	return (error);
4146 }
4147 
4148 int
4149 extattr_get_link(td, uap)
4150 	struct thread *td;
4151 	struct extattr_get_link_args /* {
4152 		syscallarg(const char *) path;
4153 		syscallarg(int) attrnamespace;
4154 		syscallarg(const char *) attrname;
4155 		syscallarg(void *) data;
4156 		syscallarg(size_t) nbytes;
4157 	} */ *uap;
4158 {
4159 	struct nameidata nd;
4160 	char attrname[EXTATTR_MAXNAMELEN];
4161 	int error;
4162 
4163 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4164 	if (error)
4165 		return (error);
4166 
4167 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4168 	error = namei(&nd);
4169 	if (error)
4170 		return (error);
4171 	NDFREE(&nd, NDF_ONLY_PNBUF);
4172 
4173 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4174 	    uap->data, uap->nbytes, td);
4175 
4176 	vrele(nd.ni_vp);
4177 	return (error);
4178 }
4179 
4180 /*
4181  * extattr_delete_vp(): Delete a named extended attribute on a file or
4182  *                      directory
4183  *
4184  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4185  *            kernelspace string pointer "attrname", proc "p"
4186  * Returns: 0 on success, an error number otherwise
4187  * Locks: none
4188  * References: vp must be a valid reference for the duration of the call
4189  */
4190 static int
4191 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4192     struct thread *td)
4193 {
4194 	struct mount *mp;
4195 	int error;
4196 
4197 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4198 	if (error)
4199 		return (error);
4200 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4201 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4202 
4203 #ifdef MAC
4204 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4205 	    attrname, NULL);
4206 	if (error)
4207 		goto done;
4208 #endif
4209 
4210 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred,
4211 	    td);
4212 #ifdef MAC
4213 done:
4214 #endif
4215 	VOP_UNLOCK(vp, 0, td);
4216 	vn_finished_write(mp);
4217 	return (error);
4218 }
4219 
4220 int
4221 extattr_delete_fd(td, uap)
4222 	struct thread *td;
4223 	struct extattr_delete_fd_args /* {
4224 		syscallarg(int) fd;
4225 		syscallarg(int) attrnamespace;
4226 		syscallarg(const char *) attrname;
4227 	} */ *uap;
4228 {
4229 	struct file *fp;
4230 	struct vnode *vp;
4231 	char attrname[EXTATTR_MAXNAMELEN];
4232 	int error;
4233 
4234 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4235 	if (error)
4236 		return (error);
4237 
4238 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4239 	if (error)
4240 		return (error);
4241 	vp = (struct vnode *)fp->f_data;
4242 
4243 	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
4244 	fdrop(fp, td);
4245 	return (error);
4246 }
4247 
4248 int
4249 extattr_delete_file(td, uap)
4250 	struct thread *td;
4251 	struct extattr_delete_file_args /* {
4252 		syscallarg(const char *) path;
4253 		syscallarg(int) attrnamespace;
4254 		syscallarg(const char *) attrname;
4255 	} */ *uap;
4256 {
4257 	struct nameidata nd;
4258 	char attrname[EXTATTR_MAXNAMELEN];
4259 	int error;
4260 
4261 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4262 	if (error)
4263 		return(error);
4264 
4265 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4266 	error = namei(&nd);
4267 	if (error)
4268 		return(error);
4269 	NDFREE(&nd, NDF_ONLY_PNBUF);
4270 
4271 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4272 	vrele(nd.ni_vp);
4273 	return(error);
4274 }
4275 
4276 int
4277 extattr_delete_link(td, uap)
4278 	struct thread *td;
4279 	struct extattr_delete_link_args /* {
4280 		syscallarg(const char *) path;
4281 		syscallarg(int) attrnamespace;
4282 		syscallarg(const char *) attrname;
4283 	} */ *uap;
4284 {
4285 	struct nameidata nd;
4286 	char attrname[EXTATTR_MAXNAMELEN];
4287 	int error;
4288 
4289 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4290 	if (error)
4291 		return(error);
4292 
4293 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4294 	error = namei(&nd);
4295 	if (error)
4296 		return(error);
4297 	NDFREE(&nd, NDF_ONLY_PNBUF);
4298 
4299 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4300 	vrele(nd.ni_vp);
4301 	return(error);
4302 }
4303