xref: /freebsd/sys/kern/vfs_syscalls.c (revision 7773002178c8dbc52b44e4d705f07706409af8e4)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  */
40 
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43 
44 /* For 4.3 integer FS ID compatibility */
45 #include "opt_compat.h"
46 #include "opt_mac.h"
47 
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/bio.h>
51 #include <sys/buf.h>
52 #include <sys/sysent.h>
53 #include <sys/mac.h>
54 #include <sys/malloc.h>
55 #include <sys/mount.h>
56 #include <sys/mutex.h>
57 #include <sys/sysproto.h>
58 #include <sys/namei.h>
59 #include <sys/filedesc.h>
60 #include <sys/kernel.h>
61 #include <sys/fcntl.h>
62 #include <sys/file.h>
63 #include <sys/limits.h>
64 #include <sys/linker.h>
65 #include <sys/stat.h>
66 #include <sys/sx.h>
67 #include <sys/unistd.h>
68 #include <sys/vnode.h>
69 #include <sys/proc.h>
70 #include <sys/dirent.h>
71 #include <sys/extattr.h>
72 #include <sys/jail.h>
73 #include <sys/syscallsubr.h>
74 #include <sys/sysctl.h>
75 
76 #include <machine/stdarg.h>
77 
78 #include <vm/vm.h>
79 #include <vm/vm_object.h>
80 #include <vm/vm_page.h>
81 #include <vm/uma.h>
82 
83 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
84 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
85 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
86 static int setfmode(struct thread *td, struct vnode *, int);
87 static int setfflags(struct thread *td, struct vnode *, int);
88 static int setutimes(struct thread *td, struct vnode *,
89     const struct timespec *, int, int);
90 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
91     struct thread *td);
92 
93 static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
94     size_t nbytes, struct thread *td);
95 
96 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
97 int (*softdep_fsync_hook)(struct vnode *);
98 
99 /*
100  * The module initialization routine for POSIX asynchronous I/O will
101  * set this to the version of AIO that it implements.  (Zero means
102  * that it is not implemented.)  This value is used here by pathconf()
103  * and in kern_descrip.c by fpathconf().
104  */
105 int async_io_version;
106 
107 /*
108  * Sync each mounted filesystem.
109  */
110 #ifndef _SYS_SYSPROTO_H_
111 struct sync_args {
112         int     dummy;
113 };
114 #endif
115 
116 #ifdef DEBUG
117 static int syncprt = 0;
118 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
119 #endif
120 
121 /* ARGSUSED */
122 int
123 sync(td, uap)
124 	struct thread *td;
125 	struct sync_args *uap;
126 {
127 	struct mount *mp, *nmp;
128 	int asyncflag;
129 
130 	mtx_lock(&mountlist_mtx);
131 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
132 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
133 			nmp = TAILQ_NEXT(mp, mnt_list);
134 			continue;
135 		}
136 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
137 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
138 			asyncflag = mp->mnt_flag & MNT_ASYNC;
139 			mp->mnt_flag &= ~MNT_ASYNC;
140 			vfs_msync(mp, MNT_NOWAIT);
141 			VFS_SYNC(mp, MNT_NOWAIT,
142 			    ((td != NULL) ? td->td_ucred : NOCRED), td);
143 			mp->mnt_flag |= asyncflag;
144 			vn_finished_write(mp);
145 		}
146 		mtx_lock(&mountlist_mtx);
147 		nmp = TAILQ_NEXT(mp, mnt_list);
148 		vfs_unbusy(mp, td);
149 	}
150 	mtx_unlock(&mountlist_mtx);
151 #if 0
152 /*
153  * XXX don't call vfs_bufstats() yet because that routine
154  * was not imported in the Lite2 merge.
155  */
156 #ifdef DIAGNOSTIC
157 	if (syncprt)
158 		vfs_bufstats();
159 #endif /* DIAGNOSTIC */
160 #endif
161 	return (0);
162 }
163 
164 /* XXX PRISON: could be per prison flag */
165 static int prison_quotas;
166 #if 0
167 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
168 #endif
169 
170 /*
171  * Change filesystem quotas.
172  */
173 #ifndef _SYS_SYSPROTO_H_
174 struct quotactl_args {
175 	char *path;
176 	int cmd;
177 	int uid;
178 	caddr_t arg;
179 };
180 #endif
181 /* ARGSUSED */
182 int
183 quotactl(td, uap)
184 	struct thread *td;
185 	register struct quotactl_args /* {
186 		char *path;
187 		int cmd;
188 		int uid;
189 		caddr_t arg;
190 	} */ *uap;
191 {
192 	struct mount *mp;
193 	int error;
194 	struct nameidata nd;
195 
196 	if (jailed(td->td_ucred) && !prison_quotas)
197 		return (EPERM);
198 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
199 	if ((error = namei(&nd)) != 0)
200 		return (error);
201 	NDFREE(&nd, NDF_ONLY_PNBUF);
202 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
203 	vrele(nd.ni_vp);
204 	if (error)
205 		return (error);
206 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
207 	vn_finished_write(mp);
208 	return (error);
209 }
210 
211 /*
212  * Get filesystem statistics.
213  */
214 #ifndef _SYS_SYSPROTO_H_
215 struct statfs_args {
216 	char *path;
217 	struct statfs *buf;
218 };
219 #endif
220 /* ARGSUSED */
221 int
222 statfs(td, uap)
223 	struct thread *td;
224 	register struct statfs_args /* {
225 		char *path;
226 		struct statfs *buf;
227 	} */ *uap;
228 {
229 	register struct mount *mp;
230 	register struct statfs *sp;
231 	int error;
232 	struct nameidata nd;
233 	struct statfs sb;
234 
235 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
236 	if ((error = namei(&nd)) != 0)
237 		return (error);
238 	mp = nd.ni_vp->v_mount;
239 	sp = &mp->mnt_stat;
240 	NDFREE(&nd, NDF_ONLY_PNBUF);
241 	vrele(nd.ni_vp);
242 #ifdef MAC
243 	error = mac_check_mount_stat(td->td_ucred, mp);
244 	if (error)
245 		return (error);
246 #endif
247 	error = VFS_STATFS(mp, sp, td);
248 	if (error)
249 		return (error);
250 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
251 	if (suser(td)) {
252 		bcopy(sp, &sb, sizeof(sb));
253 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
254 		sp = &sb;
255 	}
256 	return (copyout(sp, uap->buf, sizeof(*sp)));
257 }
258 
259 /*
260  * Get filesystem statistics.
261  */
262 #ifndef _SYS_SYSPROTO_H_
263 struct fstatfs_args {
264 	int fd;
265 	struct statfs *buf;
266 };
267 #endif
268 /* ARGSUSED */
269 int
270 fstatfs(td, uap)
271 	struct thread *td;
272 	register struct fstatfs_args /* {
273 		int fd;
274 		struct statfs *buf;
275 	} */ *uap;
276 {
277 	struct file *fp;
278 	struct mount *mp;
279 	register struct statfs *sp;
280 	int error;
281 	struct statfs sb;
282 
283 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
284 		return (error);
285 	mp = fp->f_vnode->v_mount;
286 	fdrop(fp, td);
287 	if (mp == NULL)
288 		return (EBADF);
289 #ifdef MAC
290 	error = mac_check_mount_stat(td->td_ucred, mp);
291 	if (error)
292 		return (error);
293 #endif
294 	sp = &mp->mnt_stat;
295 	error = VFS_STATFS(mp, sp, td);
296 	if (error)
297 		return (error);
298 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
299 	if (suser(td)) {
300 		bcopy(sp, &sb, sizeof(sb));
301 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
302 		sp = &sb;
303 	}
304 	return (copyout(sp, uap->buf, sizeof(*sp)));
305 }
306 
307 /*
308  * Get statistics on all filesystems.
309  */
310 #ifndef _SYS_SYSPROTO_H_
311 struct getfsstat_args {
312 	struct statfs *buf;
313 	long bufsize;
314 	int flags;
315 };
316 #endif
317 int
318 getfsstat(td, uap)
319 	struct thread *td;
320 	register struct getfsstat_args /* {
321 		struct statfs *buf;
322 		long bufsize;
323 		int flags;
324 	} */ *uap;
325 {
326 	register struct mount *mp, *nmp;
327 	register struct statfs *sp;
328 	caddr_t sfsp;
329 	long count, maxcount, error;
330 
331 	maxcount = uap->bufsize / sizeof(struct statfs);
332 	sfsp = (caddr_t)uap->buf;
333 	count = 0;
334 	mtx_lock(&mountlist_mtx);
335 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
336 #ifdef MAC
337 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
338 			nmp = TAILQ_NEXT(mp, mnt_list);
339 			continue;
340 		}
341 #endif
342 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
343 			nmp = TAILQ_NEXT(mp, mnt_list);
344 			continue;
345 		}
346 		if (sfsp && count < maxcount) {
347 			sp = &mp->mnt_stat;
348 			/*
349 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
350 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
351 			 * overrides MNT_WAIT.
352 			 */
353 			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
354 			    (uap->flags & MNT_WAIT)) &&
355 			    (error = VFS_STATFS(mp, sp, td))) {
356 				mtx_lock(&mountlist_mtx);
357 				nmp = TAILQ_NEXT(mp, mnt_list);
358 				vfs_unbusy(mp, td);
359 				continue;
360 			}
361 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
362 			error = copyout(sp, sfsp, sizeof(*sp));
363 			if (error) {
364 				vfs_unbusy(mp, td);
365 				return (error);
366 			}
367 			sfsp += sizeof(*sp);
368 		}
369 		count++;
370 		mtx_lock(&mountlist_mtx);
371 		nmp = TAILQ_NEXT(mp, mnt_list);
372 		vfs_unbusy(mp, td);
373 	}
374 	mtx_unlock(&mountlist_mtx);
375 	if (sfsp && count > maxcount)
376 		td->td_retval[0] = maxcount;
377 	else
378 		td->td_retval[0] = count;
379 	return (0);
380 }
381 
382 /*
383  * Change current working directory to a given file descriptor.
384  */
385 #ifndef _SYS_SYSPROTO_H_
386 struct fchdir_args {
387 	int	fd;
388 };
389 #endif
390 /* ARGSUSED */
391 int
392 fchdir(td, uap)
393 	struct thread *td;
394 	struct fchdir_args /* {
395 		int fd;
396 	} */ *uap;
397 {
398 	register struct filedesc *fdp = td->td_proc->p_fd;
399 	struct vnode *vp, *tdp, *vpold;
400 	struct mount *mp;
401 	struct file *fp;
402 	int error;
403 
404 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
405 		return (error);
406 	vp = fp->f_vnode;
407 	VREF(vp);
408 	fdrop(fp, td);
409 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
410 	if (vp->v_type != VDIR)
411 		error = ENOTDIR;
412 #ifdef MAC
413 	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
414 	}
415 #endif
416 	else
417 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
418 	while (!error && (mp = vp->v_mountedhere) != NULL) {
419 		if (vfs_busy(mp, 0, 0, td))
420 			continue;
421 		error = VFS_ROOT(mp, &tdp);
422 		vfs_unbusy(mp, td);
423 		if (error)
424 			break;
425 		vput(vp);
426 		vp = tdp;
427 	}
428 	if (error) {
429 		vput(vp);
430 		return (error);
431 	}
432 	VOP_UNLOCK(vp, 0, td);
433 	FILEDESC_LOCK(fdp);
434 	vpold = fdp->fd_cdir;
435 	fdp->fd_cdir = vp;
436 	FILEDESC_UNLOCK(fdp);
437 	vrele(vpold);
438 	return (0);
439 }
440 
441 /*
442  * Change current working directory (``.'').
443  */
444 #ifndef _SYS_SYSPROTO_H_
445 struct chdir_args {
446 	char	*path;
447 };
448 #endif
449 /* ARGSUSED */
450 int
451 chdir(td, uap)
452 	struct thread *td;
453 	struct chdir_args /* {
454 		char *path;
455 	} */ *uap;
456 {
457 
458 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
459 }
460 
461 int
462 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
463 {
464 	register struct filedesc *fdp = td->td_proc->p_fd;
465 	int error;
466 	struct nameidata nd;
467 	struct vnode *vp;
468 
469 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, pathseg, path, td);
470 	if ((error = namei(&nd)) != 0)
471 		return (error);
472 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
473 		vput(nd.ni_vp);
474 		NDFREE(&nd, NDF_ONLY_PNBUF);
475 		return (error);
476 	}
477 	VOP_UNLOCK(nd.ni_vp, 0, td);
478 	NDFREE(&nd, NDF_ONLY_PNBUF);
479 	FILEDESC_LOCK(fdp);
480 	vp = fdp->fd_cdir;
481 	fdp->fd_cdir = nd.ni_vp;
482 	FILEDESC_UNLOCK(fdp);
483 	vrele(vp);
484 	return (0);
485 }
486 
487 /*
488  * Helper function for raised chroot(2) security function:  Refuse if
489  * any filedescriptors are open directories.
490  */
491 static int
492 chroot_refuse_vdir_fds(fdp)
493 	struct filedesc *fdp;
494 {
495 	struct vnode *vp;
496 	struct file *fp;
497 	int fd;
498 
499 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
500 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
501 		fp = fget_locked(fdp, fd);
502 		if (fp == NULL)
503 			continue;
504 		if (fp->f_type == DTYPE_VNODE) {
505 			vp = fp->f_vnode;
506 			if (vp->v_type == VDIR)
507 				return (EPERM);
508 		}
509 	}
510 	return (0);
511 }
512 
513 /*
514  * This sysctl determines if we will allow a process to chroot(2) if it
515  * has a directory open:
516  *	0: disallowed for all processes.
517  *	1: allowed for processes that were not already chroot(2)'ed.
518  *	2: allowed for all processes.
519  */
520 
521 static int chroot_allow_open_directories = 1;
522 
523 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
524      &chroot_allow_open_directories, 0, "");
525 
526 /*
527  * Change notion of root (``/'') directory.
528  */
529 #ifndef _SYS_SYSPROTO_H_
530 struct chroot_args {
531 	char	*path;
532 };
533 #endif
534 /* ARGSUSED */
535 int
536 chroot(td, uap)
537 	struct thread *td;
538 	struct chroot_args /* {
539 		char *path;
540 	} */ *uap;
541 {
542 	int error;
543 	struct nameidata nd;
544 
545 	error = suser_cred(td->td_ucred, PRISON_ROOT);
546 	if (error)
547 		return (error);
548 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
549 	mtx_lock(&Giant);
550 	error = namei(&nd);
551 	if (error)
552 		goto error;
553 	if ((error = change_dir(nd.ni_vp, td)) != 0)
554 		goto e_vunlock;
555 #ifdef MAC
556 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
557 		goto e_vunlock;
558 #endif
559 	VOP_UNLOCK(nd.ni_vp, 0, td);
560 	error = change_root(nd.ni_vp, td);
561 	vrele(nd.ni_vp);
562 	NDFREE(&nd, NDF_ONLY_PNBUF);
563 	mtx_unlock(&Giant);
564 	return (error);
565 e_vunlock:
566 	vput(nd.ni_vp);
567 error:
568 	mtx_unlock(&Giant);
569 	NDFREE(&nd, NDF_ONLY_PNBUF);
570 	return (error);
571 }
572 
573 /*
574  * Common routine for chroot and chdir.  Callers must provide a locked vnode
575  * instance.
576  */
577 int
578 change_dir(vp, td)
579 	struct vnode *vp;
580 	struct thread *td;
581 {
582 	int error;
583 
584 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
585 	if (vp->v_type != VDIR)
586 		return (ENOTDIR);
587 #ifdef MAC
588 	error = mac_check_vnode_chdir(td->td_ucred, vp);
589 	if (error)
590 		return (error);
591 #endif
592 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
593 	return (error);
594 }
595 
596 /*
597  * Common routine for kern_chroot() and jail_attach().  The caller is
598  * responsible for invoking suser() and mac_check_chroot() to authorize this
599  * operation.
600  */
601 int
602 change_root(vp, td)
603 	struct vnode *vp;
604 	struct thread *td;
605 {
606 	struct filedesc *fdp;
607 	struct vnode *oldvp;
608 	int error;
609 
610 	mtx_assert(&Giant, MA_OWNED);
611 	fdp = td->td_proc->p_fd;
612 	FILEDESC_LOCK(fdp);
613 	if (chroot_allow_open_directories == 0 ||
614 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
615 		error = chroot_refuse_vdir_fds(fdp);
616 		if (error) {
617 			FILEDESC_UNLOCK(fdp);
618 			return (error);
619 		}
620 	}
621 	oldvp = fdp->fd_rdir;
622 	fdp->fd_rdir = vp;
623 	VREF(fdp->fd_rdir);
624 	if (!fdp->fd_jdir) {
625 		fdp->fd_jdir = vp;
626 		VREF(fdp->fd_jdir);
627 	}
628 	FILEDESC_UNLOCK(fdp);
629 	vrele(oldvp);
630 	return (0);
631 }
632 
633 /*
634  * Check permissions, allocate an open file structure,
635  * and call the device open routine if any.
636  */
637 #ifndef _SYS_SYSPROTO_H_
638 struct open_args {
639 	char	*path;
640 	int	flags;
641 	int	mode;
642 };
643 #endif
644 int
645 open(td, uap)
646 	struct thread *td;
647 	register struct open_args /* {
648 		char *path;
649 		int flags;
650 		int mode;
651 	} */ *uap;
652 {
653 
654 	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
655 }
656 
657 int
658 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
659     int mode)
660 {
661 	struct proc *p = td->td_proc;
662 	struct filedesc *fdp = p->p_fd;
663 	struct file *fp;
664 	struct vnode *vp;
665 	struct vattr vat;
666 	struct mount *mp;
667 	int cmode;
668 	struct file *nfp;
669 	int type, indx, error;
670 	struct flock lf;
671 	struct nameidata nd;
672 
673 	if ((flags & O_ACCMODE) == O_ACCMODE)
674 		return (EINVAL);
675 	flags = FFLAGS(flags);
676 	error = falloc(td, &nfp, &indx);
677 	if (error)
678 		return (error);
679 	/* An extra reference on `nfp' has been held for us by falloc(). */
680 	fp = nfp;
681 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
682 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
683 	td->td_dupfd = -1;		/* XXX check for fdopen */
684 	error = vn_open(&nd, &flags, cmode, indx);
685 	if (error) {
686 
687 		/*
688 		 * If the vn_open replaced the method vector, something
689 		 * wonderous happened deep below and we just pass it up
690 		 * pretending we know what we do.
691 		 */
692 		if (error == ENXIO && fp->f_ops != &badfileops) {
693 			fdrop(fp, td);
694 			td->td_retval[0] = indx;
695 			return (0);
696 		}
697 
698 		/*
699 		 * release our own reference
700 		 */
701 		fdrop(fp, td);
702 
703 		/*
704 		 * handle special fdopen() case.  bleh.  dupfdopen() is
705 		 * responsible for dropping the old contents of ofiles[indx]
706 		 * if it succeeds.
707 		 */
708 		if ((error == ENODEV || error == ENXIO) &&
709 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
710 		    (error =
711 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
712 			td->td_retval[0] = indx;
713 			return (0);
714 		}
715 		/*
716 		 * Clean up the descriptor, but only if another thread hadn't
717 		 * replaced or closed it.
718 		 */
719 		FILEDESC_LOCK(fdp);
720 		if (fdp->fd_ofiles[indx] == fp) {
721 			fdp->fd_ofiles[indx] = NULL;
722 			FILEDESC_UNLOCK(fdp);
723 			fdrop(fp, td);
724 		} else
725 			FILEDESC_UNLOCK(fdp);
726 
727 		if (error == ERESTART)
728 			error = EINTR;
729 		return (error);
730 	}
731 	td->td_dupfd = 0;
732 	NDFREE(&nd, NDF_ONLY_PNBUF);
733 	vp = nd.ni_vp;
734 
735 	/*
736 	 * There should be 2 references on the file, one from the descriptor
737 	 * table, and one for us.
738 	 *
739 	 * Handle the case where someone closed the file (via its file
740 	 * descriptor) while we were blocked.  The end result should look
741 	 * like opening the file succeeded but it was immediately closed.
742 	 */
743 	FILEDESC_LOCK(fdp);
744 	FILE_LOCK(fp);
745 	if (fp->f_count == 1) {
746 		KASSERT(fdp->fd_ofiles[indx] != fp,
747 		    ("Open file descriptor lost all refs"));
748 		FILEDESC_UNLOCK(fdp);
749 		FILE_UNLOCK(fp);
750 		VOP_UNLOCK(vp, 0, td);
751 		vn_close(vp, flags & FMASK, fp->f_cred, td);
752 		fdrop(fp, td);
753 		td->td_retval[0] = indx;
754 		return 0;
755 	}
756 	fp->f_vnode = vp;
757 	fp->f_data = vp;
758 	fp->f_flag = flags & FMASK;
759 	fp->f_ops = &vnops;
760 	fp->f_seqcount = 1;
761 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
762 	FILEDESC_UNLOCK(fdp);
763 	FILE_UNLOCK(fp);
764 
765 	/* assert that vn_open created a backing object if one is needed */
766 	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
767 		("open: vmio vnode has no backing object after vn_open"));
768 
769 	VOP_UNLOCK(vp, 0, td);
770 	if (flags & (O_EXLOCK | O_SHLOCK)) {
771 		lf.l_whence = SEEK_SET;
772 		lf.l_start = 0;
773 		lf.l_len = 0;
774 		if (flags & O_EXLOCK)
775 			lf.l_type = F_WRLCK;
776 		else
777 			lf.l_type = F_RDLCK;
778 		type = F_FLOCK;
779 		if ((flags & FNONBLOCK) == 0)
780 			type |= F_WAIT;
781 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
782 			    type)) != 0)
783 			goto bad;
784 		fp->f_flag |= FHASLOCK;
785 	}
786 	if (flags & O_TRUNC) {
787 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
788 			goto bad;
789 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
790 		VATTR_NULL(&vat);
791 		vat.va_size = 0;
792 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
793 #ifdef MAC
794 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
795 		if (error == 0)
796 #endif
797 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
798 		VOP_UNLOCK(vp, 0, td);
799 		vn_finished_write(mp);
800 		if (error)
801 			goto bad;
802 	}
803 	/*
804 	 * Release our private reference, leaving the one associated with
805 	 * the descriptor table intact.
806 	 */
807 	fdrop(fp, td);
808 	td->td_retval[0] = indx;
809 	return (0);
810 bad:
811 	FILEDESC_LOCK(fdp);
812 	if (fdp->fd_ofiles[indx] == fp) {
813 		fdp->fd_ofiles[indx] = NULL;
814 		FILEDESC_UNLOCK(fdp);
815 		fdrop(fp, td);
816 	} else
817 		FILEDESC_UNLOCK(fdp);
818 	fdrop(fp, td);
819 	return (error);
820 }
821 
#ifdef COMPAT_43
/*
 * Old creat(2): create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
/*
 * Same as opening the user-space `path' with
 * O_WRONLY | O_CREAT | O_TRUNC and the given mode.
 */
int
ocreat(struct thread *td, struct ocreat_args *uap)
{

	return (kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */
852 
853 /*
854  * Create a special file.
855  */
856 #ifndef _SYS_SYSPROTO_H_
857 struct mknod_args {
858 	char	*path;
859 	int	mode;
860 	int	dev;
861 };
862 #endif
863 /* ARGSUSED */
864 int
865 mknod(td, uap)
866 	struct thread *td;
867 	register struct mknod_args /* {
868 		char *path;
869 		int mode;
870 		int dev;
871 	} */ *uap;
872 {
873 
874 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
875 }
876 
877 int
878 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
879     int dev)
880 {
881 	struct vnode *vp;
882 	struct mount *mp;
883 	struct vattr vattr;
884 	int error;
885 	int whiteout = 0;
886 	struct nameidata nd;
887 
888 	switch (mode & S_IFMT) {
889 	case S_IFCHR:
890 	case S_IFBLK:
891 		error = suser(td);
892 		break;
893 	default:
894 		error = suser_cred(td->td_ucred, PRISON_ROOT);
895 		break;
896 	}
897 	if (error)
898 		return (error);
899 restart:
900 	bwillwrite();
901 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
902 	if ((error = namei(&nd)) != 0)
903 		return (error);
904 	vp = nd.ni_vp;
905 	if (vp != NULL) {
906 		NDFREE(&nd, NDF_ONLY_PNBUF);
907 		vrele(vp);
908 		if (vp == nd.ni_dvp)
909 			vrele(nd.ni_dvp);
910 		else
911 			vput(nd.ni_dvp);
912 		return (EEXIST);
913 	} else {
914 		VATTR_NULL(&vattr);
915 		FILEDESC_LOCK(td->td_proc->p_fd);
916 		vattr.va_mode = (mode & ALLPERMS) &
917 		    ~td->td_proc->p_fd->fd_cmask;
918 		FILEDESC_UNLOCK(td->td_proc->p_fd);
919 		vattr.va_rdev = dev;
920 		whiteout = 0;
921 
922 		switch (mode & S_IFMT) {
923 		case S_IFMT:	/* used by badsect to flag bad sectors */
924 			vattr.va_type = VBAD;
925 			break;
926 		case S_IFCHR:
927 			vattr.va_type = VCHR;
928 			break;
929 		case S_IFBLK:
930 			vattr.va_type = VBLK;
931 			break;
932 		case S_IFWHT:
933 			whiteout = 1;
934 			break;
935 		default:
936 			error = EINVAL;
937 			break;
938 		}
939 	}
940 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
941 		NDFREE(&nd, NDF_ONLY_PNBUF);
942 		vput(nd.ni_dvp);
943 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
944 			return (error);
945 		goto restart;
946 	}
947 #ifdef MAC
948 	if (error == 0 && !whiteout)
949 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
950 		    &nd.ni_cnd, &vattr);
951 #endif
952 	if (!error) {
953 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
954 		if (whiteout)
955 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
956 		else {
957 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
958 						&nd.ni_cnd, &vattr);
959 			if (error == 0)
960 				vput(nd.ni_vp);
961 		}
962 	}
963 	NDFREE(&nd, NDF_ONLY_PNBUF);
964 	vput(nd.ni_dvp);
965 	vn_finished_write(mp);
966 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
967 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
968 	return (error);
969 }
970 
971 /*
972  * Create a named pipe.
973  */
974 #ifndef _SYS_SYSPROTO_H_
975 struct mkfifo_args {
976 	char	*path;
977 	int	mode;
978 };
979 #endif
980 /* ARGSUSED */
981 int
982 mkfifo(td, uap)
983 	struct thread *td;
984 	register struct mkfifo_args /* {
985 		char *path;
986 		int mode;
987 	} */ *uap;
988 {
989 
990 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
991 }
992 
993 int
994 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
995 {
996 	struct mount *mp;
997 	struct vattr vattr;
998 	int error;
999 	struct nameidata nd;
1000 
1001 restart:
1002 	bwillwrite();
1003 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
1004 	if ((error = namei(&nd)) != 0)
1005 		return (error);
1006 	if (nd.ni_vp != NULL) {
1007 		NDFREE(&nd, NDF_ONLY_PNBUF);
1008 		vrele(nd.ni_vp);
1009 		if (nd.ni_vp == nd.ni_dvp)
1010 			vrele(nd.ni_dvp);
1011 		else
1012 			vput(nd.ni_dvp);
1013 		return (EEXIST);
1014 	}
1015 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1016 		NDFREE(&nd, NDF_ONLY_PNBUF);
1017 		vput(nd.ni_dvp);
1018 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1019 			return (error);
1020 		goto restart;
1021 	}
1022 	VATTR_NULL(&vattr);
1023 	vattr.va_type = VFIFO;
1024 	FILEDESC_LOCK(td->td_proc->p_fd);
1025 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1026 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1027 #ifdef MAC
1028 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1029 	    &vattr);
1030 	if (error)
1031 		goto out;
1032 #endif
1033 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1034 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1035 	if (error == 0)
1036 		vput(nd.ni_vp);
1037 #ifdef MAC
1038 out:
1039 #endif
1040 	NDFREE(&nd, NDF_ONLY_PNBUF);
1041 	vput(nd.ni_dvp);
1042 	vn_finished_write(mp);
1043 	return (error);
1044 }
1045 
1046 /*
1047  * Make a hard file link.
1048  */
1049 #ifndef _SYS_SYSPROTO_H_
1050 struct link_args {
1051 	char	*path;
1052 	char	*link;
1053 };
1054 #endif
1055 /* ARGSUSED */
1056 int
1057 link(td, uap)
1058 	struct thread *td;
1059 	register struct link_args /* {
1060 		char *path;
1061 		char *link;
1062 	} */ *uap;
1063 {
1064 
1065 	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1066 }
1067 
1068 int
1069 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1070 {
1071 	struct vnode *vp;
1072 	struct mount *mp;
1073 	struct nameidata nd;
1074 	int error;
1075 
1076 	bwillwrite();
1077 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, segflg, path, td);
1078 	if ((error = namei(&nd)) != 0)
1079 		return (error);
1080 	NDFREE(&nd, NDF_ONLY_PNBUF);
1081 	vp = nd.ni_vp;
1082 	if (vp->v_type == VDIR) {
1083 		vrele(vp);
1084 		return (EPERM);		/* POSIX */
1085 	}
1086 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1087 		vrele(vp);
1088 		return (error);
1089 	}
1090 	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1091 	if ((error = namei(&nd)) == 0) {
1092 		if (nd.ni_vp != NULL) {
1093 			vrele(nd.ni_vp);
1094 			if (nd.ni_dvp == nd.ni_vp)
1095 				vrele(nd.ni_dvp);
1096 			else
1097 				vput(nd.ni_dvp);
1098 			error = EEXIST;
1099 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1100 		    == 0) {
1101 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1102 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1103 #ifdef MAC
1104 			error = mac_check_vnode_link(td->td_ucred, nd.ni_dvp,
1105 			    vp, &nd.ni_cnd);
1106 			if (error == 0)
1107 #endif
1108 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1109 			VOP_UNLOCK(vp, 0, td);
1110 			vput(nd.ni_dvp);
1111 		}
1112 		NDFREE(&nd, NDF_ONLY_PNBUF);
1113 	}
1114 	vrele(vp);
1115 	vn_finished_write(mp);
1116 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1117 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1118 	return (error);
1119 }
1120 
1121 /*
1122  * Make a symbolic link.
1123  */
1124 #ifndef _SYS_SYSPROTO_H_
1125 struct symlink_args {
1126 	char	*path;
1127 	char	*link;
1128 };
1129 #endif
1130 /* ARGSUSED */
1131 int
1132 symlink(td, uap)
1133 	struct thread *td;
1134 	register struct symlink_args /* {
1135 		char *path;
1136 		char *link;
1137 	} */ *uap;
1138 {
1139 
1140 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1141 }
1142 
1143 int
1144 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1145 {
1146 	struct mount *mp;
1147 	struct vattr vattr;
1148 	char *syspath;
1149 	int error;
1150 	struct nameidata nd;
1151 
1152 	if (segflg == UIO_SYSSPACE) {
1153 		syspath = path;
1154 	} else {
1155 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1156 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1157 			goto out;
1158 	}
1159 restart:
1160 	bwillwrite();
1161 	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1162 	if ((error = namei(&nd)) != 0)
1163 		goto out;
1164 	if (nd.ni_vp) {
1165 		NDFREE(&nd, NDF_ONLY_PNBUF);
1166 		vrele(nd.ni_vp);
1167 		if (nd.ni_vp == nd.ni_dvp)
1168 			vrele(nd.ni_dvp);
1169 		else
1170 			vput(nd.ni_dvp);
1171 		error = EEXIST;
1172 		goto out;
1173 	}
1174 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1175 		NDFREE(&nd, NDF_ONLY_PNBUF);
1176 		vput(nd.ni_dvp);
1177 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1178 			return (error);
1179 		goto restart;
1180 	}
1181 	VATTR_NULL(&vattr);
1182 	FILEDESC_LOCK(td->td_proc->p_fd);
1183 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1184 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1185 #ifdef MAC
1186 	vattr.va_type = VLNK;
1187 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1188 	    &vattr);
1189 	if (error)
1190 		goto out2;
1191 #endif
1192 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1193 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1194 	if (error == 0)
1195 		vput(nd.ni_vp);
1196 #ifdef MAC
1197 out2:
1198 #endif
1199 	NDFREE(&nd, NDF_ONLY_PNBUF);
1200 	vput(nd.ni_dvp);
1201 	vn_finished_write(mp);
1202 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1203 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1204 out:
1205 	if (segflg != UIO_SYSSPACE)
1206 		uma_zfree(namei_zone, syspath);
1207 	return (error);
1208 }
1209 
1210 /*
1211  * Delete a whiteout from the filesystem.
1212  */
1213 /* ARGSUSED */
1214 int
1215 undelete(td, uap)
1216 	struct thread *td;
1217 	register struct undelete_args /* {
1218 		char *path;
1219 	} */ *uap;
1220 {
1221 	int error;
1222 	struct mount *mp;
1223 	struct nameidata nd;
1224 
1225 restart:
1226 	bwillwrite();
1227 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1228 	    uap->path, td);
1229 	error = namei(&nd);
1230 	if (error)
1231 		return (error);
1232 
1233 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1234 		NDFREE(&nd, NDF_ONLY_PNBUF);
1235 		if (nd.ni_vp)
1236 			vrele(nd.ni_vp);
1237 		if (nd.ni_vp == nd.ni_dvp)
1238 			vrele(nd.ni_dvp);
1239 		else
1240 			vput(nd.ni_dvp);
1241 		return (EEXIST);
1242 	}
1243 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1244 		NDFREE(&nd, NDF_ONLY_PNBUF);
1245 		vput(nd.ni_dvp);
1246 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1247 			return (error);
1248 		goto restart;
1249 	}
1250 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1251 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1252 	NDFREE(&nd, NDF_ONLY_PNBUF);
1253 	vput(nd.ni_dvp);
1254 	vn_finished_write(mp);
1255 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1256 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1257 	return (error);
1258 }
1259 
1260 /*
1261  * Delete a name from the filesystem.
1262  */
1263 #ifndef _SYS_SYSPROTO_H_
1264 struct unlink_args {
1265 	char	*path;
1266 };
1267 #endif
1268 /* ARGSUSED */
1269 int
1270 unlink(td, uap)
1271 	struct thread *td;
1272 	struct unlink_args /* {
1273 		char *path;
1274 	} */ *uap;
1275 {
1276 
1277 	return (kern_unlink(td, uap->path, UIO_USERSPACE));
1278 }
1279 
1280 int
1281 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1282 {
1283 	struct mount *mp;
1284 	struct vnode *vp;
1285 	int error;
1286 	struct nameidata nd;
1287 
1288 restart:
1289 	bwillwrite();
1290 	NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, pathseg, path, td);
1291 	if ((error = namei(&nd)) != 0)
1292 		return (error);
1293 	vp = nd.ni_vp;
1294 	if (vp->v_type == VDIR)
1295 		error = EPERM;		/* POSIX */
1296 	else {
1297 		/*
1298 		 * The root of a mounted filesystem cannot be deleted.
1299 		 *
1300 		 * XXX: can this only be a VDIR case?
1301 		 */
1302 		if (vp->v_vflag & VV_ROOT)
1303 			error = EBUSY;
1304 	}
1305 	if (error == 0) {
1306 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1307 			NDFREE(&nd, NDF_ONLY_PNBUF);
1308 			if (vp == nd.ni_dvp)
1309 				vrele(vp);
1310 			else
1311 				vput(vp);
1312 			vput(nd.ni_dvp);
1313 			if ((error = vn_start_write(NULL, &mp,
1314 			    V_XSLEEP | PCATCH)) != 0)
1315 				return (error);
1316 			goto restart;
1317 		}
1318 #ifdef MAC
1319 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1320 		    &nd.ni_cnd);
1321 		if (error)
1322 			goto out;
1323 #endif
1324 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1325 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1326 #ifdef MAC
1327 out:
1328 #endif
1329 		vn_finished_write(mp);
1330 	}
1331 	NDFREE(&nd, NDF_ONLY_PNBUF);
1332 	if (vp == nd.ni_dvp)
1333 		vrele(vp);
1334 	else
1335 		vput(vp);
1336 	vput(nd.ni_dvp);
1337 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1338 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1339 	return (error);
1340 }
1341 
1342 /*
1343  * Reposition read/write file offset.
1344  */
1345 #ifndef _SYS_SYSPROTO_H_
1346 struct lseek_args {
1347 	int	fd;
1348 	int	pad;
1349 	off_t	offset;
1350 	int	whence;
1351 };
1352 #endif
1353 int
1354 lseek(td, uap)
1355 	struct thread *td;
1356 	register struct lseek_args /* {
1357 		int fd;
1358 		int pad;
1359 		off_t offset;
1360 		int whence;
1361 	} */ *uap;
1362 {
1363 	struct ucred *cred = td->td_ucred;
1364 	struct file *fp;
1365 	struct vnode *vp;
1366 	struct vattr vattr;
1367 	off_t offset;
1368 	int error, noneg;
1369 
1370 	if ((error = fget(td, uap->fd, &fp)) != 0)
1371 		return (error);
1372 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1373 		fdrop(fp, td);
1374 		return (ESPIPE);
1375 	}
1376 	vp = fp->f_vnode;
1377 	noneg = (vp->v_type != VCHR);
1378 	offset = uap->offset;
1379 	switch (uap->whence) {
1380 	case L_INCR:
1381 		if (noneg &&
1382 		    (fp->f_offset < 0 ||
1383 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1384 			error = EOVERFLOW;
1385 			break;
1386 		}
1387 		offset += fp->f_offset;
1388 		break;
1389 	case L_XTND:
1390 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1391 		error = VOP_GETATTR(vp, &vattr, cred, td);
1392 		VOP_UNLOCK(vp, 0, td);
1393 		if (error)
1394 			break;
1395 		if (noneg &&
1396 		    (vattr.va_size > OFF_MAX ||
1397 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1398 			error = EOVERFLOW;
1399 			break;
1400 		}
1401 		offset += vattr.va_size;
1402 		break;
1403 	case L_SET:
1404 		break;
1405 	default:
1406 		error = EINVAL;
1407 	}
1408 	if (error == 0 && noneg && offset < 0)
1409 		error = EINVAL;
1410 	if (error != 0) {
1411 		fdrop(fp, td);
1412 		return (error);
1413 	}
1414 	fp->f_offset = offset;
1415 	*(off_t *)(td->td_retval) = fp->f_offset;
1416 	fdrop(fp, td);
1417 	return (0);
1418 }
1419 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Compatibility variant of lseek() whose offset argument is a long.
 * The arguments are repacked into a struct lseek_args and passed on
 * to lseek().
 */
#ifndef _SYS_SYSPROTO_H_
struct olseek_args {
	int	fd;
	long	offset;
	int	whence;
};
#endif
int
olseek(td, uap)
	struct thread *td;
	register struct olseek_args /* {
		int fd;
		long offset;
		int whence;
	} */ *uap;
{
	struct lseek_args /* {
		int fd;
		int pad;
		off_t offset;
		int whence;
	} */ nuap;

	nuap.fd = uap->fd;
	nuap.offset = uap->offset;
	nuap.whence = uap->whence;
	return (lseek(td, &nuap));
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
1455 
1456 /*
1457  * Check access permissions using passed credentials.
1458  */
1459 static int
1460 vn_access(vp, user_flags, cred, td)
1461 	struct vnode	*vp;
1462 	int		user_flags;
1463 	struct ucred	*cred;
1464 	struct thread	*td;
1465 {
1466 	int error, flags;
1467 
1468 	/* Flags == 0 means only check for existence. */
1469 	error = 0;
1470 	if (user_flags) {
1471 		flags = 0;
1472 		if (user_flags & R_OK)
1473 			flags |= VREAD;
1474 		if (user_flags & W_OK)
1475 			flags |= VWRITE;
1476 		if (user_flags & X_OK)
1477 			flags |= VEXEC;
1478 #ifdef MAC
1479 		error = mac_check_vnode_access(cred, vp, flags);
1480 		if (error)
1481 			return (error);
1482 #endif
1483 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1484 			error = VOP_ACCESS(vp, flags, cred, td);
1485 	}
1486 	return (error);
1487 }
1488 
1489 /*
1490  * Check access permissions using "real" credentials.
1491  */
1492 #ifndef _SYS_SYSPROTO_H_
1493 struct access_args {
1494 	char	*path;
1495 	int	flags;
1496 };
1497 #endif
1498 int
1499 access(td, uap)
1500 	struct thread *td;
1501 	register struct access_args /* {
1502 		char *path;
1503 		int flags;
1504 	} */ *uap;
1505 {
1506 
1507 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1508 }
1509 
1510 int
1511 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1512 {
1513 	struct ucred *cred, *tmpcred;
1514 	register struct vnode *vp;
1515 	int error;
1516 	struct nameidata nd;
1517 
1518 	/*
1519 	 * Create and modify a temporary credential instead of one that
1520 	 * is potentially shared.  This could also mess up socket
1521 	 * buffer accounting which can run in an interrupt context.
1522 	 *
1523 	 * XXX - Depending on how "threads" are finally implemented, it
1524 	 * may be better to explicitly pass the credential to namei()
1525 	 * rather than to modify the potentially shared process structure.
1526 	 */
1527 	cred = td->td_ucred;
1528 	tmpcred = crdup(cred);
1529 	tmpcred->cr_uid = cred->cr_ruid;
1530 	tmpcred->cr_groups[0] = cred->cr_rgid;
1531 	td->td_ucred = tmpcred;
1532 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1533 	if ((error = namei(&nd)) != 0)
1534 		goto out1;
1535 	vp = nd.ni_vp;
1536 
1537 	error = vn_access(vp, flags, tmpcred, td);
1538 	NDFREE(&nd, NDF_ONLY_PNBUF);
1539 	vput(vp);
1540 out1:
1541 	td->td_ucred = cred;
1542 	crfree(tmpcred);
1543 	return (error);
1544 }
1545 
1546 /*
1547  * Check access permissions using "effective" credentials.
1548  */
1549 #ifndef _SYS_SYSPROTO_H_
1550 struct eaccess_args {
1551 	char	*path;
1552 	int	flags;
1553 };
1554 #endif
1555 int
1556 eaccess(td, uap)
1557 	struct thread *td;
1558 	register struct eaccess_args /* {
1559 		char *path;
1560 		int flags;
1561 	} */ *uap;
1562 {
1563 	struct nameidata nd;
1564 	struct vnode *vp;
1565 	int error;
1566 
1567 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1568 	    uap->path, td);
1569 	if ((error = namei(&nd)) != 0)
1570 		return (error);
1571 	vp = nd.ni_vp;
1572 
1573 	error = vn_access(vp, uap->flags, td->td_ucred, td);
1574 	NDFREE(&nd, NDF_ONLY_PNBUF);
1575 	vput(vp);
1576 	return (error);
1577 }
1578 
1579 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1580 /*
1581  * Get file status; this version follows links.
1582  */
1583 #ifndef _SYS_SYSPROTO_H_
1584 struct ostat_args {
1585 	char	*path;
1586 	struct ostat *ub;
1587 };
1588 #endif
1589 /* ARGSUSED */
1590 int
1591 ostat(td, uap)
1592 	struct thread *td;
1593 	register struct ostat_args /* {
1594 		char *path;
1595 		struct ostat *ub;
1596 	} */ *uap;
1597 {
1598 	struct stat sb;
1599 	struct ostat osb;
1600 	int error;
1601 	struct nameidata nd;
1602 
1603 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1604 	    uap->path, td);
1605 	if ((error = namei(&nd)) != 0)
1606 		return (error);
1607 	NDFREE(&nd, NDF_ONLY_PNBUF);
1608 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1609 	vput(nd.ni_vp);
1610 	if (error)
1611 		return (error);
1612 	cvtstat(&sb, &osb);
1613 	error = copyout(&osb, uap->ub, sizeof (osb));
1614 	return (error);
1615 }
1616 
1617 /*
1618  * Get file status; this version does not follow links.
1619  */
1620 #ifndef _SYS_SYSPROTO_H_
1621 struct olstat_args {
1622 	char	*path;
1623 	struct ostat *ub;
1624 };
1625 #endif
1626 /* ARGSUSED */
1627 int
1628 olstat(td, uap)
1629 	struct thread *td;
1630 	register struct olstat_args /* {
1631 		char *path;
1632 		struct ostat *ub;
1633 	} */ *uap;
1634 {
1635 	struct vnode *vp;
1636 	struct stat sb;
1637 	struct ostat osb;
1638 	int error;
1639 	struct nameidata nd;
1640 
1641 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1642 	    uap->path, td);
1643 	if ((error = namei(&nd)) != 0)
1644 		return (error);
1645 	vp = nd.ni_vp;
1646 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1647 	NDFREE(&nd, NDF_ONLY_PNBUF);
1648 	vput(vp);
1649 	if (error)
1650 		return (error);
1651 	cvtstat(&sb, &osb);
1652 	error = copyout(&osb, uap->ub, sizeof (osb));
1653 	return (error);
1654 }
1655 
1656 /*
1657  * Convert from an old to a new stat structure.
1658  */
1659 void
1660 cvtstat(st, ost)
1661 	struct stat *st;
1662 	struct ostat *ost;
1663 {
1664 
1665 	ost->st_dev = st->st_dev;
1666 	ost->st_ino = st->st_ino;
1667 	ost->st_mode = st->st_mode;
1668 	ost->st_nlink = st->st_nlink;
1669 	ost->st_uid = st->st_uid;
1670 	ost->st_gid = st->st_gid;
1671 	ost->st_rdev = st->st_rdev;
1672 	if (st->st_size < (quad_t)1 << 32)
1673 		ost->st_size = st->st_size;
1674 	else
1675 		ost->st_size = -2;
1676 	ost->st_atime = st->st_atime;
1677 	ost->st_mtime = st->st_mtime;
1678 	ost->st_ctime = st->st_ctime;
1679 	ost->st_blksize = st->st_blksize;
1680 	ost->st_blocks = st->st_blocks;
1681 	ost->st_flags = st->st_flags;
1682 	ost->st_gen = st->st_gen;
1683 }
1684 #endif /* COMPAT_43 || COMPAT_SUNOS */
1685 
1686 /*
1687  * Get file status; this version follows links.
1688  */
1689 #ifndef _SYS_SYSPROTO_H_
1690 struct stat_args {
1691 	char	*path;
1692 	struct stat *ub;
1693 };
1694 #endif
1695 /* ARGSUSED */
1696 int
1697 stat(td, uap)
1698 	struct thread *td;
1699 	register struct stat_args /* {
1700 		char *path;
1701 		struct stat *ub;
1702 	} */ *uap;
1703 {
1704 	struct stat sb;
1705 	int error;
1706 	struct nameidata nd;
1707 
1708 #ifdef LOOKUP_SHARED
1709 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
1710 	    UIO_USERSPACE, uap->path, td);
1711 #else
1712 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1713 	    uap->path, td);
1714 #endif
1715 	if ((error = namei(&nd)) != 0)
1716 		return (error);
1717 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1718 	NDFREE(&nd, NDF_ONLY_PNBUF);
1719 	vput(nd.ni_vp);
1720 	if (error)
1721 		return (error);
1722 	error = copyout(&sb, uap->ub, sizeof (sb));
1723 	return (error);
1724 }
1725 
1726 /*
1727  * Get file status; this version does not follow links.
1728  */
1729 #ifndef _SYS_SYSPROTO_H_
1730 struct lstat_args {
1731 	char	*path;
1732 	struct stat *ub;
1733 };
1734 #endif
1735 /* ARGSUSED */
1736 int
1737 lstat(td, uap)
1738 	struct thread *td;
1739 	register struct lstat_args /* {
1740 		char *path;
1741 		struct stat *ub;
1742 	} */ *uap;
1743 {
1744 	int error;
1745 	struct vnode *vp;
1746 	struct stat sb;
1747 	struct nameidata nd;
1748 
1749 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1750 	    uap->path, td);
1751 	if ((error = namei(&nd)) != 0)
1752 		return (error);
1753 	vp = nd.ni_vp;
1754 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1755 	NDFREE(&nd, NDF_ONLY_PNBUF);
1756 	vput(vp);
1757 	if (error)
1758 		return (error);
1759 	error = copyout(&sb, uap->ub, sizeof (sb));
1760 	return (error);
1761 }
1762 
1763 /*
1764  * Implementation of the NetBSD stat() function.
1765  * XXX This should probably be collapsed with the FreeBSD version,
1766  * as the differences are only due to vn_stat() clearing spares at
1767  * the end of the structures.  vn_stat could be split to avoid this,
1768  * and thus collapse the following to close to zero code.
1769  */
1770 void
1771 cvtnstat(sb, nsb)
1772 	struct stat *sb;
1773 	struct nstat *nsb;
1774 {
1775 	bzero(nsb, sizeof *nsb);
1776 	nsb->st_dev = sb->st_dev;
1777 	nsb->st_ino = sb->st_ino;
1778 	nsb->st_mode = sb->st_mode;
1779 	nsb->st_nlink = sb->st_nlink;
1780 	nsb->st_uid = sb->st_uid;
1781 	nsb->st_gid = sb->st_gid;
1782 	nsb->st_rdev = sb->st_rdev;
1783 	nsb->st_atimespec = sb->st_atimespec;
1784 	nsb->st_mtimespec = sb->st_mtimespec;
1785 	nsb->st_ctimespec = sb->st_ctimespec;
1786 	nsb->st_size = sb->st_size;
1787 	nsb->st_blocks = sb->st_blocks;
1788 	nsb->st_blksize = sb->st_blksize;
1789 	nsb->st_flags = sb->st_flags;
1790 	nsb->st_gen = sb->st_gen;
1791 	nsb->st_birthtimespec = sb->st_birthtimespec;
1792 }
1793 
1794 #ifndef _SYS_SYSPROTO_H_
1795 struct nstat_args {
1796 	char	*path;
1797 	struct nstat *ub;
1798 };
1799 #endif
1800 /* ARGSUSED */
1801 int
1802 nstat(td, uap)
1803 	struct thread *td;
1804 	register struct nstat_args /* {
1805 		char *path;
1806 		struct nstat *ub;
1807 	} */ *uap;
1808 {
1809 	struct stat sb;
1810 	struct nstat nsb;
1811 	int error;
1812 	struct nameidata nd;
1813 
1814 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1815 	    uap->path, td);
1816 	if ((error = namei(&nd)) != 0)
1817 		return (error);
1818 	NDFREE(&nd, NDF_ONLY_PNBUF);
1819 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1820 	vput(nd.ni_vp);
1821 	if (error)
1822 		return (error);
1823 	cvtnstat(&sb, &nsb);
1824 	error = copyout(&nsb, uap->ub, sizeof (nsb));
1825 	return (error);
1826 }
1827 
1828 /*
1829  * NetBSD lstat.  Get file status; this version does not follow links.
1830  */
1831 #ifndef _SYS_SYSPROTO_H_
1832 struct lstat_args {
1833 	char	*path;
1834 	struct stat *ub;
1835 };
1836 #endif
1837 /* ARGSUSED */
1838 int
1839 nlstat(td, uap)
1840 	struct thread *td;
1841 	register struct nlstat_args /* {
1842 		char *path;
1843 		struct nstat *ub;
1844 	} */ *uap;
1845 {
1846 	int error;
1847 	struct vnode *vp;
1848 	struct stat sb;
1849 	struct nstat nsb;
1850 	struct nameidata nd;
1851 
1852 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1853 	    uap->path, td);
1854 	if ((error = namei(&nd)) != 0)
1855 		return (error);
1856 	vp = nd.ni_vp;
1857 	NDFREE(&nd, NDF_ONLY_PNBUF);
1858 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1859 	vput(vp);
1860 	if (error)
1861 		return (error);
1862 	cvtnstat(&sb, &nsb);
1863 	error = copyout(&nsb, uap->ub, sizeof (nsb));
1864 	return (error);
1865 }
1866 
1867 /*
1868  * Get configurable pathname variables.
1869  */
1870 #ifndef _SYS_SYSPROTO_H_
1871 struct pathconf_args {
1872 	char	*path;
1873 	int	name;
1874 };
1875 #endif
1876 /* ARGSUSED */
1877 int
1878 pathconf(td, uap)
1879 	struct thread *td;
1880 	register struct pathconf_args /* {
1881 		char *path;
1882 		int name;
1883 	} */ *uap;
1884 {
1885 	int error;
1886 	struct nameidata nd;
1887 
1888 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1889 	    uap->path, td);
1890 	if ((error = namei(&nd)) != 0)
1891 		return (error);
1892 	NDFREE(&nd, NDF_ONLY_PNBUF);
1893 
1894 	/* If asynchronous I/O is available, it works for all files. */
1895 	if (uap->name == _PC_ASYNC_IO)
1896 		td->td_retval[0] = async_io_version;
1897 	else
1898 		error = VOP_PATHCONF(nd.ni_vp, uap->name, td->td_retval);
1899 	vput(nd.ni_vp);
1900 	return (error);
1901 }
1902 
1903 /*
1904  * Return target name of a symbolic link.
1905  */
1906 #ifndef _SYS_SYSPROTO_H_
1907 struct readlink_args {
1908 	char	*path;
1909 	char	*buf;
1910 	int	count;
1911 };
1912 #endif
1913 /* ARGSUSED */
1914 int
1915 readlink(td, uap)
1916 	struct thread *td;
1917 	register struct readlink_args /* {
1918 		char *path;
1919 		char *buf;
1920 		int count;
1921 	} */ *uap;
1922 {
1923 
1924 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
1925 	    UIO_USERSPACE, uap->count));
1926 }
1927 
1928 int
1929 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
1930     enum uio_seg bufseg, int count)
1931 {
1932 	register struct vnode *vp;
1933 	struct iovec aiov;
1934 	struct uio auio;
1935 	int error;
1936 	struct nameidata nd;
1937 
1938 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1939 	if ((error = namei(&nd)) != 0)
1940 		return (error);
1941 	NDFREE(&nd, NDF_ONLY_PNBUF);
1942 	vp = nd.ni_vp;
1943 #ifdef MAC
1944 	error = mac_check_vnode_readlink(td->td_ucred, vp);
1945 	if (error) {
1946 		vput(vp);
1947 		return (error);
1948 	}
1949 #endif
1950 	if (vp->v_type != VLNK)
1951 		error = EINVAL;
1952 	else {
1953 		aiov.iov_base = buf;
1954 		aiov.iov_len = count;
1955 		auio.uio_iov = &aiov;
1956 		auio.uio_iovcnt = 1;
1957 		auio.uio_offset = 0;
1958 		auio.uio_rw = UIO_READ;
1959 		auio.uio_segflg = bufseg;
1960 		auio.uio_td = td;
1961 		auio.uio_resid = count;
1962 		error = VOP_READLINK(vp, &auio, td->td_ucred);
1963 	}
1964 	vput(vp);
1965 	td->td_retval[0] = count - auio.uio_resid;
1966 	return (error);
1967 }
1968 
1969 /*
1970  * Common implementation code for chflags() and fchflags().
1971  */
1972 static int
1973 setfflags(td, vp, flags)
1974 	struct thread *td;
1975 	struct vnode *vp;
1976 	int flags;
1977 {
1978 	int error;
1979 	struct mount *mp;
1980 	struct vattr vattr;
1981 
1982 	/*
1983 	 * Prevent non-root users from setting flags on devices.  When
1984 	 * a device is reused, users can retain ownership of the device
1985 	 * if they are allowed to set flags and programs assume that
1986 	 * chown can't fail when done as root.
1987 	 */
1988 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
1989 		error = suser_cred(td->td_ucred, PRISON_ROOT);
1990 		if (error)
1991 			return (error);
1992 	}
1993 
1994 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1995 		return (error);
1996 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1997 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1998 	VATTR_NULL(&vattr);
1999 	vattr.va_flags = flags;
2000 #ifdef MAC
2001 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2002 	if (error == 0)
2003 #endif
2004 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2005 	VOP_UNLOCK(vp, 0, td);
2006 	vn_finished_write(mp);
2007 	return (error);
2008 }
2009 
2010 /*
2011  * Change flags of a file given a path name.
2012  */
2013 #ifndef _SYS_SYSPROTO_H_
2014 struct chflags_args {
2015 	char	*path;
2016 	int	flags;
2017 };
2018 #endif
2019 /* ARGSUSED */
2020 int
2021 chflags(td, uap)
2022 	struct thread *td;
2023 	register struct chflags_args /* {
2024 		char *path;
2025 		int flags;
2026 	} */ *uap;
2027 {
2028 	int error;
2029 	struct nameidata nd;
2030 
2031 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
2032 	if ((error = namei(&nd)) != 0)
2033 		return (error);
2034 	NDFREE(&nd, NDF_ONLY_PNBUF);
2035 	error = setfflags(td, nd.ni_vp, uap->flags);
2036 	vrele(nd.ni_vp);
2037 	return error;
2038 }
2039 
2040 /*
2041  * Same as chflags() but doesn't follow symlinks.
2042  */
2043 int
2044 lchflags(td, uap)
2045 	struct thread *td;
2046 	register struct lchflags_args /* {
2047 		char *path;
2048 		int flags;
2049 	} */ *uap;
2050 {
2051 	int error;
2052 	struct nameidata nd;
2053 
2054 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2055 	if ((error = namei(&nd)) != 0)
2056 		return (error);
2057 	NDFREE(&nd, NDF_ONLY_PNBUF);
2058 	error = setfflags(td, nd.ni_vp, uap->flags);
2059 	vrele(nd.ni_vp);
2060 	return error;
2061 }
2062 
2063 /*
2064  * Change flags of a file given a file descriptor.
2065  */
2066 #ifndef _SYS_SYSPROTO_H_
2067 struct fchflags_args {
2068 	int	fd;
2069 	int	flags;
2070 };
2071 #endif
2072 /* ARGSUSED */
2073 int
2074 fchflags(td, uap)
2075 	struct thread *td;
2076 	register struct fchflags_args /* {
2077 		int fd;
2078 		int flags;
2079 	} */ *uap;
2080 {
2081 	struct file *fp;
2082 	int error;
2083 
2084 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2085 		return (error);
2086 	error = setfflags(td, fp->f_vnode, uap->flags);
2087 	fdrop(fp, td);
2088 	return (error);
2089 }
2090 
2091 /*
2092  * Common implementation code for chmod(), lchmod() and fchmod().
2093  */
2094 static int
2095 setfmode(td, vp, mode)
2096 	struct thread *td;
2097 	struct vnode *vp;
2098 	int mode;
2099 {
2100 	int error;
2101 	struct mount *mp;
2102 	struct vattr vattr;
2103 
2104 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2105 		return (error);
2106 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2107 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2108 	VATTR_NULL(&vattr);
2109 	vattr.va_mode = mode & ALLPERMS;
2110 #ifdef MAC
2111 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2112 	if (error == 0)
2113 #endif
2114 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2115 	VOP_UNLOCK(vp, 0, td);
2116 	vn_finished_write(mp);
2117 	return error;
2118 }
2119 
2120 /*
2121  * Change mode of a file given path name.
2122  */
2123 #ifndef _SYS_SYSPROTO_H_
2124 struct chmod_args {
2125 	char	*path;
2126 	int	mode;
2127 };
2128 #endif
2129 /* ARGSUSED */
2130 int
2131 chmod(td, uap)
2132 	struct thread *td;
2133 	register struct chmod_args /* {
2134 		char *path;
2135 		int mode;
2136 	} */ *uap;
2137 {
2138 
2139 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2140 }
2141 
2142 int
2143 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2144 {
2145 	int error;
2146 	struct nameidata nd;
2147 
2148 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2149 	if ((error = namei(&nd)) != 0)
2150 		return (error);
2151 	NDFREE(&nd, NDF_ONLY_PNBUF);
2152 	error = setfmode(td, nd.ni_vp, mode);
2153 	vrele(nd.ni_vp);
2154 	return error;
2155 }
2156 
2157 /*
2158  * Change mode of a file given path name (don't follow links.)
2159  */
2160 #ifndef _SYS_SYSPROTO_H_
2161 struct lchmod_args {
2162 	char	*path;
2163 	int	mode;
2164 };
2165 #endif
2166 /* ARGSUSED */
2167 int
2168 lchmod(td, uap)
2169 	struct thread *td;
2170 	register struct lchmod_args /* {
2171 		char *path;
2172 		int mode;
2173 	} */ *uap;
2174 {
2175 	int error;
2176 	struct nameidata nd;
2177 
2178 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2179 	if ((error = namei(&nd)) != 0)
2180 		return (error);
2181 	NDFREE(&nd, NDF_ONLY_PNBUF);
2182 	error = setfmode(td, nd.ni_vp, uap->mode);
2183 	vrele(nd.ni_vp);
2184 	return error;
2185 }
2186 
2187 /*
2188  * Change mode of a file given a file descriptor.
2189  */
2190 #ifndef _SYS_SYSPROTO_H_
2191 struct fchmod_args {
2192 	int	fd;
2193 	int	mode;
2194 };
2195 #endif
2196 /* ARGSUSED */
2197 int
2198 fchmod(td, uap)
2199 	struct thread *td;
2200 	register struct fchmod_args /* {
2201 		int fd;
2202 		int mode;
2203 	} */ *uap;
2204 {
2205 	struct file *fp;
2206 	int error;
2207 
2208 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2209 		return (error);
2210 	error = setfmode(td, fp->f_vnode, uap->mode);
2211 	fdrop(fp, td);
2212 	return (error);
2213 }
2214 
2215 /*
2216  * Common implementation for chown(), lchown(), and fchown()
2217  */
2218 static int
2219 setfown(td, vp, uid, gid)
2220 	struct thread *td;
2221 	struct vnode *vp;
2222 	uid_t uid;
2223 	gid_t gid;
2224 {
2225 	int error;
2226 	struct mount *mp;
2227 	struct vattr vattr;
2228 
2229 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2230 		return (error);
2231 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2232 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2233 	VATTR_NULL(&vattr);
2234 	vattr.va_uid = uid;
2235 	vattr.va_gid = gid;
2236 #ifdef MAC
2237 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2238 	    vattr.va_gid);
2239 	if (error == 0)
2240 #endif
2241 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2242 	VOP_UNLOCK(vp, 0, td);
2243 	vn_finished_write(mp);
2244 	return error;
2245 }
2246 
2247 /*
2248  * Set ownership given a path name.
2249  */
2250 #ifndef _SYS_SYSPROTO_H_
2251 struct chown_args {
2252 	char	*path;
2253 	int	uid;
2254 	int	gid;
2255 };
2256 #endif
2257 /* ARGSUSED */
2258 int
2259 chown(td, uap)
2260 	struct thread *td;
2261 	register struct chown_args /* {
2262 		char *path;
2263 		int uid;
2264 		int gid;
2265 	} */ *uap;
2266 {
2267 
2268 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2269 }
2270 
2271 int
2272 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2273     int gid)
2274 {
2275 	int error;
2276 	struct nameidata nd;
2277 
2278 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2279 	if ((error = namei(&nd)) != 0)
2280 		return (error);
2281 	NDFREE(&nd, NDF_ONLY_PNBUF);
2282 	error = setfown(td, nd.ni_vp, uid, gid);
2283 	vrele(nd.ni_vp);
2284 	return (error);
2285 }
2286 
2287 /*
2288  * Set ownership given a path name, do not cross symlinks.
2289  */
2290 #ifndef _SYS_SYSPROTO_H_
2291 struct lchown_args {
2292 	char	*path;
2293 	int	uid;
2294 	int	gid;
2295 };
2296 #endif
2297 /* ARGSUSED */
2298 int
2299 lchown(td, uap)
2300 	struct thread *td;
2301 	register struct lchown_args /* {
2302 		char *path;
2303 		int uid;
2304 		int gid;
2305 	} */ *uap;
2306 {
2307 
2308 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2309 }
2310 
2311 int
2312 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2313     int gid)
2314 {
2315 	int error;
2316 	struct nameidata nd;
2317 
2318 	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2319 	if ((error = namei(&nd)) != 0)
2320 		return (error);
2321 	NDFREE(&nd, NDF_ONLY_PNBUF);
2322 	error = setfown(td, nd.ni_vp, uid, gid);
2323 	vrele(nd.ni_vp);
2324 	return (error);
2325 }
2326 
2327 /*
2328  * Set ownership given a file descriptor.
2329  */
2330 #ifndef _SYS_SYSPROTO_H_
2331 struct fchown_args {
2332 	int	fd;
2333 	int	uid;
2334 	int	gid;
2335 };
2336 #endif
2337 /* ARGSUSED */
2338 int
2339 fchown(td, uap)
2340 	struct thread *td;
2341 	register struct fchown_args /* {
2342 		int fd;
2343 		int uid;
2344 		int gid;
2345 	} */ *uap;
2346 {
2347 	struct file *fp;
2348 	int error;
2349 
2350 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2351 		return (error);
2352 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2353 	fdrop(fp, td);
2354 	return (error);
2355 }
2356 
2357 /*
2358  * Common implementation code for utimes(), lutimes(), and futimes().
2359  */
2360 static int
2361 getutimes(usrtvp, tvpseg, tsp)
2362 	const struct timeval *usrtvp;
2363 	enum uio_seg tvpseg;
2364 	struct timespec *tsp;
2365 {
2366 	struct timeval tv[2];
2367 	const struct timeval *tvp;
2368 	int error;
2369 
2370 	if (usrtvp == NULL) {
2371 		microtime(&tv[0]);
2372 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2373 		tsp[1] = tsp[0];
2374 	} else {
2375 		if (tvpseg == UIO_SYSSPACE) {
2376 			tvp = usrtvp;
2377 		} else {
2378 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2379 				return (error);
2380 			tvp = tv;
2381 		}
2382 
2383 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2384 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2385 	}
2386 	return 0;
2387 }
2388 
2389 /*
2390  * Common implementation code for utimes(), lutimes(), and futimes().
2391  */
2392 static int
2393 setutimes(td, vp, ts, numtimes, nullflag)
2394 	struct thread *td;
2395 	struct vnode *vp;
2396 	const struct timespec *ts;
2397 	int numtimes;
2398 	int nullflag;
2399 {
2400 	int error, setbirthtime;
2401 	struct mount *mp;
2402 	struct vattr vattr;
2403 
2404 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2405 		return (error);
2406 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2407 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2408 	setbirthtime = 0;
2409 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2410 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2411 		setbirthtime = 1;
2412 	VATTR_NULL(&vattr);
2413 	vattr.va_atime = ts[0];
2414 	vattr.va_mtime = ts[1];
2415 	if (setbirthtime)
2416 		vattr.va_birthtime = ts[1];
2417 	if (numtimes > 2)
2418 		vattr.va_birthtime = ts[2];
2419 	if (nullflag)
2420 		vattr.va_vaflags |= VA_UTIMES_NULL;
2421 #ifdef MAC
2422 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2423 	    vattr.va_mtime);
2424 #endif
2425 	if (error == 0)
2426 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2427 	VOP_UNLOCK(vp, 0, td);
2428 	vn_finished_write(mp);
2429 	return error;
2430 }
2431 
2432 /*
2433  * Set the access and modification times of a file.
2434  */
2435 #ifndef _SYS_SYSPROTO_H_
2436 struct utimes_args {
2437 	char	*path;
2438 	struct	timeval *tptr;
2439 };
2440 #endif
2441 /* ARGSUSED */
2442 int
2443 utimes(td, uap)
2444 	struct thread *td;
2445 	register struct utimes_args /* {
2446 		char *path;
2447 		struct timeval *tptr;
2448 	} */ *uap;
2449 {
2450 
2451 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2452 	    UIO_USERSPACE));
2453 }
2454 
2455 int
2456 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2457     struct timeval *tptr, enum uio_seg tptrseg)
2458 {
2459 	struct timespec ts[2];
2460 	int error;
2461 	struct nameidata nd;
2462 
2463 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2464 		return (error);
2465 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2466 	if ((error = namei(&nd)) != 0)
2467 		return (error);
2468 	NDFREE(&nd, NDF_ONLY_PNBUF);
2469 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2470 	vrele(nd.ni_vp);
2471 	return (error);
2472 }
2473 
2474 /*
2475  * Set the access and modification times of a file.
      *
      * System call entry point.  Forwards to kern_lutimes(), which performs
      * the lookup with NOFOLLOW, so a trailing symbolic link is operated on
      * itself rather than its target.  Path and timeval pointer are both
      * user-space addresses (UIO_USERSPACE).
2476  */
2477 #ifndef _SYS_SYSPROTO_H_
2478 struct lutimes_args {
2479 	char	*path;
2480 	struct	timeval *tptr;
2481 };
2482 #endif
2483 /* ARGSUSED */
2484 int
2485 lutimes(td, uap)
2486 	struct thread *td;
2487 	register struct lutimes_args /* {
2488 		char *path;
2489 		struct timeval *tptr;
2490 	} */ *uap;
2491 {
2492 
2493 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2494 	    UIO_USERSPACE));
2495 }
2496 
/*
 * Path-based worker behind lutimes().
 *
 * Same sequence as kern_utimes() (getutimes(), namei(), setutimes(),
 * vrele()), except that the lookup is initialised with NOFOLLOW instead
 * of FOLLOW.  pathseg and tptrseg name the address space of path and
 * tptr.  Returns 0 or the first error encountered.
 */
2497 int
2498 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2499     struct timeval *tptr, enum uio_seg tptrseg)
2500 {
2501 	struct timespec ts[2];	/* ts[0] = access time, ts[1] = modification time */
2502 	int error;
2503 	struct nameidata nd;
2504 
2505 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2506 		return (error);
2507 	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2508 	if ((error = namei(&nd)) != 0)
2509 		return (error);
2510 	NDFREE(&nd, NDF_ONLY_PNBUF);
2511 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
	/* Drop the vnode reference that namei() handed back. */
2512 	vrele(nd.ni_vp);
2513 	return (error);
2514 }
2515 
2516 /*
2517  * Set the access and modification times of a file.
      *
      * System call entry point for the descriptor-based variant: the file is
      * named by uap->fd rather than by a path.  Forwards to kern_futimes()
      * with the timeval pointer tagged as a user-space address.
2518  */
2519 #ifndef _SYS_SYSPROTO_H_
2520 struct futimes_args {
2521 	int	fd;
2522 	struct	timeval *tptr;
2523 };
2524 #endif
2525 /* ARGSUSED */
2526 int
2527 futimes(td, uap)
2528 	struct thread *td;
2529 	register struct futimes_args /* {
2530 		int  fd;
2531 		struct timeval *tptr;
2532 	} */ *uap;
2533 {
2534 
2535 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2536 }
2537 
/*
 * Descriptor-based worker behind futimes().
 *
 * tptr (in the address space given by tptrseg) is passed to getutimes()
 * to fill in ts[].  The descriptor is resolved with getvnode(), the
 * timestamps are applied to fp->f_vnode with setutimes(), and the file
 * reference taken by getvnode() is dropped again with fdrop().
 * Returns 0 or the first error encountered.
 */
2538 int
2539 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2540     enum uio_seg tptrseg)
2541 {
2542 	struct timespec ts[2];	/* ts[0] = access time, ts[1] = modification time */
2543 	struct file *fp;
2544 	int error;
2545 
2546 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2547 		return (error);
2548 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2549 		return (error);
2550 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2551 	fdrop(fp, td);
2552 	return (error);
2553 }
2554 
2555 /*
2556  * Truncate a file given its path name.
      *
      * System call entry point.  The "pad" member of the argument structure
      * is not used here; only path and length are forwarded to
      * kern_truncate(), with the path tagged as a user-space address.
2557  */
2558 #ifndef _SYS_SYSPROTO_H_
2559 struct truncate_args {
2560 	char	*path;
2561 	int	pad;
2562 	off_t	length;
2563 };
2564 #endif
2565 /* ARGSUSED */
2566 int
2567 truncate(td, uap)
2568 	struct thread *td;
2569 	register struct truncate_args /* {
2570 		char *path;
2571 		int pad;
2572 		off_t length;
2573 	} */ *uap;
2574 {
2575 
2576 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2577 }
2578 
/*
 * Path-based worker behind truncate().
 *
 * Returns EINVAL for a negative length.  Otherwise the path (in the
 * address space given by pathseg) is looked up following symbolic
 * links, write access to the mount is started with vn_start_write(),
 * and the vnode is locked exclusively.  Directories are rejected with
 * EISDIR.  If the MAC check (when compiled in), vn_writechk() and the
 * VWRITE access check all pass, the size is changed by a VOP_SETATTR()
 * in which only va_size is set.
 *
 * Returns 0 or an errno value.
 */
2579 int
2580 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2581 {
2582 	struct mount *mp;
2583 	struct vnode *vp;
2584 	struct vattr vattr;
2585 	int error;
2586 	struct nameidata nd;
2587 
2588 	if (length < 0)
2589 		return(EINVAL);
2590 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2591 	if ((error = namei(&nd)) != 0)
2592 		return (error);
2593 	vp = nd.ni_vp;
2594 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2595 		vrele(vp);
2596 		return (error);
2597 	}
2598 	NDFREE(&nd, NDF_ONLY_PNBUF);
2599 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2600 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2601 	if (vp->v_type == VDIR)
2602 		error = EISDIR;
2603 #ifdef MAC
	/* Empty body: a non-zero MAC result just ends the else-if chain. */
2604 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2605 	}
2606 #endif
2607 	else if ((error = vn_writechk(vp)) == 0 &&
2608 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
		/* VATTR_NULL() first, so va_size is the only attribute set. */
2609 		VATTR_NULL(&vattr);
2610 		vattr.va_size = length;
2611 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2612 	}
	/* vput() both unlocks the vnode and drops the namei() reference. */
2613 	vput(vp);
2614 	vn_finished_write(mp);
2615 	return (error);
2616 }
2617 
2618 /*
2619  * Truncate a file given a file descriptor.
      *
      * Returns EINVAL for a negative length or for a descriptor that was not
      * opened for writing (FWRITE clear in f_flag), EISDIR for a directory,
      * and otherwise the result of the MAC check (when compiled in),
      * vn_writechk() or VOP_SETATTR().  Unlike kern_truncate() there is no
      * VOP_ACCESS() call here, and the attribute change is made with the
      * credential stored in the file (fp->f_cred).
2620  */
2621 #ifndef _SYS_SYSPROTO_H_
2622 struct ftruncate_args {
2623 	int	fd;
2624 	int	pad;
2625 	off_t	length;
2626 };
2627 #endif
2628 /* ARGSUSED */
2629 int
2630 ftruncate(td, uap)
2631 	struct thread *td;
2632 	register struct ftruncate_args /* {
2633 		int fd;
2634 		int pad;
2635 		off_t length;
2636 	} */ *uap;
2637 {
2638 	struct mount *mp;
2639 	struct vattr vattr;
2640 	struct vnode *vp;
2641 	struct file *fp;
2642 	int error;
2643 
2644 	if (uap->length < 0)
2645 		return(EINVAL);
2646 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2647 		return (error);
2648 	if ((fp->f_flag & FWRITE) == 0) {
2649 		fdrop(fp, td);
2650 		return (EINVAL);
2651 	}
2652 	vp = fp->f_vnode;
2653 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2654 		fdrop(fp, td);
2655 		return (error);
2656 	}
2657 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2658 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2659 	if (vp->v_type == VDIR)
2660 		error = EISDIR;
2661 #ifdef MAC
	/* Empty body: a non-zero MAC result just ends the else-if chain. */
2662 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
2663 	    vp))) {
2664 	}
2665 #endif
2666 	else if ((error = vn_writechk(vp)) == 0) {
		/* VATTR_NULL() first, so va_size is the only attribute set. */
2667 		VATTR_NULL(&vattr);
2668 		vattr.va_size = uap->length;
2669 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2670 	}
	/* Only unlock: the vnode reference belongs to fp, released by fdrop(). */
2671 	VOP_UNLOCK(vp, 0, td);
2672 	vn_finished_write(mp);
2673 	fdrop(fp, td);
2674 	return (error);
2675 }
2676 
2677 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2678 /*
2679  * Truncate a file given its path name.
      *
      * Compatibility entry point (built only under COMPAT_43 or
      * COMPAT_SUNOS) whose length argument is a long.  It repackages its
      * arguments into a truncate_args structure and calls truncate().
2680  */
2681 #ifndef _SYS_SYSPROTO_H_
2682 struct otruncate_args {
2683 	char	*path;
2684 	long	length;
2685 };
2686 #endif
2687 /* ARGSUSED */
2688 int
2689 otruncate(td, uap)
2690 	struct thread *td;
2691 	register struct otruncate_args /* {
2692 		char *path;
2693 		long length;
2694 	} */ *uap;
2695 {
2696 	struct truncate_args /* {
2697 		char *path;
2698 		int pad;
2699 		off_t length;
2700 	} */ nuap;
2701 
	/* nuap.pad is left unset; truncate() does not read it. */
2702 	nuap.path = uap->path;
2703 	nuap.length = uap->length;
2704 	return (truncate(td, &nuap));
2705 }
2706 
2707 /*
2708  * Truncate a file given a file descriptor.
      *
      * Compatibility entry point (built only under COMPAT_43 or
      * COMPAT_SUNOS) whose length argument is a long.  It repackages its
      * arguments into an ftruncate_args structure and calls ftruncate().
2709  */
2710 #ifndef _SYS_SYSPROTO_H_
2711 struct oftruncate_args {
2712 	int	fd;
2713 	long	length;
2714 };
2715 #endif
2716 /* ARGSUSED */
2717 int
2718 oftruncate(td, uap)
2719 	struct thread *td;
2720 	register struct oftruncate_args /* {
2721 		int fd;
2722 		long length;
2723 	} */ *uap;
2724 {
2725 	struct ftruncate_args /* {
2726 		int fd;
2727 		int pad;
2728 		off_t length;
2729 	} */ nuap;
2730 
	/* nuap.pad is left unset; ftruncate() does not read it. */
2731 	nuap.fd = uap->fd;
2732 	nuap.length = uap->length;
2733 	return (ftruncate(td, &nuap));
2734 }
2735 #endif /* COMPAT_43 || COMPAT_SUNOS */
2736 
2737 /*
2738  * Sync an open file.
      *
      * Resolves uap->fd with getvnode(), starts write access on the mount,
      * and locks the vnode exclusively.  If the vnode has a VM object, that
      * object is passed to vm_object_page_clean() first; VOP_FSYNC() is then
      * called with MNT_WAIT.  When that succeeds on a mount flagged
      * MNT_SOFTDEP and a soft-updates hook is registered, the hook is called
      * as well and its result becomes the return value.
2739  */
2740 #ifndef _SYS_SYSPROTO_H_
2741 struct fsync_args {
2742 	int	fd;
2743 };
2744 #endif
2745 /* ARGSUSED */
2746 int
2747 fsync(td, uap)
2748 	struct thread *td;
2749 	struct fsync_args /* {
2750 		int fd;
2751 	} */ *uap;
2752 {
2753 	struct vnode *vp;
2754 	struct mount *mp;
2755 	struct file *fp;
2756 	vm_object_t obj;
2757 	int error;
2758 
2759 	GIANT_REQUIRED;
2760 
2761 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2762 		return (error);
2763 	vp = fp->f_vnode;
2764 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2765 		fdrop(fp, td);
2766 		return (error);
2767 	}
2768 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2769 	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2770 		VM_OBJECT_LOCK(obj);
2771 		vm_object_page_clean(obj, 0, 0, 0);
2772 		VM_OBJECT_UNLOCK(obj);
2773 	}
2774 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2775 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)
2776 	    && softdep_fsync_hook != NULL)
2777 		error = (*softdep_fsync_hook)(vp);
2778 
2779 	VOP_UNLOCK(vp, 0, td);
2780 	vn_finished_write(mp);
2781 	fdrop(fp, td);
2782 	return (error);
2783 }
2784 
2785 /*
2786  * Rename files.  Source and destination must either both be directories,
2787  * or both not be directories.  If target is a directory, it must be empty.
      *
      * System call entry point: both path names are user-space addresses and
      * are forwarded unchanged to kern_rename() with UIO_USERSPACE.
2788  */
2789 #ifndef _SYS_SYSPROTO_H_
2790 struct rename_args {
2791 	char	*from;
2792 	char	*to;
2793 };
2794 #endif
2795 /* ARGSUSED */
2796 int
2797 rename(td, uap)
2798 	struct thread *td;
2799 	register struct rename_args /* {
2800 		char *from;
2801 		char *to;
2802 	} */ *uap;
2803 {
2804 
2805 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
2806 }
2807 
/*
 * Worker behind rename().
 *
 * "from" and "to" are path names in the address space given by pathseg.
 * The source is looked up for DELETE and the target for RENAME; the
 * vnode types are cross-checked (ENOTDIR / EISDIR when exactly one of
 * them is a directory), renaming an object into itself is refused with
 * EINVAL, and the actual work is delegated to VOP_RENAME().
 *
 * Internally error == -1 means "source and target are the same vnode,
 * nothing to do"; it is turned into a successful return of 0 at the end.
 *
 * Returns 0 or an errno value.
 */
2808 int
2809 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
2810 {
2811 	struct mount *mp = NULL;
2812 	struct vnode *tvp, *fvp, *tdvp;
2813 	struct nameidata fromnd, tond;
2814 	int error;
2815 
2816 	bwillwrite();
2817 #ifdef MAC
	/* The MAC check below is made with the source and its parent locked. */
2818 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART, pathseg,
2819 	    from, td);
2820 #else
2821 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, pathseg, from, td);
2822 #endif
2823 	if ((error = namei(&fromnd)) != 0)
2824 		return (error);
2825 #ifdef MAC
2826 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
2827 	    fromnd.ni_vp, &fromnd.ni_cnd);
2828 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
2829 	VOP_UNLOCK(fromnd.ni_vp, 0, td);
2830 #endif
2831 	fvp = fromnd.ni_vp;
2832 	if (error == 0)
2833 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
2834 	if (error != 0) {
2835 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2836 		vrele(fromnd.ni_dvp);
2837 		vrele(fvp);
2838 		goto out1;
2839 	}
2840 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
2841 	    NOOBJ, pathseg, to, td);
2842 	if (fromnd.ni_vp->v_type == VDIR)
2843 		tond.ni_cnd.cn_flags |= WILLBEDIR;
2844 	if ((error = namei(&tond)) != 0) {
2845 		/* Translate error code for rename("dir1", "dir2/."). */
2846 		if (error == EISDIR && fvp->v_type == VDIR)
2847 			error = EINVAL;
2848 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2849 		vrele(fromnd.ni_dvp);
2850 		vrele(fvp);
2851 		goto out1;
2852 	}
2853 	tdvp = tond.ni_dvp;
2854 	tvp = tond.ni_vp;
2855 	if (tvp != NULL) {
2856 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2857 			error = ENOTDIR;
2858 			goto out;
2859 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2860 			error = EISDIR;
2861 			goto out;
2862 		}
2863 	}
	/* The source may not be the directory the target lives in. */
2864 	if (fvp == tdvp)
2865 		error = EINVAL;
2866 	/*
2867 	 * If the source is the same as the destination (that is, if they
2868 	 * are links to the same vnode), then there is nothing to do.
	 *
	 * This test is chained to the one above with "else if" so that the
	 * EINVAL set there is final.  As two independent "if" statements,
	 * the trailing MAC "else" branch ran whenever fvp != tvp and
	 * replaced the EINVAL with the MAC result, letting VOP_RENAME() run.
2869 	 */
2870 	else if (fvp == tvp)
2871 		error = -1;
2872 #ifdef MAC
2873 	else
2874 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
2875 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
2876 #endif
2877 out:
2878 	if (!error) {
2879 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
2880 		if (fromnd.ni_dvp != tdvp) {
2881 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2882 		}
2883 		if (tvp) {
2884 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
2885 		}
		/*
		 * No vput()/vrele() of the four vnodes on this path, unlike
		 * the failure branch below.
		 */
2886 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2887 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2888 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2889 		NDFREE(&tond, NDF_ONLY_PNBUF);
2890 	} else {
2891 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2892 		NDFREE(&tond, NDF_ONLY_PNBUF);
		/* Avoid unlocking the same vnode twice when tdvp == tvp. */
2893 		if (tdvp == tvp)
2894 			vrele(tdvp);
2895 		else
2896 			vput(tdvp);
2897 		if (tvp)
2898 			vput(tvp);
2899 		vrele(fromnd.ni_dvp);
2900 		vrele(fvp);
2901 	}
2902 	vrele(tond.ni_startdir);
2903 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2904 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2905 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2906 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2907 out1:
2908 	vn_finished_write(mp);
2909 	if (fromnd.ni_startdir)
2910 		vrele(fromnd.ni_startdir);
	/* -1 is the internal "same vnode, nothing to do" marker. */
2911 	if (error == -1)
2912 		return (0);
2913 	return (error);
2914 }
2915 
2916 /*
2917  * Make a directory file.
      *
      * System call entry point: forwards the user-space path and the
      * requested mode to kern_mkdir().
2918  */
2919 #ifndef _SYS_SYSPROTO_H_
2920 struct mkdir_args {
2921 	char	*path;
2922 	int	mode;
2923 };
2924 #endif
2925 /* ARGSUSED */
2926 int
2927 mkdir(td, uap)
2928 	struct thread *td;
2929 	register struct mkdir_args /* {
2930 		char *path;
2931 		int mode;
2932 	} */ *uap;
2933 {
2934 
2935 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
2936 }
2937 
/*
 * Worker behind mkdir().
 *
 * path lives in the address space given by segflg.  The name is looked
 * up for CREATE with the parent locked; an existing entry yields EEXIST.
 * The new directory's mode is the ACCESSPERMS bits of "mode" with the
 * process file-creation mask (fd_cmask) removed.  The directory itself
 * is created by VOP_MKDIR().
 *
 * Returns 0 or an errno value.
 */
2938 int
2939 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
2940 {
2941 	struct mount *mp;
2942 	struct vnode *vp;
2943 	struct vattr vattr;
2944 	int error;
2945 	struct nameidata nd;
2946 
2947 restart:
2948 	bwillwrite();
2949 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, path, td);
2950 	nd.ni_cnd.cn_flags |= WILLBEDIR;
2951 	if ((error = namei(&nd)) != 0)
2952 		return (error);
2953 	vp = nd.ni_vp;
2954 	if (vp != NULL) {
2955 		NDFREE(&nd, NDF_ONLY_PNBUF);
2956 		vrele(vp);
2957 		/*
2958 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
2959 		 * the strange behaviour of leaving the vnode unlocked
2960 		 * if the target is the same vnode as the parent.
2961 		 */
2962 		if (vp == nd.ni_dvp)
2963 			vrele(nd.ni_dvp);
2964 		else
2965 			vput(nd.ni_dvp);
2966 		return (EEXIST);
2967 	}
	/*
	 * Try to start the write without sleeping.  If that fails, release
	 * the parent, wait (V_XSLEEP) and redo the lookup from the top.
	 */
2968 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2969 		NDFREE(&nd, NDF_ONLY_PNBUF);
2970 		vput(nd.ni_dvp);
2971 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2972 			return (error);
2973 		goto restart;
2974 	}
2975 	VATTR_NULL(&vattr);
2976 	vattr.va_type = VDIR;
	/* fd_cmask is read under the file descriptor table lock. */
2977 	FILEDESC_LOCK(td->td_proc->p_fd);
2978 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
2979 	FILEDESC_UNLOCK(td->td_proc->p_fd);
2980 #ifdef MAC
2981 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
2982 	    &vattr);
2983 	if (error)
2984 		goto out;
2985 #endif
2986 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2987 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2988 #ifdef MAC
2989 out:
2990 #endif
2991 	NDFREE(&nd, NDF_ONLY_PNBUF);
2992 	vput(nd.ni_dvp);
	/* The new directory's vnode is released only if VOP_MKDIR() succeeded. */
2993 	if (!error)
2994 		vput(nd.ni_vp);
2995 	vn_finished_write(mp);
2996 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2997 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2998 	return (error);
2999 }
3000 
3001 /*
3002  * Remove a directory file.
      *
      * System call entry point: forwards the user-space path to kern_rmdir().
3003  */
3004 #ifndef _SYS_SYSPROTO_H_
3005 struct rmdir_args {
3006 	char	*path;
3007 };
3008 #endif
3009 /* ARGSUSED */
3010 int
3011 rmdir(td, uap)
3012 	struct thread *td;
3013 	struct rmdir_args /* {
3014 		char *path;
3015 	} */ *uap;
3016 {
3017 
3018 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3019 }
3020 
/*
 * Worker behind rmdir().
 *
 * path lives in the address space given by pathseg.  The name is looked
 * up for DELETE with both parent and leaf locked.  The request fails
 * with ENOTDIR if the leaf is not a directory, EINVAL if the leaf is the
 * same vnode as its parent, and EBUSY if the leaf is flagged VV_ROOT.
 * Otherwise the removal is performed by VOP_RMDIR().
 *
 * Returns 0 or an errno value.
 */
3021 int
3022 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3023 {
3024 	struct mount *mp;
3025 	struct vnode *vp;
3026 	int error;
3027 	struct nameidata nd;
3028 
3029 restart:
3030 	bwillwrite();
3031 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, pathseg, path, td);
3032 	if ((error = namei(&nd)) != 0)
3033 		return (error);
3034 	vp = nd.ni_vp;
3035 	if (vp->v_type != VDIR) {
3036 		error = ENOTDIR;
3037 		goto out;
3038 	}
3039 	/*
3040 	 * No rmdir "." please.
3041 	 */
3042 	if (nd.ni_dvp == vp) {
3043 		error = EINVAL;
3044 		goto out;
3045 	}
3046 	/*
3047 	 * The root of a mounted filesystem cannot be deleted.
3048 	 */
3049 	if (vp->v_vflag & VV_ROOT) {
3050 		error = EBUSY;
3051 		goto out;
3052 	}
3053 #ifdef MAC
3054 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3055 	    &nd.ni_cnd);
3056 	if (error)
3057 		goto out;
3058 #endif
	/*
	 * Try to start the write without sleeping.  If that fails, release
	 * both vnodes, wait (V_XSLEEP) and redo the lookup from the top.
	 */
3059 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3060 		NDFREE(&nd, NDF_ONLY_PNBUF);
3061 		if (nd.ni_dvp == vp)
3062 			vrele(nd.ni_dvp);
3063 		else
3064 			vput(nd.ni_dvp);
3065 		vput(vp);
3066 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3067 			return (error);
3068 		goto restart;
3069 	}
3070 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3071 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3072 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3073 	vn_finished_write(mp);
3074 out:
3075 	NDFREE(&nd, NDF_ONLY_PNBUF);
	/* Avoid unlocking the same vnode twice when parent and leaf coincide. */
3076 	if (nd.ni_dvp == vp)
3077 		vrele(nd.ni_dvp);
3078 	else
3079 		vput(nd.ni_dvp);
3080 	vput(vp);
3081 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3082 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3083 	return (error);
3084 }
3085 
3086 #ifdef COMPAT_43
3087 /*
3088  * Read a block of directory entries in a filesystem independent format.
      *
      * Compatibility variant (built only under COMPAT_43).  Requests larger
      * than 64KB fail with EINVAL, descriptors not open for reading with
      * EBADF, and non-directories with EINVAL.  Except in the big-endian
      * pass-through case below, the entries are read into a temporary kernel
      * buffer, each record's d_type/d_namlen pair is rewritten into the
      * older layout, and the result is copied to the user buffer.
      *
      * The directory offset in effect before the read is copied out to
      * *uap->basep, and the number of bytes transferred is returned in
      * td->td_retval[0].
3089  */
3090 #ifndef _SYS_SYSPROTO_H_
3091 struct ogetdirentries_args {
3092 	int	fd;
3093 	char	*buf;
3094 	u_int	count;
3095 	long	*basep;
3096 };
3097 #endif
3098 int
3099 ogetdirentries(td, uap)
3100 	struct thread *td;
3101 	register struct ogetdirentries_args /* {
3102 		int fd;
3103 		char *buf;
3104 		u_int count;
3105 		long *basep;
3106 	} */ *uap;
3107 {
3108 	struct vnode *vp;
3109 	struct file *fp;
3110 	struct uio auio, kuio;
3111 	struct iovec aiov, kiov;
3112 	struct dirent *dp, *edp;
3113 	caddr_t dirbuf;
3114 	int error, eofflag, readcnt;
3115 	long loff;
3116 
3117 	/* XXX arbitrary sanity limit on `count'. */
3118 	if (uap->count > 64 * 1024)
3119 		return (EINVAL);
3120 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3121 		return (error);
3122 	if ((fp->f_flag & FREAD) == 0) {
3123 		fdrop(fp, td);
3124 		return (EBADF);
3125 	}
3126 	vp = fp->f_vnode;
	/* Re-entered with a different vp when a union layer is exhausted. */
3127 unionread:
3128 	if (vp->v_type != VDIR) {
3129 		fdrop(fp, td);
3130 		return (EINVAL);
3131 	}
	/* auio describes the caller's buffer in user space. */
3132 	aiov.iov_base = uap->buf;
3133 	aiov.iov_len = uap->count;
3134 	auio.uio_iov = &aiov;
3135 	auio.uio_iovcnt = 1;
3136 	auio.uio_rw = UIO_READ;
3137 	auio.uio_segflg = UIO_USERSPACE;
3138 	auio.uio_td = td;
3139 	auio.uio_resid = uap->count;
3140 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember the starting offset; it is what gets reported via basep. */
3141 	loff = auio.uio_offset = fp->f_offset;
3142 #ifdef MAC
3143 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3144 	if (error) {
3145 		VOP_UNLOCK(vp, 0, td);
3146 		fdrop(fp, td);
3147 		return (error);
3148 	}
3149 #endif
3150 #	if (BYTE_ORDER != LITTLE_ENDIAN)
		/* Big-endian only: read straight into the user buffer. */
3151 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3152 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3153 			    NULL, NULL);
3154 			fp->f_offset = auio.uio_offset;
3155 		} else
3156 #	endif
3157 	{
		/* kuio is a copy of auio redirected at a kernel bounce buffer. */
3158 		kuio = auio;
3159 		kuio.uio_iov = &kiov;
3160 		kuio.uio_segflg = UIO_SYSSPACE;
3161 		kiov.iov_len = uap->count;
3162 		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3163 		kiov.iov_base = dirbuf;
3164 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3165 			    NULL, NULL);
3166 		fp->f_offset = kuio.uio_offset;
3167 		if (error == 0) {
3168 			readcnt = uap->count - kuio.uio_resid;
3169 			edp = (struct dirent *)&dirbuf[readcnt];
3170 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3171 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3172 					/*
3173 					 * The expected low byte of
3174 					 * dp->d_namlen is our dp->d_type.
3175 					 * The high MBZ byte of dp->d_namlen
3176 					 * is our dp->d_namlen.
3177 					 */
3178 					dp->d_type = dp->d_namlen;
3179 					dp->d_namlen = 0;
3180 #				else
3181 					/*
3182 					 * The dp->d_type is the high byte
3183 					 * of the expected dp->d_namlen,
3184 					 * so must be zero'ed.
3185 					 */
3186 					dp->d_type = 0;
3187 #				endif
				/* A zero record length would loop forever: EIO. */
3188 				if (dp->d_reclen > 0) {
3189 					dp = (struct dirent *)
3190 					    ((char *)dp + dp->d_reclen);
3191 				} else {
3192 					error = EIO;
3193 					break;
3194 				}
3195 			}
			/* Copy out only if the whole buffer was walked cleanly. */
3196 			if (dp >= edp)
3197 				error = uiomove(dirbuf, readcnt, &auio);
3198 		}
3199 		FREE(dirbuf, M_TEMP);
3200 	}
3201 	VOP_UNLOCK(vp, 0, td);
3202 	if (error) {
3203 		fdrop(fp, td);
3204 		return (error);
3205 	}
	/* Nothing was transferred: see whether another union layer follows. */
3206 	if (uap->count == auio.uio_resid) {
3207 		if (union_dircheckp) {
3208 			error = union_dircheckp(td, &vp, fp);
3209 			if (error == -1)
3210 				goto unionread;
3211 			if (error) {
3212 				fdrop(fp, td);
3213 				return (error);
3214 			}
3215 		}
3216 		/*
3217 		 * XXX We could delay dropping the lock above but
3218 		 * union_dircheckp complicates things.
3219 		 */
3220 		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
3221 		if ((vp->v_vflag & VV_ROOT) &&
3222 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			/*
			 * Root of a MNT_UNION mount: switch the open file to
			 * the covered vnode and read it from offset 0.
			 */
3223 			struct vnode *tvp = vp;
3224 			vp = vp->v_mount->mnt_vnodecovered;
3225 			VREF(vp);
3226 			fp->f_vnode = vp;
3227 			fp->f_data = vp;
3228 			fp->f_offset = 0;
3229 			vput(tvp);
3230 			goto unionread;
3231 		}
3232 		VOP_UNLOCK(vp, 0, td);
3233 	}
3234 	error = copyout(&loff, uap->basep, sizeof(long));
3235 	fdrop(fp, td);
3236 	td->td_retval[0] = uap->count - auio.uio_resid;
3237 	return (error);
3238 }
3239 #endif /* COMPAT_43 */
3240 
3241 /*
3242  * Read a block of directory entries in a filesystem independent format.
      *
      * Descriptors not open for reading fail with EBADF and non-directories
      * with EINVAL.  The entries are read directly into the user buffer by
      * VOP_READDIR().  If uap->basep is not NULL, the directory offset in
      * effect before the read is copied out to it.  The number of bytes
      * transferred is returned in td->td_retval[0].
      *
      * NOTE(review): uap->count is a u_int stored into auio.uio_resid with no
      * upper bound check here (ogetdirentries() caps it at 64KB) -- confirm
      * that very large counts are rejected further down.
3243  */
3244 #ifndef _SYS_SYSPROTO_H_
3245 struct getdirentries_args {
3246 	int	fd;
3247 	char	*buf;
3248 	u_int	count;
3249 	long	*basep;
3250 };
3251 #endif
3252 int
3253 getdirentries(td, uap)
3254 	struct thread *td;
3255 	register struct getdirentries_args /* {
3256 		int fd;
3257 		char *buf;
3258 		u_int count;
3259 		long *basep;
3260 	} */ *uap;
3261 {
3262 	struct vnode *vp;
3263 	struct file *fp;
3264 	struct uio auio;
3265 	struct iovec aiov;
3266 	long loff;
3267 	int error, eofflag;
3268 
3269 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3270 		return (error);
3271 	if ((fp->f_flag & FREAD) == 0) {
3272 		fdrop(fp, td);
3273 		return (EBADF);
3274 	}
3275 	vp = fp->f_vnode;
	/* Re-entered with a different vp when a union layer is exhausted. */
3276 unionread:
3277 	if (vp->v_type != VDIR) {
3278 		fdrop(fp, td);
3279 		return (EINVAL);
3280 	}
3281 	aiov.iov_base = uap->buf;
3282 	aiov.iov_len = uap->count;
3283 	auio.uio_iov = &aiov;
3284 	auio.uio_iovcnt = 1;
3285 	auio.uio_rw = UIO_READ;
3286 	auio.uio_segflg = UIO_USERSPACE;
3287 	auio.uio_td = td;
3288 	auio.uio_resid = uap->count;
3289 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3290 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember the starting offset; it is what gets reported via basep. */
3291 	loff = auio.uio_offset = fp->f_offset;
3292 #ifdef MAC
3293 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3294 	if (error == 0)
3295 #endif
3296 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3297 		    NULL);
3298 	fp->f_offset = auio.uio_offset;
3299 	VOP_UNLOCK(vp, 0, td);
3300 	if (error) {
3301 		fdrop(fp, td);
3302 		return (error);
3303 	}
	/* Nothing was transferred: see whether another union layer follows. */
3304 	if (uap->count == auio.uio_resid) {
3305 		if (union_dircheckp) {
3306 			error = union_dircheckp(td, &vp, fp);
3307 			if (error == -1)
3308 				goto unionread;
3309 			if (error) {
3310 				fdrop(fp, td);
3311 				return (error);
3312 			}
3313 		}
3314 		/*
3315 		 * XXX We could delay dropping the lock above but
3316 		 * union_dircheckp complicates things.
3317 		 */
3318 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3319 		if ((vp->v_vflag & VV_ROOT) &&
3320 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			/*
			 * Root of a MNT_UNION mount: switch the open file to
			 * the covered vnode and read it from offset 0.
			 */
3321 			struct vnode *tvp = vp;
3322 			vp = vp->v_mount->mnt_vnodecovered;
3323 			VREF(vp);
3324 			fp->f_vnode = vp;
3325 			fp->f_data = vp;
3326 			fp->f_offset = 0;
3327 			vput(tvp);
3328 			goto unionread;
3329 		}
3330 		VOP_UNLOCK(vp, 0, td);
3331 	}
	/* basep is optional; getdents() passes NULL. */
3332 	if (uap->basep != NULL) {
3333 		error = copyout(&loff, uap->basep, sizeof(long));
3334 	}
3335 	td->td_retval[0] = uap->count - auio.uio_resid;
3336 	fdrop(fp, td);
3337 	return (error);
3338 }
/*
 * Read directory entries without reporting the starting offset.
 *
 * Builds a getdirentries_args structure from its own arguments with
 * basep set to NULL and calls getdirentries().  uap->count is a size_t
 * while getdirentries_args.count is a u_int, so the assignment below
 * narrows the value on platforms where size_t is wider.
 */
3339 #ifndef _SYS_SYSPROTO_H_
3340 struct getdents_args {
3341 	int fd;
3342 	char *buf;
3343 	size_t count;
3344 };
3345 #endif
3346 int
3347 getdents(td, uap)
3348 	struct thread *td;
3349 	register struct getdents_args /* {
3350 		int fd;
3351 		char *buf;
3352 		size_t count;
3353 	} */ *uap;
3354 {
3355 	struct getdirentries_args ap;
3356 	ap.fd = uap->fd;
3357 	ap.buf = uap->buf;
3358 	ap.count = uap->count;
3359 	ap.basep = NULL;
3360 	return getdirentries(td, &ap);
3361 }
3362 
3363 /*
3364  * Set the mode mask for creation of filesystem nodes.
3365  *
      * The previous mask is returned in td->td_retval[0]; the new mask is
      * uap->newmask restricted to the ALLPERMS bits.  Both the read and the
      * update happen under the file descriptor table lock.  Always returns 0.
      *
3366  * MP SAFE
3367  */
3368 #ifndef _SYS_SYSPROTO_H_
3369 struct umask_args {
3370 	int	newmask;
3371 };
3372 #endif
3373 int
3374 umask(td, uap)
3375 	struct thread *td;
3376 	struct umask_args /* {
3377 		int newmask;
3378 	} */ *uap;
3379 {
3380 	register struct filedesc *fdp;
3381 
3382 	FILEDESC_LOCK(td->td_proc->p_fd);
3383 	fdp = td->td_proc->p_fd;
3384 	td->td_retval[0] = fdp->fd_cmask;
3385 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3386 	FILEDESC_UNLOCK(td->td_proc->p_fd);
3387 	return (0);
3388 }
3389 
3390 /*
3391  * Void all references to file by ripping underlying filesystem
3392  * away from vnode.
      *
      * Only character devices (VCHR) are accepted; anything else fails with
      * EINVAL.  A caller whose uid differs from the file's owner must pass
      * suser_cred(..., PRISON_ROOT).  VOP_REVOKE() is issued only when
      * vcount() reports more than one reference to the vnode.
3393  */
3394 #ifndef _SYS_SYSPROTO_H_
3395 struct revoke_args {
3396 	char	*path;
3397 };
3398 #endif
3399 /* ARGSUSED */
3400 int
3401 revoke(td, uap)
3402 	struct thread *td;
3403 	register struct revoke_args /* {
3404 		char *path;
3405 	} */ *uap;
3406 {
3407 	struct mount *mp;
3408 	struct vnode *vp;
3409 	struct vattr vattr;
3410 	int error;
3411 	struct nameidata nd;
3412 
3413 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
3414 	if ((error = namei(&nd)) != 0)
3415 		return (error);
3416 	vp = nd.ni_vp;
3417 	NDFREE(&nd, NDF_ONLY_PNBUF);
3418 	if (vp->v_type != VCHR) {
3419 		vput(vp);
3420 		return (EINVAL);
3421 	}
3422 #ifdef MAC
3423 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3424 	if (error) {
3425 		vput(vp);
3426 		return (error);
3427 	}
3428 #endif
3429 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3430 	if (error) {
3431 		vput(vp);
3432 		return (error);
3433 	}
	/* From here on the vnode is unlocked, so exits use vrele(), not vput(). */
3434 	VOP_UNLOCK(vp, 0, td);
3435 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3436 		error = suser_cred(td->td_ucred, PRISON_ROOT);
3437 		if (error)
3438 			goto out;
3439 	}
3440 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3441 		goto out;
3442 	if (vcount(vp) > 1)
3443 		VOP_REVOKE(vp, REVOKEALL);
3444 	vn_finished_write(mp);
3445 out:
3446 	vrele(vp);
3447 	return (error);
3448 }
3449 
3450 /*
3451  * Convert a user file descriptor to a kernel file entry.
3452  * A reference is held on the file entry (fhold()) upon successful return.
      *
      * Returns EBADF if fdp is NULL, fd is out of range, or the slot is
      * empty, and EINVAL if the file entry has no vnode (f_vnode == NULL).
      * *fpp is always written: the file entry on success, NULL on failure.
      * The callers in this file release the reference with fdrop().
3453  */
3454 int
3455 getvnode(fdp, fd, fpp)
3456 	struct filedesc *fdp;
3457 	int fd;
3458 	struct file **fpp;
3459 {
3460 	int error;
3461 	struct file *fp;
3462 
3463 	fp = NULL;
3464 	if (fdp == NULL)
3465 		error = EBADF;
3466 	else {
3467 		FILEDESC_LOCK(fdp);
		/* The unsigned cast makes a negative fd fail the range test too. */
3468 		if ((u_int)fd >= fdp->fd_nfiles ||
3469 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3470 			error = EBADF;
3471 		else if (fp->f_vnode == NULL) {
3472 			fp = NULL;
3473 			error = EINVAL;
3474 		} else {
3475 			fhold(fp);
3476 			error = 0;
3477 		}
3478 		FILEDESC_UNLOCK(fdp);
3479 	}
3480 	*fpp = fp;
3481 	return (error);
3482 }
3483 
3484 /*
3485  * Get (NFS) file handle
      *
      * Restricted to the super user (suser()).  Looks up uap->fname
      * following symbolic links, builds a file handle from the mount's
      * filesystem id and the result of VFS_VPTOFH(), and copies it out to
      * uap->fhp.  Returns 0 or an errno value.
3486  */
3487 #ifndef _SYS_SYSPROTO_H_
3488 struct getfh_args {
3489 	char	*fname;
3490 	fhandle_t *fhp;
3491 };
3492 #endif
3493 int
3494 getfh(td, uap)
3495 	struct thread *td;
3496 	register struct getfh_args *uap;
3497 {
3498 	struct nameidata nd;
3499 	fhandle_t fh;
3500 	register struct vnode *vp;
3501 	int error;
3502 
3503 	/*
3504 	 * Must be super user
3505 	 */
3506 	error = suser(td);
3507 	if (error)
3508 		return (error);
3509 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3510 	error = namei(&nd);
3511 	if (error)
3512 		return (error);
3513 	NDFREE(&nd, NDF_ONLY_PNBUF);
3514 	vp = nd.ni_vp;
	/* Zero the whole handle first so no stale stack bytes are copied out. */
3515 	bzero(&fh, sizeof(fh));
3516 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3517 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3518 	vput(vp);
3519 	if (error)
3520 		return (error);
3521 	error = copyout(&fh, uap->fhp, sizeof (fh));
3522 	return (error);
3523 }
3524 
3525 /*
3526  * syscall for the rpc.lockd to use to translate a NFS file handle into
3527  * an open descriptor.
3528  *
3529  * warning: do not remove the suser() call or this becomes one giant
3530  * security hole.
      *
      * The flags must request reading and/or writing and must not include
      * O_CREAT, otherwise EINVAL is returned.  ESTALE is returned when no
      * mount matches the handle's filesystem id.  On success the new
      * descriptor number is returned in td->td_retval[0].
3531  */
3532 #ifndef _SYS_SYSPROTO_H_
3533 struct fhopen_args {
3534 	const struct fhandle *u_fhp;
3535 	int flags;
3536 };
3537 #endif
3538 int
3539 fhopen(td, uap)
3540 	struct thread *td;
3541 	struct fhopen_args /* {
3542 		const struct fhandle *u_fhp;
3543 		int flags;
3544 	} */ *uap;
3545 {
3546 	struct proc *p = td->td_proc;
3547 	struct mount *mp;
3548 	struct vnode *vp;
3549 	struct fhandle fhp;
3550 	struct vattr vat;
3551 	struct vattr *vap = &vat;
3552 	struct flock lf;
3553 	struct file *fp;
3554 	register struct filedesc *fdp = p->p_fd;
3555 	int fmode, mode, error, type;
3556 	struct file *nfp;
3557 	int indx;
3558 
3559 	/*
3560 	 * Must be super user
3561 	 */
3562 	error = suser(td);
3563 	if (error)
3564 		return (error);
3565 
3566 	fmode = FFLAGS(uap->flags);
3567 	/* why not allow a non-read/write open for our lockd? */
3568 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3569 		return (EINVAL);
3570 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
3571 	if (error)
3572 		return(error);
3573 	/* find the mount point */
3574 	mp = vfs_getvfs(&fhp.fh_fsid);
3575 	if (mp == NULL)
3576 		return (ESTALE);
3577 	/* now give me my vnode, it gets returned to me locked */
3578 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3579 	if (error)
3580 		return (error);
3581  	/*
3582 	 * from now on we have to make sure not
3583 	 * to forget about the vnode
3584 	 * any error that causes an abort must vput(vp)
3585 	 * just set error = err and 'goto bad;'.
3586 	 */
3587 
3588 	/*
3589 	 * from vn_open
3590 	 */
3591 	if (vp->v_type == VLNK) {
3592 		error = EMLINK;
3593 		goto bad;
3594 	}
3595 	if (vp->v_type == VSOCK) {
3596 		error = EOPNOTSUPP;
3597 		goto bad;
3598 	}
	/* Translate the open flags into the access bits checked below. */
3599 	mode = 0;
3600 	if (fmode & (FWRITE | O_TRUNC)) {
3601 		if (vp->v_type == VDIR) {
3602 			error = EISDIR;
3603 			goto bad;
3604 		}
3605 		error = vn_writechk(vp);
3606 		if (error)
3607 			goto bad;
3608 		mode |= VWRITE;
3609 	}
3610 	if (fmode & FREAD)
3611 		mode |= VREAD;
3612 	if (fmode & O_APPEND)
3613 		mode |= VAPPEND;
3614 #ifdef MAC
3615 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
3616 	if (error)
3617 		goto bad;
3618 #endif
3619 	if (mode) {
3620 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
3621 		if (error)
3622 			goto bad;
3623 	}
3624 	if (fmode & O_TRUNC) {
		/*
		 * The vnode is unlocked around vn_start_write(), so the
		 * failure exit below uses vrele() rather than "goto bad".
		 */
3625 		VOP_UNLOCK(vp, 0, td);				/* XXX */
3626 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3627 			vrele(vp);
3628 			return (error);
3629 		}
3630 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3631 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
3632 #ifdef MAC
3633 		/*
3634 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
3635 		 * should be right.
3636 		 */
3637 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
3638 		if (error == 0) {
3639 #endif
			/* Truncate: VOP_SETATTR() with only va_size (0) set. */
3640 			VATTR_NULL(vap);
3641 			vap->va_size = 0;
3642 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
3643 #ifdef MAC
3644 		}
3645 #endif
3646 		vn_finished_write(mp);
3647 		if (error)
3648 			goto bad;
3649 	}
3650 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
3651 	if (error)
3652 		goto bad;
3653 	/*
3654 	 * Make sure that a VM object is created for VMIO support.
3655 	 */
3656 	if (vn_canvmio(vp) == TRUE) {
3657 		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
3658 			goto bad;
3659 	}
3660 	if (fmode & FWRITE)
3661 		vp->v_writecount++;
3662 
3663 	/*
3664 	 * end of vn_open code
3665 	 */
3666 
3667 	if ((error = falloc(td, &nfp, &indx)) != 0) {
		/* Undo the write count taken just above. */
3668 		if (fmode & FWRITE)
3669 			vp->v_writecount--;
3670 		goto bad;
3671 	}
3672 	/* An extra reference on `nfp' has been held for us by falloc(). */
3673 	fp = nfp;
3674 
3675 	nfp->f_vnode = vp;
3676 	nfp->f_data = vp;
3677 	nfp->f_flag = fmode & FMASK;
3678 	nfp->f_ops = &vnops;
3679 	nfp->f_type = DTYPE_VNODE;
3680 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file lock: write lock for O_EXLOCK, else read lock. */
3681 		lf.l_whence = SEEK_SET;
3682 		lf.l_start = 0;
3683 		lf.l_len = 0;
3684 		if (fmode & O_EXLOCK)
3685 			lf.l_type = F_WRLCK;
3686 		else
3687 			lf.l_type = F_RDLCK;
3688 		type = F_FLOCK;
3689 		if ((fmode & FNONBLOCK) == 0)
3690 			type |= F_WAIT;
3691 		VOP_UNLOCK(vp, 0, td);
3692 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
3693 			    type)) != 0) {
3694 			/*
3695 			 * The lock request failed.  Normally close the
3696 			 * descriptor but handle the case where someone might
3697 			 * have dup()d or close()d it when we weren't looking.
3698 			 */
3699 			FILEDESC_LOCK(fdp);
3700 			if (fdp->fd_ofiles[indx] == fp) {
3701 				fdp->fd_ofiles[indx] = NULL;
3702 				FILEDESC_UNLOCK(fdp);
3703 				fdrop(fp, td);
3704 			} else
3705 				FILEDESC_UNLOCK(fdp);
3706 			/*
3707 			 * release our private reference
3708 			 */
3709 			fdrop(fp, td);
3710 			return(error);
3711 		}
3712 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3713 		fp->f_flag |= FHASLOCK;
3714 	}
3715 	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
3716 		vfs_object_create(vp, td, td->td_ucred);
3717 
3718 	VOP_UNLOCK(vp, 0, td);
	/* Drop the extra reference from falloc(); the descriptor keeps its own. */
3719 	fdrop(fp, td);
3720 	td->td_retval[0] = indx;
3721 	return (0);
3722 
3723 bad:
3724 	vput(vp);
3725 	return (error);
3726 }
3727 
3728 /*
3729  * Stat an (NFS) file handle.
      *
      * Restricted to the super user (suser()).  The handle is copied in from
      * uap->u_fhp, resolved to a mount with vfs_getvfs() (ESTALE if none
      * matches) and to a vnode with VFS_FHTOVP().  The result of vn_stat()
      * is copied out to uap->sb.  Returns 0 or an errno value.
3730  */
3731 #ifndef _SYS_SYSPROTO_H_
3732 struct fhstat_args {
3733 	struct fhandle *u_fhp;
3734 	struct stat *sb;
3735 };
3736 #endif
3737 int
3738 fhstat(td, uap)
3739 	struct thread *td;
3740 	register struct fhstat_args /* {
3741 		struct fhandle *u_fhp;
3742 		struct stat *sb;
3743 	} */ *uap;
3744 {
3745 	struct stat sb;
3746 	fhandle_t fh;
3747 	struct mount *mp;
3748 	struct vnode *vp;
3749 	int error;
3750 
3751 	/*
3752 	 * Must be super user
3753 	 */
3754 	error = suser(td);
3755 	if (error)
3756 		return (error);
3757 
3758 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
3759 	if (error)
3760 		return (error);
3761 
3762 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3763 		return (ESTALE);
3764 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3765 		return (error);
3766 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
	/* The vnode is released before the result is copied to user space. */
3767 	vput(vp);
3768 	if (error)
3769 		return (error);
3770 	error = copyout(&sb, uap->sb, sizeof(sb));
3771 	return (error);
3772 }
3773 
3774 /*
3775  * Implement fstatfs() for (NFS) file handles.
3776  */
3777 #ifndef _SYS_SYSPROTO_H_
3778 struct fhstatfs_args {
3779 	struct fhandle *u_fhp;
3780 	struct statfs *buf;
3781 };
3782 #endif
3783 int
3784 fhstatfs(td, uap)
3785 	struct thread *td;
3786 	struct fhstatfs_args /* {
3787 		struct fhandle *u_fhp;
3788 		struct statfs *buf;
3789 	} */ *uap;
3790 {
3791 	struct statfs *sp;
3792 	struct mount *mp;
3793 	struct vnode *vp;
3794 	struct statfs sb;
3795 	fhandle_t fh;
3796 	int error;
3797 
3798 	/*
3799 	 * Must be super user
3800 	 */
3801 	error = suser(td);
3802 	if (error)
3803 		return (error);
3804 
3805 	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
3806 		return (error);
3807 
3808 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3809 		return (ESTALE);
3810 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3811 		return (error);
3812 	mp = vp->v_mount;
3813 	sp = &mp->mnt_stat;
3814 	vput(vp);
3815 #ifdef MAC
3816 	error = mac_check_mount_stat(td->td_ucred, mp);
3817 	if (error)
3818 		return (error);
3819 #endif
3820 	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3821 		return (error);
3822 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3823 	if (suser(td)) {
3824 		bcopy(sp, &sb, sizeof(sb));
3825 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3826 		sp = &sb;
3827 	}
3828 	return (copyout(sp, uap->buf, sizeof(*sp)));
3829 }
3830 
3831 /*
3832  * Syscall to push extended attribute configuration information into the
3833  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3834  * a command (int cmd), and attribute name and misc data.  For now, the
3835  * attribute name is left in userspace for consumption by the VFS_op.
3836  * It will probably be changed to be copied into sysspace by the
3837  * syscall in the future, once issues with various consumers of the
3838  * attribute code have raised their hands.
3839  *
3840  * Currently this is used only by UFS Extended Attributes.
3841  */
3842 int
3843 extattrctl(td, uap)
3844 	struct thread *td;
3845 	struct extattrctl_args /* {
3846 		const char *path;
3847 		int cmd;
3848 		const char *filename;
3849 		int attrnamespace;
3850 		const char *attrname;
3851 	} */ *uap;
3852 {
3853 	struct vnode *filename_vp;
3854 	struct nameidata nd;
3855 	struct mount *mp, *mp_writable;
3856 	char attrname[EXTATTR_MAXNAMELEN];
3857 	int error;
3858 
3859 	/*
3860 	 * uap->attrname is not always defined.  We check again later when we
3861 	 * invoke the VFS call so as to pass in NULL there if needed.
3862 	 */
3863 	if (uap->attrname != NULL) {
3864 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
3865 		    NULL);
3866 		if (error)
3867 			return (error);
3868 	}
3869 
3870 	/*
3871 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
3872 	 * which VFS_EXTATTRCTL() will later release.
3873 	 */
3874 	filename_vp = NULL;
3875 	if (uap->filename != NULL) {
3876 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3877 		    uap->filename, td);
3878 		error = namei(&nd);
3879 		if (error)
3880 			return (error);
3881 		filename_vp = nd.ni_vp;
3882 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3883 	}
3884 
3885 	/* uap->path is always defined. */
3886 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3887 	error = namei(&nd);
3888 	if (error) {
3889 		if (filename_vp != NULL)
3890 			vput(filename_vp);
3891 		return (error);
3892 	}
3893 	mp = nd.ni_vp->v_mount;
3894 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
3895 	NDFREE(&nd, 0);
3896 	if (error) {
3897 		if (filename_vp != NULL)
3898 			vput(filename_vp);
3899 		return (error);
3900 	}
3901 
3902 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
3903 	    uap->attrname != NULL ? attrname : NULL, td);
3904 
3905 	vn_finished_write(mp_writable);
3906 	/*
3907 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3908 	 * filename_vp, so vrele it if it is defined.
3909 	 */
3910 	if (filename_vp != NULL)
3911 		vrele(filename_vp);
3912 	return (error);
3913 }
3914 
3915 /*-
3916  * Set a named extended attribute on a file or directory
3917  *
3918  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3919  *            kernelspace string pointer "attrname", userspace buffer
3920  *            pointer "data", buffer length "nbytes", thread "td".
3921  * Returns: 0 on success, an error number otherwise
3922  * Locks: none
3923  * References: vp must be a valid reference for the duration of the call
3924  */
3925 static int
3926 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3927     void *data, size_t nbytes, struct thread *td)
3928 {
3929 	struct mount *mp;
3930 	struct uio auio;
3931 	struct iovec aiov;
3932 	ssize_t cnt;
3933 	int error;
3934 
3935 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
3936 	if (error)
3937 		return (error);
3938 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3939 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3940 
3941 	aiov.iov_base = data;
3942 	aiov.iov_len = nbytes;
3943 	auio.uio_iov = &aiov;
3944 	auio.uio_iovcnt = 1;
3945 	auio.uio_offset = 0;
3946 	if (nbytes > INT_MAX) {
3947 		error = EINVAL;
3948 		goto done;
3949 	}
3950 	auio.uio_resid = nbytes;
3951 	auio.uio_rw = UIO_WRITE;
3952 	auio.uio_segflg = UIO_USERSPACE;
3953 	auio.uio_td = td;
3954 	cnt = nbytes;
3955 
3956 #ifdef MAC
3957 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
3958 	    attrname, &auio);
3959 	if (error)
3960 		goto done;
3961 #endif
3962 
3963 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3964 	    td->td_ucred, td);
3965 	cnt -= auio.uio_resid;
3966 	td->td_retval[0] = cnt;
3967 
3968 done:
3969 	VOP_UNLOCK(vp, 0, td);
3970 	vn_finished_write(mp);
3971 	return (error);
3972 }
3973 
3974 int
3975 extattr_set_fd(td, uap)
3976 	struct thread *td;
3977 	struct extattr_set_fd_args /* {
3978 		int fd;
3979 		int attrnamespace;
3980 		const char *attrname;
3981 		void *data;
3982 		size_t nbytes;
3983 	} */ *uap;
3984 {
3985 	struct file *fp;
3986 	char attrname[EXTATTR_MAXNAMELEN];
3987 	int error;
3988 
3989 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3990 	if (error)
3991 		return (error);
3992 
3993 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
3994 	if (error)
3995 		return (error);
3996 
3997 	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
3998 	    attrname, uap->data, uap->nbytes, td);
3999 	fdrop(fp, td);
4000 
4001 	return (error);
4002 }
4003 
4004 int
4005 extattr_set_file(td, uap)
4006 	struct thread *td;
4007 	struct extattr_set_file_args /* {
4008 		const char *path;
4009 		int attrnamespace;
4010 		const char *attrname;
4011 		void *data;
4012 		size_t nbytes;
4013 	} */ *uap;
4014 {
4015 	struct nameidata nd;
4016 	char attrname[EXTATTR_MAXNAMELEN];
4017 	int error;
4018 
4019 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4020 	if (error)
4021 		return (error);
4022 
4023 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4024 	error = namei(&nd);
4025 	if (error)
4026 		return (error);
4027 	NDFREE(&nd, NDF_ONLY_PNBUF);
4028 
4029 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4030 	    uap->data, uap->nbytes, td);
4031 
4032 	vrele(nd.ni_vp);
4033 	return (error);
4034 }
4035 
4036 int
4037 extattr_set_link(td, uap)
4038 	struct thread *td;
4039 	struct extattr_set_link_args /* {
4040 		const char *path;
4041 		int attrnamespace;
4042 		const char *attrname;
4043 		void *data;
4044 		size_t nbytes;
4045 	} */ *uap;
4046 {
4047 	struct nameidata nd;
4048 	char attrname[EXTATTR_MAXNAMELEN];
4049 	int error;
4050 
4051 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4052 	if (error)
4053 		return (error);
4054 
4055 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4056 	error = namei(&nd);
4057 	if (error)
4058 		return (error);
4059 	NDFREE(&nd, NDF_ONLY_PNBUF);
4060 
4061 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4062 	    uap->data, uap->nbytes, td);
4063 
4064 	vrele(nd.ni_vp);
4065 	return (error);
4066 }
4067 
4068 /*-
4069  * Get a named extended attribute on a file or directory
4070  *
4071  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4072  *            kernelspace string pointer "attrname", userspace buffer
4073  *            pointer "data", buffer length "nbytes", thread "td".
4074  * Returns: 0 on success, an error number otherwise
4075  * Locks: none
4076  * References: vp must be a valid reference for the duration of the call
4077  */
4078 static int
4079 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4080     void *data, size_t nbytes, struct thread *td)
4081 {
4082 	struct uio auio, *auiop;
4083 	struct iovec aiov;
4084 	ssize_t cnt;
4085 	size_t size, *sizep;
4086 	int error;
4087 
4088 	/*
4089 	 * XXX: Temporary API compatibility for applications that know
4090 	 * about this hack ("" means list), but haven't been updated
4091 	 * for the extattr_list_*() system calls yet.  This will go
4092 	 * away for FreeBSD 5.3.
4093 	 */
4094 	if (strlen(attrname) == 0)
4095 		return (extattr_list_vp(vp, attrnamespace, data, nbytes, td));
4096 
4097 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4098 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4099 
4100 	/*
4101 	 * Slightly unusual semantics: if the user provides a NULL data
4102 	 * pointer, they don't want to receive the data, just the
4103 	 * maximum read length.
4104 	 */
4105 	auiop = NULL;
4106 	sizep = NULL;
4107 	cnt = 0;
4108 	if (data != NULL) {
4109 		aiov.iov_base = data;
4110 		aiov.iov_len = nbytes;
4111 		auio.uio_iov = &aiov;
4112 		auio.uio_offset = 0;
4113 		if (nbytes > INT_MAX) {
4114 			error = EINVAL;
4115 			goto done;
4116 		}
4117 		auio.uio_resid = nbytes;
4118 		auio.uio_rw = UIO_READ;
4119 		auio.uio_segflg = UIO_USERSPACE;
4120 		auio.uio_td = td;
4121 		auiop = &auio;
4122 		cnt = nbytes;
4123 	} else
4124 		sizep = &size;
4125 
4126 #ifdef MAC
4127 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4128 	    attrname, &auio);
4129 	if (error)
4130 		goto done;
4131 #endif
4132 
4133 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4134 	    td->td_ucred, td);
4135 
4136 	if (auiop != NULL) {
4137 		cnt -= auio.uio_resid;
4138 		td->td_retval[0] = cnt;
4139 	} else
4140 		td->td_retval[0] = size;
4141 
4142 done:
4143 	VOP_UNLOCK(vp, 0, td);
4144 	return (error);
4145 }
4146 
4147 int
4148 extattr_get_fd(td, uap)
4149 	struct thread *td;
4150 	struct extattr_get_fd_args /* {
4151 		int fd;
4152 		int attrnamespace;
4153 		const char *attrname;
4154 		void *data;
4155 		size_t nbytes;
4156 	} */ *uap;
4157 {
4158 	struct file *fp;
4159 	char attrname[EXTATTR_MAXNAMELEN];
4160 	int error;
4161 
4162 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4163 	if (error)
4164 		return (error);
4165 
4166 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4167 	if (error)
4168 		return (error);
4169 
4170 	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4171 	    attrname, uap->data, uap->nbytes, td);
4172 
4173 	fdrop(fp, td);
4174 	return (error);
4175 }
4176 
4177 int
4178 extattr_get_file(td, uap)
4179 	struct thread *td;
4180 	struct extattr_get_file_args /* {
4181 		const char *path;
4182 		int attrnamespace;
4183 		const char *attrname;
4184 		void *data;
4185 		size_t nbytes;
4186 	} */ *uap;
4187 {
4188 	struct nameidata nd;
4189 	char attrname[EXTATTR_MAXNAMELEN];
4190 	int error;
4191 
4192 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4193 	if (error)
4194 		return (error);
4195 
4196 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4197 	error = namei(&nd);
4198 	if (error)
4199 		return (error);
4200 	NDFREE(&nd, NDF_ONLY_PNBUF);
4201 
4202 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4203 	    uap->data, uap->nbytes, td);
4204 
4205 	vrele(nd.ni_vp);
4206 	return (error);
4207 }
4208 
4209 int
4210 extattr_get_link(td, uap)
4211 	struct thread *td;
4212 	struct extattr_get_link_args /* {
4213 		const char *path;
4214 		int attrnamespace;
4215 		const char *attrname;
4216 		void *data;
4217 		size_t nbytes;
4218 	} */ *uap;
4219 {
4220 	struct nameidata nd;
4221 	char attrname[EXTATTR_MAXNAMELEN];
4222 	int error;
4223 
4224 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4225 	if (error)
4226 		return (error);
4227 
4228 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4229 	error = namei(&nd);
4230 	if (error)
4231 		return (error);
4232 	NDFREE(&nd, NDF_ONLY_PNBUF);
4233 
4234 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4235 	    uap->data, uap->nbytes, td);
4236 
4237 	vrele(nd.ni_vp);
4238 	return (error);
4239 }
4240 
4241 /*
4242  * extattr_delete_vp(): Delete a named extended attribute on a file or
4243  *                      directory
4244  *
4245  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4246  *            kernelspace string pointer "attrname", proc "p"
4247  * Returns: 0 on success, an error number otherwise
4248  * Locks: none
4249  * References: vp must be a valid reference for the duration of the call
4250  */
4251 static int
4252 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4253     struct thread *td)
4254 {
4255 	struct mount *mp;
4256 	int error;
4257 
4258 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4259 	if (error)
4260 		return (error);
4261 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4262 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4263 
4264 #ifdef MAC
4265 	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4266 	    attrname);
4267 	if (error)
4268 		goto done;
4269 #endif
4270 
4271 	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4272 	    td);
4273 	if (error == EOPNOTSUPP)
4274 		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4275 		    td->td_ucred, td);
4276 #ifdef MAC
4277 done:
4278 #endif
4279 	VOP_UNLOCK(vp, 0, td);
4280 	vn_finished_write(mp);
4281 	return (error);
4282 }
4283 
4284 int
4285 extattr_delete_fd(td, uap)
4286 	struct thread *td;
4287 	struct extattr_delete_fd_args /* {
4288 		int fd;
4289 		int attrnamespace;
4290 		const char *attrname;
4291 	} */ *uap;
4292 {
4293 	struct file *fp;
4294 	struct vnode *vp;
4295 	char attrname[EXTATTR_MAXNAMELEN];
4296 	int error;
4297 
4298 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4299 	if (error)
4300 		return (error);
4301 
4302 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4303 	if (error)
4304 		return (error);
4305 	vp = fp->f_vnode;
4306 
4307 	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
4308 	fdrop(fp, td);
4309 	return (error);
4310 }
4311 
4312 int
4313 extattr_delete_file(td, uap)
4314 	struct thread *td;
4315 	struct extattr_delete_file_args /* {
4316 		const char *path;
4317 		int attrnamespace;
4318 		const char *attrname;
4319 	} */ *uap;
4320 {
4321 	struct nameidata nd;
4322 	char attrname[EXTATTR_MAXNAMELEN];
4323 	int error;
4324 
4325 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4326 	if (error)
4327 		return(error);
4328 
4329 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4330 	error = namei(&nd);
4331 	if (error)
4332 		return(error);
4333 	NDFREE(&nd, NDF_ONLY_PNBUF);
4334 
4335 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4336 	vrele(nd.ni_vp);
4337 	return(error);
4338 }
4339 
4340 int
4341 extattr_delete_link(td, uap)
4342 	struct thread *td;
4343 	struct extattr_delete_link_args /* {
4344 		const char *path;
4345 		int attrnamespace;
4346 		const char *attrname;
4347 	} */ *uap;
4348 {
4349 	struct nameidata nd;
4350 	char attrname[EXTATTR_MAXNAMELEN];
4351 	int error;
4352 
4353 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4354 	if (error)
4355 		return(error);
4356 
4357 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4358 	error = namei(&nd);
4359 	if (error)
4360 		return(error);
4361 	NDFREE(&nd, NDF_ONLY_PNBUF);
4362 
4363 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4364 	vrele(nd.ni_vp);
4365 	return(error);
4366 }
4367 
4368 /*-
4369  * Retrieve a list of extended attributes on a file or directory.
4370  *
4371  * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4372  *            userspace buffer pointer "data", buffer length "nbytes",
4373  *            thread "td".
4374  * Returns: 0 on success, an error number otherwise
4375  * Locks: none
4376  * References: vp must be a valid reference for the duration of the call
4377  */
4378 static int
4379 extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4380     size_t nbytes, struct thread *td)
4381 {
4382 	struct uio auio, *auiop;
4383 	size_t size, *sizep;
4384 	struct iovec aiov;
4385 	ssize_t cnt;
4386 	int error;
4387 
4388 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4389 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4390 
4391 	auiop = NULL;
4392 	sizep = NULL;
4393 	cnt = 0;
4394 	if (data != NULL) {
4395 		aiov.iov_base = data;
4396 		aiov.iov_len = nbytes;
4397 		auio.uio_iov = &aiov;
4398 		auio.uio_offset = 0;
4399 		if (nbytes > INT_MAX) {
4400 			error = EINVAL;
4401 			goto done;
4402 		}
4403 		auio.uio_resid = nbytes;
4404 		auio.uio_rw = UIO_READ;
4405 		auio.uio_segflg = UIO_USERSPACE;
4406 		auio.uio_td = td;
4407 		auiop = &auio;
4408 		cnt = nbytes;
4409 	} else
4410 		sizep = &size;
4411 
4412 #ifdef MAC
4413 	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4414 	if (error)
4415 		goto done;
4416 #endif
4417 
4418 	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4419 	    td->td_ucred, td);
4420 
4421 	if (auiop != NULL) {
4422 		cnt -= auio.uio_resid;
4423 		td->td_retval[0] = cnt;
4424 	} else
4425 		td->td_retval[0] = size;
4426 
4427 done:
4428 	VOP_UNLOCK(vp, 0, td);
4429 	return (error);
4430 }
4431 
4432 
4433 int
4434 extattr_list_fd(td, uap)
4435 	struct thread *td;
4436 	struct extattr_list_fd_args /* {
4437 		int fd;
4438 		int attrnamespace;
4439 		void *data;
4440 		size_t nbytes;
4441 	} */ *uap;
4442 {
4443 	struct file *fp;
4444 	int error;
4445 
4446 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4447 	if (error)
4448 		return (error);
4449 
4450 	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
4451 	    uap->nbytes, td);
4452 
4453 	fdrop(fp, td);
4454 	return (error);
4455 }
4456 
4457 int
4458 extattr_list_file(td, uap)
4459 	struct thread*td;
4460 	struct extattr_list_file_args /* {
4461 		const char *path;
4462 		int attrnamespace;
4463 		void *data;
4464 		size_t nbytes;
4465 	} */ *uap;
4466 {
4467 	struct nameidata nd;
4468 	int error;
4469 
4470 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4471 	error = namei(&nd);
4472 	if (error)
4473 		return (error);
4474 	NDFREE(&nd, NDF_ONLY_PNBUF);
4475 
4476 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4477 	    uap->nbytes, td);
4478 
4479 	vrele(nd.ni_vp);
4480 	return (error);
4481 }
4482 
4483 int
4484 extattr_list_link(td, uap)
4485 	struct thread*td;
4486 	struct extattr_list_link_args /* {
4487 		const char *path;
4488 		int attrnamespace;
4489 		void *data;
4490 		size_t nbytes;
4491 	} */ *uap;
4492 {
4493 	struct nameidata nd;
4494 	int error;
4495 
4496 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4497 	error = namei(&nd);
4498 	if (error)
4499 		return (error);
4500 	NDFREE(&nd, NDF_ONLY_PNBUF);
4501 
4502 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4503 	    uap->nbytes, td);
4504 
4505 	vrele(nd.ni_vp);
4506 	return (error);
4507 }
4508 
4509