xref: /freebsd/sys/kern/vfs_syscalls.c (revision 390e8cc2974df1888369c06339ef8e0e92b312b6)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  * $FreeBSD$
40  */
41 
42 /* For 4.3 integer FS ID compatibility */
43 #include "opt_compat.h"
44 #include "opt_mac.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/sysent.h>
51 #include <sys/mac.h>
52 #include <sys/malloc.h>
53 #include <sys/mount.h>
54 #include <sys/mutex.h>
55 #include <sys/sysproto.h>
56 #include <sys/namei.h>
57 #include <sys/filedesc.h>
58 #include <sys/kernel.h>
59 #include <sys/fcntl.h>
60 #include <sys/file.h>
61 #include <sys/limits.h>
62 #include <sys/linker.h>
63 #include <sys/stat.h>
64 #include <sys/sx.h>
65 #include <sys/unistd.h>
66 #include <sys/vnode.h>
67 #include <sys/proc.h>
68 #include <sys/dirent.h>
69 #include <sys/extattr.h>
70 #include <sys/jail.h>
71 #include <sys/syscallsubr.h>
72 #include <sys/sysctl.h>
73 
74 #include <machine/stdarg.h>
75 
76 #include <vm/vm.h>
77 #include <vm/vm_object.h>
78 #include <vm/vm_page.h>
79 #include <vm/uma.h>
80 
81 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
82 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
83 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
84 static int setfmode(struct thread *td, struct vnode *, int);
85 static int setfflags(struct thread *td, struct vnode *, int);
86 static int setutimes(struct thread *td, struct vnode *,
87     const struct timespec *, int, int);
88 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
89     struct thread *td);
90 
91 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
92 int (*softdep_fsync_hook)(struct vnode *);
93 
94 /*
95  * The module initialization routine for POSIX asynchronous I/O will
96  * set this to the version of AIO that it implements.  (Zero means
97  * that it is not implemented.)  This value is used here by pathconf()
98  * and in kern_descrip.c by fpathconf().
99  */
100 int async_io_version;
101 
102 /*
103  * Sync each mounted filesystem.
104  */
105 #ifndef _SYS_SYSPROTO_H_
106 struct sync_args {
107         int     dummy;
108 };
109 #endif
110 
111 #ifdef DEBUG
112 static int syncprt = 0;
113 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
114 #endif
115 
116 /* ARGSUSED */
117 int
118 sync(td, uap)
119 	struct thread *td;
120 	struct sync_args *uap;
121 {
122 	struct mount *mp, *nmp;
123 	int asyncflag;
124 
125 	mtx_lock(&mountlist_mtx);
126 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
127 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
128 			nmp = TAILQ_NEXT(mp, mnt_list);
129 			continue;
130 		}
131 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
132 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
133 			asyncflag = mp->mnt_flag & MNT_ASYNC;
134 			mp->mnt_flag &= ~MNT_ASYNC;
135 			vfs_msync(mp, MNT_NOWAIT);
136 			VFS_SYNC(mp, MNT_NOWAIT,
137 			    ((td != NULL) ? td->td_ucred : NOCRED), td);
138 			mp->mnt_flag |= asyncflag;
139 			vn_finished_write(mp);
140 		}
141 		mtx_lock(&mountlist_mtx);
142 		nmp = TAILQ_NEXT(mp, mnt_list);
143 		vfs_unbusy(mp, td);
144 	}
145 	mtx_unlock(&mountlist_mtx);
146 #if 0
147 /*
148  * XXX don't call vfs_bufstats() yet because that routine
149  * was not imported in the Lite2 merge.
150  */
151 #ifdef DIAGNOSTIC
152 	if (syncprt)
153 		vfs_bufstats();
154 #endif /* DIAGNOSTIC */
155 #endif
156 	return (0);
157 }
158 
159 /* XXX PRISON: could be per prison flag */
160 static int prison_quotas;
161 #if 0
162 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
163 #endif
164 
165 /*
166  * Change filesystem quotas.
167  */
168 #ifndef _SYS_SYSPROTO_H_
169 struct quotactl_args {
170 	char *path;
171 	int cmd;
172 	int uid;
173 	caddr_t arg;
174 };
175 #endif
176 /* ARGSUSED */
177 int
178 quotactl(td, uap)
179 	struct thread *td;
180 	register struct quotactl_args /* {
181 		char *path;
182 		int cmd;
183 		int uid;
184 		caddr_t arg;
185 	} */ *uap;
186 {
187 	struct mount *mp;
188 	int error;
189 	struct nameidata nd;
190 
191 	if (jailed(td->td_ucred) && !prison_quotas)
192 		return (EPERM);
193 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
194 	if ((error = namei(&nd)) != 0)
195 		return (error);
196 	NDFREE(&nd, NDF_ONLY_PNBUF);
197 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
198 	vrele(nd.ni_vp);
199 	if (error)
200 		return (error);
201 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
202 	vn_finished_write(mp);
203 	return (error);
204 }
205 
206 /*
207  * Get filesystem statistics.
208  */
209 #ifndef _SYS_SYSPROTO_H_
210 struct statfs_args {
211 	char *path;
212 	struct statfs *buf;
213 };
214 #endif
215 /* ARGSUSED */
216 int
217 statfs(td, uap)
218 	struct thread *td;
219 	register struct statfs_args /* {
220 		char *path;
221 		struct statfs *buf;
222 	} */ *uap;
223 {
224 	register struct mount *mp;
225 	register struct statfs *sp;
226 	int error;
227 	struct nameidata nd;
228 	struct statfs sb;
229 
230 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
231 	if ((error = namei(&nd)) != 0)
232 		return (error);
233 	mp = nd.ni_vp->v_mount;
234 	sp = &mp->mnt_stat;
235 	NDFREE(&nd, NDF_ONLY_PNBUF);
236 	vrele(nd.ni_vp);
237 #ifdef MAC
238 	error = mac_check_mount_stat(td->td_ucred, mp);
239 	if (error)
240 		return (error);
241 #endif
242 	error = VFS_STATFS(mp, sp, td);
243 	if (error)
244 		return (error);
245 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
246 	if (suser(td)) {
247 		bcopy(sp, &sb, sizeof(sb));
248 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
249 		sp = &sb;
250 	}
251 	return (copyout(sp, uap->buf, sizeof(*sp)));
252 }
253 
254 /*
255  * Get filesystem statistics.
256  */
257 #ifndef _SYS_SYSPROTO_H_
258 struct fstatfs_args {
259 	int fd;
260 	struct statfs *buf;
261 };
262 #endif
263 /* ARGSUSED */
264 int
265 fstatfs(td, uap)
266 	struct thread *td;
267 	register struct fstatfs_args /* {
268 		int fd;
269 		struct statfs *buf;
270 	} */ *uap;
271 {
272 	struct file *fp;
273 	struct mount *mp;
274 	register struct statfs *sp;
275 	int error;
276 	struct statfs sb;
277 
278 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
279 		return (error);
280 	mp = ((struct vnode *)fp->f_data)->v_mount;
281 	fdrop(fp, td);
282 	if (mp == NULL)
283 		return (EBADF);
284 #ifdef MAC
285 	error = mac_check_mount_stat(td->td_ucred, mp);
286 	if (error)
287 		return (error);
288 #endif
289 	sp = &mp->mnt_stat;
290 	error = VFS_STATFS(mp, sp, td);
291 	if (error)
292 		return (error);
293 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
294 	if (suser(td)) {
295 		bcopy(sp, &sb, sizeof(sb));
296 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
297 		sp = &sb;
298 	}
299 	return (copyout(sp, uap->buf, sizeof(*sp)));
300 }
301 
302 /*
303  * Get statistics on all filesystems.
304  */
305 #ifndef _SYS_SYSPROTO_H_
306 struct getfsstat_args {
307 	struct statfs *buf;
308 	long bufsize;
309 	int flags;
310 };
311 #endif
312 int
313 getfsstat(td, uap)
314 	struct thread *td;
315 	register struct getfsstat_args /* {
316 		struct statfs *buf;
317 		long bufsize;
318 		int flags;
319 	} */ *uap;
320 {
321 	register struct mount *mp, *nmp;
322 	register struct statfs *sp;
323 	caddr_t sfsp;
324 	long count, maxcount, error;
325 
326 	maxcount = uap->bufsize / sizeof(struct statfs);
327 	sfsp = (caddr_t)uap->buf;
328 	count = 0;
329 	mtx_lock(&mountlist_mtx);
330 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
331 #ifdef MAC
332 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
333 			nmp = TAILQ_NEXT(mp, mnt_list);
334 			continue;
335 		}
336 #endif
337 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
338 			nmp = TAILQ_NEXT(mp, mnt_list);
339 			continue;
340 		}
341 		if (sfsp && count < maxcount) {
342 			sp = &mp->mnt_stat;
343 			/*
344 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
345 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
346 			 * overrides MNT_WAIT.
347 			 */
348 			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
349 			    (uap->flags & MNT_WAIT)) &&
350 			    (error = VFS_STATFS(mp, sp, td))) {
351 				mtx_lock(&mountlist_mtx);
352 				nmp = TAILQ_NEXT(mp, mnt_list);
353 				vfs_unbusy(mp, td);
354 				continue;
355 			}
356 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
357 			error = copyout(sp, sfsp, sizeof(*sp));
358 			if (error) {
359 				vfs_unbusy(mp, td);
360 				return (error);
361 			}
362 			sfsp += sizeof(*sp);
363 		}
364 		count++;
365 		mtx_lock(&mountlist_mtx);
366 		nmp = TAILQ_NEXT(mp, mnt_list);
367 		vfs_unbusy(mp, td);
368 	}
369 	mtx_unlock(&mountlist_mtx);
370 	if (sfsp && count > maxcount)
371 		td->td_retval[0] = maxcount;
372 	else
373 		td->td_retval[0] = count;
374 	return (0);
375 }
376 
377 /*
378  * Change current working directory to a given file descriptor.
379  */
380 #ifndef _SYS_SYSPROTO_H_
381 struct fchdir_args {
382 	int	fd;
383 };
384 #endif
385 /* ARGSUSED */
386 int
387 fchdir(td, uap)
388 	struct thread *td;
389 	struct fchdir_args /* {
390 		int fd;
391 	} */ *uap;
392 {
393 	register struct filedesc *fdp = td->td_proc->p_fd;
394 	struct vnode *vp, *tdp, *vpold;
395 	struct mount *mp;
396 	struct file *fp;
397 	int error;
398 
399 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
400 		return (error);
401 	vp = fp->f_data;
402 	VREF(vp);
403 	fdrop(fp, td);
404 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
405 	if (vp->v_type != VDIR)
406 		error = ENOTDIR;
407 #ifdef MAC
408 	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
409 	}
410 #endif
411 	else
412 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
413 	while (!error && (mp = vp->v_mountedhere) != NULL) {
414 		if (vfs_busy(mp, 0, 0, td))
415 			continue;
416 		error = VFS_ROOT(mp, &tdp);
417 		vfs_unbusy(mp, td);
418 		if (error)
419 			break;
420 		vput(vp);
421 		vp = tdp;
422 	}
423 	if (error) {
424 		vput(vp);
425 		return (error);
426 	}
427 	VOP_UNLOCK(vp, 0, td);
428 	FILEDESC_LOCK(fdp);
429 	vpold = fdp->fd_cdir;
430 	fdp->fd_cdir = vp;
431 	FILEDESC_UNLOCK(fdp);
432 	vrele(vpold);
433 	return (0);
434 }
435 
436 /*
437  * Change current working directory (``.'').
438  */
439 #ifndef _SYS_SYSPROTO_H_
440 struct chdir_args {
441 	char	*path;
442 };
443 #endif
444 /* ARGSUSED */
445 int
446 chdir(td, uap)
447 	struct thread *td;
448 	struct chdir_args /* {
449 		char *path;
450 	} */ *uap;
451 {
452 
453 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
454 }
455 
456 int
457 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
458 {
459 	register struct filedesc *fdp = td->td_proc->p_fd;
460 	int error;
461 	struct nameidata nd;
462 	struct vnode *vp;
463 
464 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, pathseg, path, td);
465 	if ((error = namei(&nd)) != 0)
466 		return (error);
467 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
468 		vput(nd.ni_vp);
469 		NDFREE(&nd, NDF_ONLY_PNBUF);
470 		return (error);
471 	}
472 	VOP_UNLOCK(nd.ni_vp, 0, td);
473 	NDFREE(&nd, NDF_ONLY_PNBUF);
474 	FILEDESC_LOCK(fdp);
475 	vp = fdp->fd_cdir;
476 	fdp->fd_cdir = nd.ni_vp;
477 	FILEDESC_UNLOCK(fdp);
478 	vrele(vp);
479 	return (0);
480 }
481 
482 /*
483  * Helper function for raised chroot(2) security function:  Refuse if
484  * any filedescriptors are open directories.
485  */
486 static int
487 chroot_refuse_vdir_fds(fdp)
488 	struct filedesc *fdp;
489 {
490 	struct vnode *vp;
491 	struct file *fp;
492 	int fd;
493 
494 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
495 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
496 		fp = fget_locked(fdp, fd);
497 		if (fp == NULL)
498 			continue;
499 		if (fp->f_type == DTYPE_VNODE) {
500 			vp = fp->f_data;
501 			if (vp->v_type == VDIR)
502 				return (EPERM);
503 		}
504 	}
505 	return (0);
506 }
507 
508 /*
509  * This sysctl determines if we will allow a process to chroot(2) if it
510  * has a directory open:
511  *	0: disallowed for all processes.
512  *	1: allowed for processes that were not already chroot(2)'ed.
513  *	2: allowed for all processes.
514  */
515 
516 static int chroot_allow_open_directories = 1;
517 
518 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
519      &chroot_allow_open_directories, 0, "");
520 
521 /*
522  * Change notion of root (``/'') directory.
523  */
524 #ifndef _SYS_SYSPROTO_H_
525 struct chroot_args {
526 	char	*path;
527 };
528 #endif
529 /* ARGSUSED */
530 int
531 chroot(td, uap)
532 	struct thread *td;
533 	struct chroot_args /* {
534 		char *path;
535 	} */ *uap;
536 {
537 	int error;
538 	struct nameidata nd;
539 
540 	error = suser_cred(td->td_ucred, PRISON_ROOT);
541 	if (error)
542 		return (error);
543 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
544 	mtx_lock(&Giant);
545 	error = namei(&nd);
546 	if (error)
547 		goto error;
548 	if ((error = change_dir(nd.ni_vp, td)) != 0)
549 		goto e_vunlock;
550 #ifdef MAC
551 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
552 		goto e_vunlock;
553 #endif
554 	VOP_UNLOCK(nd.ni_vp, 0, td);
555 	error = change_root(nd.ni_vp, td);
556 	vrele(nd.ni_vp);
557 	NDFREE(&nd, NDF_ONLY_PNBUF);
558 	mtx_unlock(&Giant);
559 	return (error);
560 e_vunlock:
561 	vput(nd.ni_vp);
562 error:
563 	mtx_unlock(&Giant);
564 	NDFREE(&nd, NDF_ONLY_PNBUF);
565 	return (error);
566 }
567 
568 /*
569  * Common routine for chroot and chdir.  Callers must provide a locked vnode
570  * instance.
571  */
572 int
573 change_dir(vp, td)
574 	struct vnode *vp;
575 	struct thread *td;
576 {
577 	int error;
578 
579 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
580 	if (vp->v_type != VDIR)
581 		return (ENOTDIR);
582 #ifdef MAC
583 	error = mac_check_vnode_chdir(td->td_ucred, vp);
584 	if (error)
585 		return (error);
586 #endif
587 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
588 	return (error);
589 }
590 
591 /*
592  * Common routine for kern_chroot() and jail_attach().  The caller is
593  * responsible for invoking suser() and mac_check_chroot() to authorize this
594  * operation.
595  */
596 int
597 change_root(vp, td)
598 	struct vnode *vp;
599 	struct thread *td;
600 {
601 	struct filedesc *fdp;
602 	struct vnode *oldvp;
603 	int error;
604 
605 	mtx_assert(&Giant, MA_OWNED);
606 	fdp = td->td_proc->p_fd;
607 	FILEDESC_LOCK(fdp);
608 	if (chroot_allow_open_directories == 0 ||
609 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
610 		error = chroot_refuse_vdir_fds(fdp);
611 		if (error) {
612 			FILEDESC_UNLOCK(fdp);
613 			return (error);
614 		}
615 	}
616 	oldvp = fdp->fd_rdir;
617 	fdp->fd_rdir = vp;
618 	VREF(fdp->fd_rdir);
619 	if (!fdp->fd_jdir) {
620 		fdp->fd_jdir = vp;
621 		VREF(fdp->fd_jdir);
622 	}
623 	FILEDESC_UNLOCK(fdp);
624 	vrele(oldvp);
625 	return (0);
626 }
627 
628 /*
629  * Check permissions, allocate an open file structure,
630  * and call the device open routine if any.
631  */
632 #ifndef _SYS_SYSPROTO_H_
633 struct open_args {
634 	char	*path;
635 	int	flags;
636 	int	mode;
637 };
638 #endif
639 int
640 open(td, uap)
641 	struct thread *td;
642 	register struct open_args /* {
643 		char *path;
644 		int flags;
645 		int mode;
646 	} */ *uap;
647 {
648 
649 	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
650 }
651 
652 int
653 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
654     int mode)
655 {
656 	struct proc *p = td->td_proc;
657 	struct filedesc *fdp = p->p_fd;
658 	struct file *fp;
659 	struct vnode *vp;
660 	struct vattr vat;
661 	struct mount *mp;
662 	int cmode;
663 	struct file *nfp;
664 	int type, indx, error;
665 	struct flock lf;
666 	struct nameidata nd;
667 
668 	if ((flags & O_ACCMODE) == O_ACCMODE)
669 		return (EINVAL);
670 	flags = FFLAGS(flags);
671 	error = falloc(td, &nfp, &indx);
672 	if (error)
673 		return (error);
674 	fp = nfp;
675 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
676 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
677 	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
678 	/*
679 	 * Bump the ref count to prevent another process from closing
680 	 * the descriptor while we are blocked in vn_open()
681 	 */
682 	fhold(fp);
683 	error = vn_open(&nd, &flags, cmode);
684 	if (error) {
685 		/*
686 		 * release our own reference
687 		 */
688 		fdrop(fp, td);
689 
690 		/*
691 		 * handle special fdopen() case.  bleh.  dupfdopen() is
692 		 * responsible for dropping the old contents of ofiles[indx]
693 		 * if it succeeds.
694 		 */
695 		if ((error == ENODEV || error == ENXIO) &&
696 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
697 		    (error =
698 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
699 			td->td_retval[0] = indx;
700 			return (0);
701 		}
702 		/*
703 		 * Clean up the descriptor, but only if another thread hadn't
704 		 * replaced or closed it.
705 		 */
706 		FILEDESC_LOCK(fdp);
707 		if (fdp->fd_ofiles[indx] == fp) {
708 			fdp->fd_ofiles[indx] = NULL;
709 			FILEDESC_UNLOCK(fdp);
710 			fdrop(fp, td);
711 		} else
712 			FILEDESC_UNLOCK(fdp);
713 
714 		if (error == ERESTART)
715 			error = EINTR;
716 		return (error);
717 	}
718 	td->td_dupfd = 0;
719 	NDFREE(&nd, NDF_ONLY_PNBUF);
720 	vp = nd.ni_vp;
721 
722 	/*
723 	 * There should be 2 references on the file, one from the descriptor
724 	 * table, and one for us.
725 	 *
726 	 * Handle the case where someone closed the file (via its file
727 	 * descriptor) while we were blocked.  The end result should look
728 	 * like opening the file succeeded but it was immediately closed.
729 	 */
730 	FILEDESC_LOCK(fdp);
731 	FILE_LOCK(fp);
732 	if (fp->f_count == 1) {
733 		KASSERT(fdp->fd_ofiles[indx] != fp,
734 		    ("Open file descriptor lost all refs"));
735 		FILEDESC_UNLOCK(fdp);
736 		FILE_UNLOCK(fp);
737 		VOP_UNLOCK(vp, 0, td);
738 		vn_close(vp, flags & FMASK, fp->f_cred, td);
739 		fdrop(fp, td);
740 		td->td_retval[0] = indx;
741 		return 0;
742 	}
743 
744 	/* assert that vn_open created a backing object if one is needed */
745 	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
746 		("open: vmio vnode has no backing object after vn_open"));
747 
748 	fp->f_data = vp;
749 	fp->f_flag = flags & FMASK;
750 	fp->f_ops = &vnops;
751 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
752 	FILEDESC_UNLOCK(fdp);
753 	FILE_UNLOCK(fp);
754 	VOP_UNLOCK(vp, 0, td);
755 	if (flags & (O_EXLOCK | O_SHLOCK)) {
756 		lf.l_whence = SEEK_SET;
757 		lf.l_start = 0;
758 		lf.l_len = 0;
759 		if (flags & O_EXLOCK)
760 			lf.l_type = F_WRLCK;
761 		else
762 			lf.l_type = F_RDLCK;
763 		type = F_FLOCK;
764 		if ((flags & FNONBLOCK) == 0)
765 			type |= F_WAIT;
766 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
767 			    type)) != 0)
768 			goto bad;
769 		fp->f_flag |= FHASLOCK;
770 	}
771 	if (flags & O_TRUNC) {
772 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
773 			goto bad;
774 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
775 		VATTR_NULL(&vat);
776 		vat.va_size = 0;
777 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
778 #ifdef MAC
779 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
780 		if (error == 0)
781 #endif
782 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
783 		VOP_UNLOCK(vp, 0, td);
784 		vn_finished_write(mp);
785 		if (error)
786 			goto bad;
787 	}
788 	/*
789 	 * Release our private reference, leaving the one associated with
790 	 * the descriptor table intact.
791 	 */
792 	fdrop(fp, td);
793 	td->td_retval[0] = indx;
794 	return (0);
795 bad:
796 	FILEDESC_LOCK(fdp);
797 	if (fdp->fd_ofiles[indx] == fp) {
798 		fdp->fd_ofiles[indx] = NULL;
799 		FILEDESC_UNLOCK(fdp);
800 		fdrop(fp, td);
801 	} else
802 		FILEDESC_UNLOCK(fdp);
803 	fdrop(fp, td);
804 	return (error);
805 }
806 
#ifdef COMPAT_43
/*
 * Old creat(2): create a file by calling open() with
 * O_WRONLY | O_CREAT | O_TRUNC and the caller's path and mode.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	struct open_args oa;

	oa.path = uap->path;
	oa.flags = O_WRONLY | O_CREAT | O_TRUNC;
	oa.mode = uap->mode;
	return (open(td, &oa));
}
#endif /* COMPAT_43 */
837 
838 /*
839  * Create a special file.
840  */
841 #ifndef _SYS_SYSPROTO_H_
842 struct mknod_args {
843 	char	*path;
844 	int	mode;
845 	int	dev;
846 };
847 #endif
848 /* ARGSUSED */
849 int
850 mknod(td, uap)
851 	struct thread *td;
852 	register struct mknod_args /* {
853 		char *path;
854 		int mode;
855 		int dev;
856 	} */ *uap;
857 {
858 
859 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
860 }
861 
862 int
863 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
864     int dev)
865 {
866 	struct vnode *vp;
867 	struct mount *mp;
868 	struct vattr vattr;
869 	int error;
870 	int whiteout = 0;
871 	struct nameidata nd;
872 
873 	switch (mode & S_IFMT) {
874 	case S_IFCHR:
875 	case S_IFBLK:
876 		error = suser(td);
877 		break;
878 	default:
879 		error = suser_cred(td->td_ucred, PRISON_ROOT);
880 		break;
881 	}
882 	if (error)
883 		return (error);
884 restart:
885 	bwillwrite();
886 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
887 	if ((error = namei(&nd)) != 0)
888 		return (error);
889 	vp = nd.ni_vp;
890 	if (vp != NULL) {
891 		vrele(vp);
892 		error = EEXIST;
893 	} else {
894 		VATTR_NULL(&vattr);
895 		FILEDESC_LOCK(td->td_proc->p_fd);
896 		vattr.va_mode = (mode & ALLPERMS) &
897 		    ~td->td_proc->p_fd->fd_cmask;
898 		FILEDESC_UNLOCK(td->td_proc->p_fd);
899 		vattr.va_rdev = dev;
900 		whiteout = 0;
901 
902 		switch (mode & S_IFMT) {
903 		case S_IFMT:	/* used by badsect to flag bad sectors */
904 			vattr.va_type = VBAD;
905 			break;
906 		case S_IFCHR:
907 			vattr.va_type = VCHR;
908 			break;
909 		case S_IFBLK:
910 			vattr.va_type = VBLK;
911 			break;
912 		case S_IFWHT:
913 			whiteout = 1;
914 			break;
915 		default:
916 			error = EINVAL;
917 			break;
918 		}
919 	}
920 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
921 		NDFREE(&nd, NDF_ONLY_PNBUF);
922 		vput(nd.ni_dvp);
923 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
924 			return (error);
925 		goto restart;
926 	}
927 #ifdef MAC
928 	if (error == 0 && !whiteout)
929 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
930 		    &nd.ni_cnd, &vattr);
931 #endif
932 	if (!error) {
933 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
934 		if (whiteout)
935 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
936 		else {
937 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
938 						&nd.ni_cnd, &vattr);
939 			if (error == 0)
940 				vput(nd.ni_vp);
941 		}
942 	}
943 	NDFREE(&nd, NDF_ONLY_PNBUF);
944 	vput(nd.ni_dvp);
945 	vn_finished_write(mp);
946 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
947 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
948 	return (error);
949 }
950 
951 /*
952  * Create a named pipe.
953  */
954 #ifndef _SYS_SYSPROTO_H_
955 struct mkfifo_args {
956 	char	*path;
957 	int	mode;
958 };
959 #endif
960 /* ARGSUSED */
961 int
962 mkfifo(td, uap)
963 	struct thread *td;
964 	register struct mkfifo_args /* {
965 		char *path;
966 		int mode;
967 	} */ *uap;
968 {
969 
970 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
971 }
972 
973 int
974 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
975 {
976 	struct mount *mp;
977 	struct vattr vattr;
978 	int error;
979 	struct nameidata nd;
980 
981 restart:
982 	bwillwrite();
983 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
984 	if ((error = namei(&nd)) != 0)
985 		return (error);
986 	if (nd.ni_vp != NULL) {
987 		NDFREE(&nd, NDF_ONLY_PNBUF);
988 		vrele(nd.ni_vp);
989 		vput(nd.ni_dvp);
990 		return (EEXIST);
991 	}
992 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
993 		NDFREE(&nd, NDF_ONLY_PNBUF);
994 		vput(nd.ni_dvp);
995 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
996 			return (error);
997 		goto restart;
998 	}
999 	VATTR_NULL(&vattr);
1000 	vattr.va_type = VFIFO;
1001 	FILEDESC_LOCK(td->td_proc->p_fd);
1002 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1003 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1004 #ifdef MAC
1005 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1006 	    &vattr);
1007 	if (error)
1008 		goto out;
1009 #endif
1010 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1011 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1012 	if (error == 0)
1013 		vput(nd.ni_vp);
1014 #ifdef MAC
1015 out:
1016 #endif
1017 	NDFREE(&nd, NDF_ONLY_PNBUF);
1018 	vput(nd.ni_dvp);
1019 	vn_finished_write(mp);
1020 	return (error);
1021 }
1022 
1023 /*
1024  * Make a hard file link.
1025  */
1026 #ifndef _SYS_SYSPROTO_H_
1027 struct link_args {
1028 	char	*path;
1029 	char	*link;
1030 };
1031 #endif
1032 /* ARGSUSED */
1033 int
1034 link(td, uap)
1035 	struct thread *td;
1036 	register struct link_args /* {
1037 		char *path;
1038 		char *link;
1039 	} */ *uap;
1040 {
1041 
1042 	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1043 }
1044 
1045 int
1046 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1047 {
1048 	struct vnode *vp;
1049 	struct mount *mp;
1050 	struct nameidata nd;
1051 	int error;
1052 
1053 	bwillwrite();
1054 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, segflg, path, td);
1055 	if ((error = namei(&nd)) != 0)
1056 		return (error);
1057 	NDFREE(&nd, NDF_ONLY_PNBUF);
1058 	vp = nd.ni_vp;
1059 	if (vp->v_type == VDIR) {
1060 		vrele(vp);
1061 		return (EPERM);		/* POSIX */
1062 	}
1063 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1064 		vrele(vp);
1065 		return (error);
1066 	}
1067 	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1068 	if ((error = namei(&nd)) == 0) {
1069 		if (nd.ni_vp != NULL) {
1070 			vrele(nd.ni_vp);
1071 			error = EEXIST;
1072 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1073 		    == 0) {
1074 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1075 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1076 #ifdef MAC
1077 			error = mac_check_vnode_link(td->td_ucred, nd.ni_dvp,
1078 			    vp, &nd.ni_cnd);
1079 			if (error == 0)
1080 #endif
1081 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1082 			VOP_UNLOCK(vp, 0, td);
1083 		}
1084 		NDFREE(&nd, NDF_ONLY_PNBUF);
1085 		vput(nd.ni_dvp);
1086 	}
1087 	vrele(vp);
1088 	vn_finished_write(mp);
1089 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1090 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1091 	return (error);
1092 }
1093 
1094 /*
1095  * Make a symbolic link.
1096  */
1097 #ifndef _SYS_SYSPROTO_H_
1098 struct symlink_args {
1099 	char	*path;
1100 	char	*link;
1101 };
1102 #endif
1103 /* ARGSUSED */
1104 int
1105 symlink(td, uap)
1106 	struct thread *td;
1107 	register struct symlink_args /* {
1108 		char *path;
1109 		char *link;
1110 	} */ *uap;
1111 {
1112 
1113 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1114 }
1115 
1116 int
1117 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1118 {
1119 	struct mount *mp;
1120 	struct vattr vattr;
1121 	char *syspath;
1122 	int error;
1123 	struct nameidata nd;
1124 
1125 	if (segflg == UIO_SYSSPACE) {
1126 		syspath = path;
1127 	} else {
1128 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1129 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1130 			goto out;
1131 	}
1132 restart:
1133 	bwillwrite();
1134 	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1135 	if ((error = namei(&nd)) != 0)
1136 		goto out;
1137 	if (nd.ni_vp) {
1138 		NDFREE(&nd, NDF_ONLY_PNBUF);
1139 		vrele(nd.ni_vp);
1140 		vput(nd.ni_dvp);
1141 		error = EEXIST;
1142 		goto out;
1143 	}
1144 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1145 		NDFREE(&nd, NDF_ONLY_PNBUF);
1146 		vput(nd.ni_dvp);
1147 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1148 			return (error);
1149 		goto restart;
1150 	}
1151 	VATTR_NULL(&vattr);
1152 	FILEDESC_LOCK(td->td_proc->p_fd);
1153 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1154 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1155 #ifdef MAC
1156 	vattr.va_type = VLNK;
1157 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1158 	    &vattr);
1159 	if (error)
1160 		goto out2;
1161 #endif
1162 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1163 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1164 	if (error == 0)
1165 		vput(nd.ni_vp);
1166 #ifdef MAC
1167 out2:
1168 #endif
1169 	NDFREE(&nd, NDF_ONLY_PNBUF);
1170 	vput(nd.ni_dvp);
1171 	vn_finished_write(mp);
1172 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1173 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1174 out:
1175 	if (segflg != UIO_SYSSPACE)
1176 		uma_zfree(namei_zone, syspath);
1177 	return (error);
1178 }
1179 
1180 /*
1181  * Delete a whiteout from the filesystem.
1182  */
1183 /* ARGSUSED */
1184 int
1185 undelete(td, uap)
1186 	struct thread *td;
1187 	register struct undelete_args /* {
1188 		char *path;
1189 	} */ *uap;
1190 {
1191 	int error;
1192 	struct mount *mp;
1193 	struct nameidata nd;
1194 
1195 restart:
1196 	bwillwrite();
1197 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1198 	    uap->path, td);
1199 	error = namei(&nd);
1200 	if (error)
1201 		return (error);
1202 
1203 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1204 		NDFREE(&nd, NDF_ONLY_PNBUF);
1205 		if (nd.ni_vp)
1206 			vrele(nd.ni_vp);
1207 		vput(nd.ni_dvp);
1208 		return (EEXIST);
1209 	}
1210 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1211 		NDFREE(&nd, NDF_ONLY_PNBUF);
1212 		vput(nd.ni_dvp);
1213 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1214 			return (error);
1215 		goto restart;
1216 	}
1217 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1218 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1219 	NDFREE(&nd, NDF_ONLY_PNBUF);
1220 	vput(nd.ni_dvp);
1221 	vn_finished_write(mp);
1222 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1223 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1224 	return (error);
1225 }
1226 
1227 /*
1228  * Delete a name from the filesystem.
1229  */
1230 #ifndef _SYS_SYSPROTO_H_
1231 struct unlink_args {
1232 	char	*path;
1233 };
1234 #endif
1235 /* ARGSUSED */
1236 int
1237 unlink(td, uap)
1238 	struct thread *td;
1239 	struct unlink_args /* {
1240 		char *path;
1241 	} */ *uap;
1242 {
1243 
1244 	return (kern_unlink(td, uap->path, UIO_USERSPACE));
1245 }
1246 
1247 int
1248 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1249 {
1250 	struct mount *mp;
1251 	struct vnode *vp;
1252 	int error;
1253 	struct nameidata nd;
1254 
1255 restart:
1256 	bwillwrite();
1257 	NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, pathseg, path, td);
1258 	if ((error = namei(&nd)) != 0)
1259 		return (error);
1260 	vp = nd.ni_vp;
1261 	if (vp->v_type == VDIR)
1262 		error = EPERM;		/* POSIX */
1263 	else {
1264 		/*
1265 		 * The root of a mounted filesystem cannot be deleted.
1266 		 *
1267 		 * XXX: can this only be a VDIR case?
1268 		 */
1269 		if (vp->v_vflag & VV_ROOT)
1270 			error = EBUSY;
1271 	}
1272 	if (error == 0) {
1273 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1274 			NDFREE(&nd, NDF_ONLY_PNBUF);
1275 			if (vp == nd.ni_dvp)
1276 				vrele(vp);
1277 			else
1278 				vput(vp);
1279 			vput(nd.ni_dvp);
1280 			if ((error = vn_start_write(NULL, &mp,
1281 			    V_XSLEEP | PCATCH)) != 0)
1282 				return (error);
1283 			goto restart;
1284 		}
1285 #ifdef MAC
1286 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1287 		    &nd.ni_cnd);
1288 		if (error)
1289 			goto out;
1290 #endif
1291 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1292 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1293 #ifdef MAC
1294 out:
1295 #endif
1296 		vn_finished_write(mp);
1297 	}
1298 	NDFREE(&nd, NDF_ONLY_PNBUF);
1299 	if (vp == nd.ni_dvp)
1300 		vrele(vp);
1301 	else
1302 		vput(vp);
1303 	vput(nd.ni_dvp);
1304 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1305 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1306 	return (error);
1307 }
1308 
1309 /*
1310  * Reposition read/write file offset.
1311  */
1312 #ifndef _SYS_SYSPROTO_H_
1313 struct lseek_args {
1314 	int	fd;
1315 	int	pad;
1316 	off_t	offset;
1317 	int	whence;
1318 };
1319 #endif
1320 int
1321 lseek(td, uap)
1322 	struct thread *td;
1323 	register struct lseek_args /* {
1324 		int fd;
1325 		int pad;
1326 		off_t offset;
1327 		int whence;
1328 	} */ *uap;
1329 {
1330 	struct ucred *cred = td->td_ucred;
1331 	struct file *fp;
1332 	struct vnode *vp;
1333 	struct vattr vattr;
1334 	off_t offset;
1335 	int error, noneg;
1336 
1337 	if ((error = fget(td, uap->fd, &fp)) != 0)
1338 		return (error);
1339 	if (fp->f_type != DTYPE_VNODE) {
1340 		fdrop(fp, td);
1341 		return (ESPIPE);
1342 	}
1343 	vp = fp->f_data;
1344 	noneg = (vp->v_type != VCHR);
1345 	offset = uap->offset;
1346 	switch (uap->whence) {
1347 	case L_INCR:
1348 		if (noneg &&
1349 		    (fp->f_offset < 0 ||
1350 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1351 			error = EOVERFLOW;
1352 			break;
1353 		}
1354 		offset += fp->f_offset;
1355 		break;
1356 	case L_XTND:
1357 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1358 		error = VOP_GETATTR(vp, &vattr, cred, td);
1359 		VOP_UNLOCK(vp, 0, td);
1360 		if (error)
1361 			break;
1362 		if (noneg &&
1363 		    (vattr.va_size > OFF_MAX ||
1364 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1365 			error = EOVERFLOW;
1366 			break;
1367 		}
1368 		offset += vattr.va_size;
1369 		break;
1370 	case L_SET:
1371 		break;
1372 	default:
1373 		error = EINVAL;
1374 	}
1375 	if (error == 0 && noneg && offset < 0)
1376 		error = EINVAL;
1377 	if (error != 0) {
1378 		fdrop(fp, td);
1379 		return (error);
1380 	}
1381 	fp->f_offset = offset;
1382 	*(off_t *)(td->td_retval) = fp->f_offset;
1383 	fdrop(fp, td);
1384 	return (0);
1385 }
1386 
1387 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1388 /*
1389  * Reposition read/write file offset.
1390  */
1391 #ifndef _SYS_SYSPROTO_H_
1392 struct olseek_args {
1393 	int	fd;
1394 	long	offset;
1395 	int	whence;
1396 };
1397 #endif
1398 int
1399 olseek(td, uap)
1400 	struct thread *td;
1401 	register struct olseek_args /* {
1402 		int fd;
1403 		long offset;
1404 		int whence;
1405 	} */ *uap;
1406 {
1407 	struct lseek_args /* {
1408 		int fd;
1409 		int pad;
1410 		off_t offset;
1411 		int whence;
1412 	} */ nuap;
1413 	int error;
1414 
1415 	nuap.fd = uap->fd;
1416 	nuap.offset = uap->offset;
1417 	nuap.whence = uap->whence;
1418 	error = lseek(td, &nuap);
1419 	return (error);
1420 }
#endif /* COMPAT_43 || COMPAT_SUNOS */
1422 
1423 /*
1424  * Check access permissions using passed credentials.
1425  */
1426 static int
1427 vn_access(vp, user_flags, cred, td)
1428 	struct vnode	*vp;
1429 	int		user_flags;
1430 	struct ucred	*cred;
1431 	struct thread	*td;
1432 {
1433 	int error, flags;
1434 
1435 	/* Flags == 0 means only check for existence. */
1436 	error = 0;
1437 	if (user_flags) {
1438 		flags = 0;
1439 		if (user_flags & R_OK)
1440 			flags |= VREAD;
1441 		if (user_flags & W_OK)
1442 			flags |= VWRITE;
1443 		if (user_flags & X_OK)
1444 			flags |= VEXEC;
1445 #ifdef MAC
1446 		error = mac_check_vnode_access(cred, vp, flags);
1447 		if (error)
1448 			return (error);
1449 #endif
1450 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1451 			error = VOP_ACCESS(vp, flags, cred, td);
1452 	}
1453 	return (error);
1454 }
1455 
1456 /*
1457  * Check access permissions using "real" credentials.
1458  */
1459 #ifndef _SYS_SYSPROTO_H_
1460 struct access_args {
1461 	char	*path;
1462 	int	flags;
1463 };
1464 #endif
1465 int
1466 access(td, uap)
1467 	struct thread *td;
1468 	register struct access_args /* {
1469 		char *path;
1470 		int flags;
1471 	} */ *uap;
1472 {
1473 
1474 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1475 }
1476 
1477 int
1478 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1479 {
1480 	struct ucred *cred, *tmpcred;
1481 	register struct vnode *vp;
1482 	int error;
1483 	struct nameidata nd;
1484 
1485 	/*
1486 	 * Create and modify a temporary credential instead of one that
1487 	 * is potentially shared.  This could also mess up socket
1488 	 * buffer accounting which can run in an interrupt context.
1489 	 *
1490 	 * XXX - Depending on how "threads" are finally implemented, it
1491 	 * may be better to explicitly pass the credential to namei()
1492 	 * rather than to modify the potentially shared process structure.
1493 	 */
1494 	cred = td->td_ucred;
1495 	tmpcred = crdup(cred);
1496 	tmpcred->cr_uid = cred->cr_ruid;
1497 	tmpcred->cr_groups[0] = cred->cr_rgid;
1498 	td->td_ucred = tmpcred;
1499 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1500 	if ((error = namei(&nd)) != 0)
1501 		goto out1;
1502 	vp = nd.ni_vp;
1503 
1504 	error = vn_access(vp, flags, tmpcred, td);
1505 	NDFREE(&nd, NDF_ONLY_PNBUF);
1506 	vput(vp);
1507 out1:
1508 	td->td_ucred = cred;
1509 	crfree(tmpcred);
1510 	return (error);
1511 }
1512 
1513 /*
1514  * Check access permissions using "effective" credentials.
1515  */
1516 #ifndef _SYS_SYSPROTO_H_
1517 struct eaccess_args {
1518 	char	*path;
1519 	int	flags;
1520 };
1521 #endif
1522 int
1523 eaccess(td, uap)
1524 	struct thread *td;
1525 	register struct eaccess_args /* {
1526 		char *path;
1527 		int flags;
1528 	} */ *uap;
1529 {
1530 	struct nameidata nd;
1531 	struct vnode *vp;
1532 	int error;
1533 
1534 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1535 	    uap->path, td);
1536 	if ((error = namei(&nd)) != 0)
1537 		return (error);
1538 	vp = nd.ni_vp;
1539 
1540 	error = vn_access(vp, uap->flags, td->td_ucred, td);
1541 	NDFREE(&nd, NDF_ONLY_PNBUF);
1542 	vput(vp);
1543 	return (error);
1544 }
1545 
1546 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1547 /*
1548  * Get file status; this version follows links.
1549  */
1550 #ifndef _SYS_SYSPROTO_H_
1551 struct ostat_args {
1552 	char	*path;
1553 	struct ostat *ub;
1554 };
1555 #endif
1556 /* ARGSUSED */
1557 int
1558 ostat(td, uap)
1559 	struct thread *td;
1560 	register struct ostat_args /* {
1561 		char *path;
1562 		struct ostat *ub;
1563 	} */ *uap;
1564 {
1565 	struct stat sb;
1566 	struct ostat osb;
1567 	int error;
1568 	struct nameidata nd;
1569 
1570 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1571 	    uap->path, td);
1572 	if ((error = namei(&nd)) != 0)
1573 		return (error);
1574 	NDFREE(&nd, NDF_ONLY_PNBUF);
1575 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1576 	vput(nd.ni_vp);
1577 	if (error)
1578 		return (error);
1579 	cvtstat(&sb, &osb);
1580 	error = copyout(&osb, uap->ub, sizeof (osb));
1581 	return (error);
1582 }
1583 
1584 /*
1585  * Get file status; this version does not follow links.
1586  */
1587 #ifndef _SYS_SYSPROTO_H_
1588 struct olstat_args {
1589 	char	*path;
1590 	struct ostat *ub;
1591 };
1592 #endif
1593 /* ARGSUSED */
1594 int
1595 olstat(td, uap)
1596 	struct thread *td;
1597 	register struct olstat_args /* {
1598 		char *path;
1599 		struct ostat *ub;
1600 	} */ *uap;
1601 {
1602 	struct vnode *vp;
1603 	struct stat sb;
1604 	struct ostat osb;
1605 	int error;
1606 	struct nameidata nd;
1607 
1608 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1609 	    uap->path, td);
1610 	if ((error = namei(&nd)) != 0)
1611 		return (error);
1612 	vp = nd.ni_vp;
1613 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1614 	NDFREE(&nd, NDF_ONLY_PNBUF);
1615 	vput(vp);
1616 	if (error)
1617 		return (error);
1618 	cvtstat(&sb, &osb);
1619 	error = copyout(&osb, uap->ub, sizeof (osb));
1620 	return (error);
1621 }
1622 
1623 /*
1624  * Convert from an old to a new stat structure.
1625  */
1626 void
1627 cvtstat(st, ost)
1628 	struct stat *st;
1629 	struct ostat *ost;
1630 {
1631 
1632 	ost->st_dev = st->st_dev;
1633 	ost->st_ino = st->st_ino;
1634 	ost->st_mode = st->st_mode;
1635 	ost->st_nlink = st->st_nlink;
1636 	ost->st_uid = st->st_uid;
1637 	ost->st_gid = st->st_gid;
1638 	ost->st_rdev = st->st_rdev;
1639 	if (st->st_size < (quad_t)1 << 32)
1640 		ost->st_size = st->st_size;
1641 	else
1642 		ost->st_size = -2;
1643 	ost->st_atime = st->st_atime;
1644 	ost->st_mtime = st->st_mtime;
1645 	ost->st_ctime = st->st_ctime;
1646 	ost->st_blksize = st->st_blksize;
1647 	ost->st_blocks = st->st_blocks;
1648 	ost->st_flags = st->st_flags;
1649 	ost->st_gen = st->st_gen;
1650 }
1651 #endif /* COMPAT_43 || COMPAT_SUNOS */
1652 
1653 /*
1654  * Get file status; this version follows links.
1655  */
1656 #ifndef _SYS_SYSPROTO_H_
1657 struct stat_args {
1658 	char	*path;
1659 	struct stat *ub;
1660 };
1661 #endif
1662 /* ARGSUSED */
1663 int
1664 stat(td, uap)
1665 	struct thread *td;
1666 	register struct stat_args /* {
1667 		char *path;
1668 		struct stat *ub;
1669 	} */ *uap;
1670 {
1671 	struct stat sb;
1672 	int error;
1673 	struct nameidata nd;
1674 
1675 #ifdef LOOKUP_SHARED
1676 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
1677 	    UIO_USERSPACE, uap->path, td);
1678 #else
1679 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1680 	    uap->path, td);
1681 #endif
1682 	if ((error = namei(&nd)) != 0)
1683 		return (error);
1684 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1685 	NDFREE(&nd, NDF_ONLY_PNBUF);
1686 	vput(nd.ni_vp);
1687 	if (error)
1688 		return (error);
1689 	error = copyout(&sb, uap->ub, sizeof (sb));
1690 	return (error);
1691 }
1692 
1693 /*
1694  * Get file status; this version does not follow links.
1695  */
1696 #ifndef _SYS_SYSPROTO_H_
1697 struct lstat_args {
1698 	char	*path;
1699 	struct stat *ub;
1700 };
1701 #endif
1702 /* ARGSUSED */
1703 int
1704 lstat(td, uap)
1705 	struct thread *td;
1706 	register struct lstat_args /* {
1707 		char *path;
1708 		struct stat *ub;
1709 	} */ *uap;
1710 {
1711 	int error;
1712 	struct vnode *vp;
1713 	struct stat sb;
1714 	struct nameidata nd;
1715 
1716 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1717 	    uap->path, td);
1718 	if ((error = namei(&nd)) != 0)
1719 		return (error);
1720 	vp = nd.ni_vp;
1721 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1722 	NDFREE(&nd, NDF_ONLY_PNBUF);
1723 	vput(vp);
1724 	if (error)
1725 		return (error);
1726 	error = copyout(&sb, uap->ub, sizeof (sb));
1727 	return (error);
1728 }
1729 
1730 /*
1731  * Implementation of the NetBSD stat() function.
1732  * XXX This should probably be collapsed with the FreeBSD version,
1733  * as the differences are only due to vn_stat() clearing spares at
1734  * the end of the structures.  vn_stat could be split to avoid this,
1735  * and thus collapse the following to close to zero code.
1736  */
1737 void
1738 cvtnstat(sb, nsb)
1739 	struct stat *sb;
1740 	struct nstat *nsb;
1741 {
1742 	bzero(nsb, sizeof *nsb);
1743 	nsb->st_dev = sb->st_dev;
1744 	nsb->st_ino = sb->st_ino;
1745 	nsb->st_mode = sb->st_mode;
1746 	nsb->st_nlink = sb->st_nlink;
1747 	nsb->st_uid = sb->st_uid;
1748 	nsb->st_gid = sb->st_gid;
1749 	nsb->st_rdev = sb->st_rdev;
1750 	nsb->st_atimespec = sb->st_atimespec;
1751 	nsb->st_mtimespec = sb->st_mtimespec;
1752 	nsb->st_ctimespec = sb->st_ctimespec;
1753 	nsb->st_size = sb->st_size;
1754 	nsb->st_blocks = sb->st_blocks;
1755 	nsb->st_blksize = sb->st_blksize;
1756 	nsb->st_flags = sb->st_flags;
1757 	nsb->st_gen = sb->st_gen;
1758 	nsb->st_birthtimespec = sb->st_birthtimespec;
1759 }
1760 
1761 #ifndef _SYS_SYSPROTO_H_
1762 struct nstat_args {
1763 	char	*path;
1764 	struct nstat *ub;
1765 };
1766 #endif
1767 /* ARGSUSED */
1768 int
1769 nstat(td, uap)
1770 	struct thread *td;
1771 	register struct nstat_args /* {
1772 		char *path;
1773 		struct nstat *ub;
1774 	} */ *uap;
1775 {
1776 	struct stat sb;
1777 	struct nstat nsb;
1778 	int error;
1779 	struct nameidata nd;
1780 
1781 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1782 	    uap->path, td);
1783 	if ((error = namei(&nd)) != 0)
1784 		return (error);
1785 	NDFREE(&nd, NDF_ONLY_PNBUF);
1786 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1787 	vput(nd.ni_vp);
1788 	if (error)
1789 		return (error);
1790 	cvtnstat(&sb, &nsb);
1791 	error = copyout(&nsb, uap->ub, sizeof (nsb));
1792 	return (error);
1793 }
1794 
1795 /*
1796  * NetBSD lstat.  Get file status; this version does not follow links.
1797  */
1798 #ifndef _SYS_SYSPROTO_H_
1799 struct lstat_args {
1800 	char	*path;
1801 	struct stat *ub;
1802 };
1803 #endif
1804 /* ARGSUSED */
1805 int
1806 nlstat(td, uap)
1807 	struct thread *td;
1808 	register struct nlstat_args /* {
1809 		char *path;
1810 		struct nstat *ub;
1811 	} */ *uap;
1812 {
1813 	int error;
1814 	struct vnode *vp;
1815 	struct stat sb;
1816 	struct nstat nsb;
1817 	struct nameidata nd;
1818 
1819 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1820 	    uap->path, td);
1821 	if ((error = namei(&nd)) != 0)
1822 		return (error);
1823 	vp = nd.ni_vp;
1824 	NDFREE(&nd, NDF_ONLY_PNBUF);
1825 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1826 	vput(vp);
1827 	if (error)
1828 		return (error);
1829 	cvtnstat(&sb, &nsb);
1830 	error = copyout(&nsb, uap->ub, sizeof (nsb));
1831 	return (error);
1832 }
1833 
1834 /*
1835  * Get configurable pathname variables.
1836  */
1837 #ifndef _SYS_SYSPROTO_H_
1838 struct pathconf_args {
1839 	char	*path;
1840 	int	name;
1841 };
1842 #endif
1843 /* ARGSUSED */
1844 int
1845 pathconf(td, uap)
1846 	struct thread *td;
1847 	register struct pathconf_args /* {
1848 		char *path;
1849 		int name;
1850 	} */ *uap;
1851 {
1852 	int error;
1853 	struct nameidata nd;
1854 
1855 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1856 	    uap->path, td);
1857 	if ((error = namei(&nd)) != 0)
1858 		return (error);
1859 	NDFREE(&nd, NDF_ONLY_PNBUF);
1860 
1861 	/* If asynchronous I/O is available, it works for all files. */
1862 	if (uap->name == _PC_ASYNC_IO)
1863 		td->td_retval[0] = async_io_version;
1864 	else
1865 		error = VOP_PATHCONF(nd.ni_vp, uap->name, td->td_retval);
1866 	vput(nd.ni_vp);
1867 	return (error);
1868 }
1869 
1870 /*
1871  * Return target name of a symbolic link.
1872  */
1873 #ifndef _SYS_SYSPROTO_H_
1874 struct readlink_args {
1875 	char	*path;
1876 	char	*buf;
1877 	int	count;
1878 };
1879 #endif
1880 /* ARGSUSED */
1881 int
1882 readlink(td, uap)
1883 	struct thread *td;
1884 	register struct readlink_args /* {
1885 		char *path;
1886 		char *buf;
1887 		int count;
1888 	} */ *uap;
1889 {
1890 
1891 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
1892 	    UIO_USERSPACE, uap->count));
1893 }
1894 
1895 int
1896 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
1897     enum uio_seg bufseg, int count)
1898 {
1899 	register struct vnode *vp;
1900 	struct iovec aiov;
1901 	struct uio auio;
1902 	int error;
1903 	struct nameidata nd;
1904 
1905 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1906 	if ((error = namei(&nd)) != 0)
1907 		return (error);
1908 	NDFREE(&nd, NDF_ONLY_PNBUF);
1909 	vp = nd.ni_vp;
1910 #ifdef MAC
1911 	error = mac_check_vnode_readlink(td->td_ucred, vp);
1912 	if (error) {
1913 		vput(vp);
1914 		return (error);
1915 	}
1916 #endif
1917 	if (vp->v_type != VLNK)
1918 		error = EINVAL;
1919 	else {
1920 		aiov.iov_base = buf;
1921 		aiov.iov_len = count;
1922 		auio.uio_iov = &aiov;
1923 		auio.uio_iovcnt = 1;
1924 		auio.uio_offset = 0;
1925 		auio.uio_rw = UIO_READ;
1926 		auio.uio_segflg = bufseg;
1927 		auio.uio_td = td;
1928 		auio.uio_resid = count;
1929 		error = VOP_READLINK(vp, &auio, td->td_ucred);
1930 	}
1931 	vput(vp);
1932 	td->td_retval[0] = count - auio.uio_resid;
1933 	return (error);
1934 }
1935 
1936 /*
1937  * Common implementation code for chflags() and fchflags().
1938  */
1939 static int
1940 setfflags(td, vp, flags)
1941 	struct thread *td;
1942 	struct vnode *vp;
1943 	int flags;
1944 {
1945 	int error;
1946 	struct mount *mp;
1947 	struct vattr vattr;
1948 
1949 	/*
1950 	 * Prevent non-root users from setting flags on devices.  When
1951 	 * a device is reused, users can retain ownership of the device
1952 	 * if they are allowed to set flags and programs assume that
1953 	 * chown can't fail when done as root.
1954 	 */
1955 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
1956 		error = suser_cred(td->td_ucred, PRISON_ROOT);
1957 		if (error)
1958 			return (error);
1959 	}
1960 
1961 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1962 		return (error);
1963 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1964 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1965 	VATTR_NULL(&vattr);
1966 	vattr.va_flags = flags;
1967 #ifdef MAC
1968 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
1969 	if (error == 0)
1970 #endif
1971 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
1972 	VOP_UNLOCK(vp, 0, td);
1973 	vn_finished_write(mp);
1974 	return (error);
1975 }
1976 
1977 /*
1978  * Change flags of a file given a path name.
1979  */
1980 #ifndef _SYS_SYSPROTO_H_
1981 struct chflags_args {
1982 	char	*path;
1983 	int	flags;
1984 };
1985 #endif
1986 /* ARGSUSED */
1987 int
1988 chflags(td, uap)
1989 	struct thread *td;
1990 	register struct chflags_args /* {
1991 		char *path;
1992 		int flags;
1993 	} */ *uap;
1994 {
1995 	int error;
1996 	struct nameidata nd;
1997 
1998 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
1999 	if ((error = namei(&nd)) != 0)
2000 		return (error);
2001 	NDFREE(&nd, NDF_ONLY_PNBUF);
2002 	error = setfflags(td, nd.ni_vp, uap->flags);
2003 	vrele(nd.ni_vp);
2004 	return error;
2005 }
2006 
2007 /*
2008  * Same as chflags() but doesn't follow symlinks.
2009  */
2010 int
2011 lchflags(td, uap)
2012 	struct thread *td;
2013 	register struct lchflags_args /* {
2014 		char *path;
2015 		int flags;
2016 	} */ *uap;
2017 {
2018 	int error;
2019 	struct nameidata nd;
2020 
2021 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2022 	if ((error = namei(&nd)) != 0)
2023 		return (error);
2024 	NDFREE(&nd, NDF_ONLY_PNBUF);
2025 	error = setfflags(td, nd.ni_vp, uap->flags);
2026 	vrele(nd.ni_vp);
2027 	return error;
2028 }
2029 
2030 /*
2031  * Change flags of a file given a file descriptor.
2032  */
2033 #ifndef _SYS_SYSPROTO_H_
2034 struct fchflags_args {
2035 	int	fd;
2036 	int	flags;
2037 };
2038 #endif
2039 /* ARGSUSED */
2040 int
2041 fchflags(td, uap)
2042 	struct thread *td;
2043 	register struct fchflags_args /* {
2044 		int fd;
2045 		int flags;
2046 	} */ *uap;
2047 {
2048 	struct file *fp;
2049 	int error;
2050 
2051 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2052 		return (error);
2053 	error = setfflags(td, fp->f_data, uap->flags);
2054 	fdrop(fp, td);
2055 	return (error);
2056 }
2057 
2058 /*
2059  * Common implementation code for chmod(), lchmod() and fchmod().
2060  */
2061 static int
2062 setfmode(td, vp, mode)
2063 	struct thread *td;
2064 	struct vnode *vp;
2065 	int mode;
2066 {
2067 	int error;
2068 	struct mount *mp;
2069 	struct vattr vattr;
2070 
2071 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2072 		return (error);
2073 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2074 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2075 	VATTR_NULL(&vattr);
2076 	vattr.va_mode = mode & ALLPERMS;
2077 #ifdef MAC
2078 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2079 	if (error == 0)
2080 #endif
2081 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2082 	VOP_UNLOCK(vp, 0, td);
2083 	vn_finished_write(mp);
2084 	return error;
2085 }
2086 
2087 /*
2088  * Change mode of a file given path name.
2089  */
2090 #ifndef _SYS_SYSPROTO_H_
2091 struct chmod_args {
2092 	char	*path;
2093 	int	mode;
2094 };
2095 #endif
2096 /* ARGSUSED */
2097 int
2098 chmod(td, uap)
2099 	struct thread *td;
2100 	register struct chmod_args /* {
2101 		char *path;
2102 		int mode;
2103 	} */ *uap;
2104 {
2105 
2106 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2107 }
2108 
2109 int
2110 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2111 {
2112 	int error;
2113 	struct nameidata nd;
2114 
2115 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2116 	if ((error = namei(&nd)) != 0)
2117 		return (error);
2118 	NDFREE(&nd, NDF_ONLY_PNBUF);
2119 	error = setfmode(td, nd.ni_vp, mode);
2120 	vrele(nd.ni_vp);
2121 	return error;
2122 }
2123 
2124 /*
2125  * Change mode of a file given path name (don't follow links.)
2126  */
2127 #ifndef _SYS_SYSPROTO_H_
2128 struct lchmod_args {
2129 	char	*path;
2130 	int	mode;
2131 };
2132 #endif
2133 /* ARGSUSED */
2134 int
2135 lchmod(td, uap)
2136 	struct thread *td;
2137 	register struct lchmod_args /* {
2138 		char *path;
2139 		int mode;
2140 	} */ *uap;
2141 {
2142 	int error;
2143 	struct nameidata nd;
2144 
2145 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2146 	if ((error = namei(&nd)) != 0)
2147 		return (error);
2148 	NDFREE(&nd, NDF_ONLY_PNBUF);
2149 	error = setfmode(td, nd.ni_vp, uap->mode);
2150 	vrele(nd.ni_vp);
2151 	return error;
2152 }
2153 
2154 /*
2155  * Change mode of a file given a file descriptor.
2156  */
2157 #ifndef _SYS_SYSPROTO_H_
2158 struct fchmod_args {
2159 	int	fd;
2160 	int	mode;
2161 };
2162 #endif
2163 /* ARGSUSED */
2164 int
2165 fchmod(td, uap)
2166 	struct thread *td;
2167 	register struct fchmod_args /* {
2168 		int fd;
2169 		int mode;
2170 	} */ *uap;
2171 {
2172 	struct file *fp;
2173 	int error;
2174 
2175 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2176 		return (error);
2177 	error = setfmode(td, fp->f_data, uap->mode);
2178 	fdrop(fp, td);
2179 	return (error);
2180 }
2181 
2182 /*
2183  * Common implementation for chown(), lchown(), and fchown()
2184  */
2185 static int
2186 setfown(td, vp, uid, gid)
2187 	struct thread *td;
2188 	struct vnode *vp;
2189 	uid_t uid;
2190 	gid_t gid;
2191 {
2192 	int error;
2193 	struct mount *mp;
2194 	struct vattr vattr;
2195 
2196 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2197 		return (error);
2198 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2199 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2200 	VATTR_NULL(&vattr);
2201 	vattr.va_uid = uid;
2202 	vattr.va_gid = gid;
2203 #ifdef MAC
2204 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2205 	    vattr.va_gid);
2206 	if (error == 0)
2207 #endif
2208 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2209 	VOP_UNLOCK(vp, 0, td);
2210 	vn_finished_write(mp);
2211 	return error;
2212 }
2213 
2214 /*
2215  * Set ownership given a path name.
2216  */
2217 #ifndef _SYS_SYSPROTO_H_
2218 struct chown_args {
2219 	char	*path;
2220 	int	uid;
2221 	int	gid;
2222 };
2223 #endif
2224 /* ARGSUSED */
2225 int
2226 chown(td, uap)
2227 	struct thread *td;
2228 	register struct chown_args /* {
2229 		char *path;
2230 		int uid;
2231 		int gid;
2232 	} */ *uap;
2233 {
2234 
2235 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2236 }
2237 
2238 int
2239 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2240     int gid)
2241 {
2242 	int error;
2243 	struct nameidata nd;
2244 
2245 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2246 	if ((error = namei(&nd)) != 0)
2247 		return (error);
2248 	NDFREE(&nd, NDF_ONLY_PNBUF);
2249 	error = setfown(td, nd.ni_vp, uid, gid);
2250 	vrele(nd.ni_vp);
2251 	return (error);
2252 }
2253 
2254 /*
2255  * Set ownership given a path name, do not cross symlinks.
2256  */
2257 #ifndef _SYS_SYSPROTO_H_
2258 struct lchown_args {
2259 	char	*path;
2260 	int	uid;
2261 	int	gid;
2262 };
2263 #endif
2264 /* ARGSUSED */
2265 int
2266 lchown(td, uap)
2267 	struct thread *td;
2268 	register struct lchown_args /* {
2269 		char *path;
2270 		int uid;
2271 		int gid;
2272 	} */ *uap;
2273 {
2274 
2275 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2276 }
2277 
2278 int
2279 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2280     int gid)
2281 {
2282 	int error;
2283 	struct nameidata nd;
2284 
2285 	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2286 	if ((error = namei(&nd)) != 0)
2287 		return (error);
2288 	NDFREE(&nd, NDF_ONLY_PNBUF);
2289 	error = setfown(td, nd.ni_vp, uid, gid);
2290 	vrele(nd.ni_vp);
2291 	return (error);
2292 }
2293 
2294 /*
2295  * Set ownership given a file descriptor.
2296  */
2297 #ifndef _SYS_SYSPROTO_H_
2298 struct fchown_args {
2299 	int	fd;
2300 	int	uid;
2301 	int	gid;
2302 };
2303 #endif
2304 /* ARGSUSED */
2305 int
2306 fchown(td, uap)
2307 	struct thread *td;
2308 	register struct fchown_args /* {
2309 		int fd;
2310 		int uid;
2311 		int gid;
2312 	} */ *uap;
2313 {
2314 	struct file *fp;
2315 	int error;
2316 
2317 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2318 		return (error);
2319 	error = setfown(td, fp->f_data, uap->uid, uap->gid);
2320 	fdrop(fp, td);
2321 	return (error);
2322 }
2323 
2324 /*
2325  * Common implementation code for utimes(), lutimes(), and futimes().
2326  */
2327 static int
2328 getutimes(usrtvp, tvpseg, tsp)
2329 	const struct timeval *usrtvp;
2330 	enum uio_seg tvpseg;
2331 	struct timespec *tsp;
2332 {
2333 	struct timeval tv[2];
2334 	const struct timeval *tvp;
2335 	int error;
2336 
2337 	if (usrtvp == NULL) {
2338 		microtime(&tv[0]);
2339 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2340 		tsp[1] = tsp[0];
2341 	} else {
2342 		if (tvpseg == UIO_SYSSPACE) {
2343 			tvp = usrtvp;
2344 		} else {
2345 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2346 				return (error);
2347 			tvp = tv;
2348 		}
2349 
2350 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2351 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2352 	}
2353 	return 0;
2354 }
2355 
2356 /*
2357  * Common implementation code for utimes(), lutimes(), and futimes().
2358  */
2359 static int
2360 setutimes(td, vp, ts, numtimes, nullflag)
2361 	struct thread *td;
2362 	struct vnode *vp;
2363 	const struct timespec *ts;
2364 	int numtimes;
2365 	int nullflag;
2366 {
2367 	int error, setbirthtime;
2368 	struct mount *mp;
2369 	struct vattr vattr;
2370 
2371 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2372 		return (error);
2373 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2374 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2375 	setbirthtime = 0;
2376 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2377 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2378 		setbirthtime = 1;
2379 	VATTR_NULL(&vattr);
2380 	vattr.va_atime = ts[0];
2381 	vattr.va_mtime = ts[1];
2382 	if (setbirthtime)
2383 		vattr.va_birthtime = ts[1];
2384 	if (numtimes > 2)
2385 		vattr.va_birthtime = ts[2];
2386 	if (nullflag)
2387 		vattr.va_vaflags |= VA_UTIMES_NULL;
2388 #ifdef MAC
2389 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2390 	    vattr.va_mtime);
2391 #endif
2392 	if (error == 0)
2393 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2394 	VOP_UNLOCK(vp, 0, td);
2395 	vn_finished_write(mp);
2396 	return error;
2397 }
2398 
2399 /*
2400  * Set the access and modification times of a file.
2401  */
2402 #ifndef _SYS_SYSPROTO_H_
2403 struct utimes_args {
2404 	char	*path;
2405 	struct	timeval *tptr;
2406 };
2407 #endif
2408 /* ARGSUSED */
2409 int
2410 utimes(td, uap)
2411 	struct thread *td;
2412 	register struct utimes_args /* {
2413 		char *path;
2414 		struct timeval *tptr;
2415 	} */ *uap;
2416 {
2417 
2418 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2419 	    UIO_USERSPACE));
2420 }
2421 
2422 int
2423 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2424     struct timeval *tptr, enum uio_seg tptrseg)
2425 {
2426 	struct timespec ts[2];
2427 	int error;
2428 	struct nameidata nd;
2429 
2430 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2431 		return (error);
2432 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2433 	if ((error = namei(&nd)) != 0)
2434 		return (error);
2435 	NDFREE(&nd, NDF_ONLY_PNBUF);
2436 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2437 	vrele(nd.ni_vp);
2438 	return (error);
2439 }
2440 
2441 /*
2442  * Set the access and modification times of a file.
2443  */
2444 #ifndef _SYS_SYSPROTO_H_
2445 struct lutimes_args {
2446 	char	*path;
2447 	struct	timeval *tptr;
2448 };
2449 #endif
2450 /* ARGSUSED */
2451 int
2452 lutimes(td, uap)
2453 	struct thread *td;
2454 	register struct lutimes_args /* {
2455 		char *path;
2456 		struct timeval *tptr;
2457 	} */ *uap;
2458 {
2459 
2460 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2461 	    UIO_USERSPACE));
2462 }
2463 
2464 int
2465 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2466     struct timeval *tptr, enum uio_seg tptrseg)
2467 {
2468 	struct timespec ts[2];
2469 	int error;
2470 	struct nameidata nd;
2471 
2472 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2473 		return (error);
2474 	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2475 	if ((error = namei(&nd)) != 0)
2476 		return (error);
2477 	NDFREE(&nd, NDF_ONLY_PNBUF);
2478 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2479 	vrele(nd.ni_vp);
2480 	return (error);
2481 }
2482 
2483 /*
2484  * Set the access and modification times of a file.
2485  */
2486 #ifndef _SYS_SYSPROTO_H_
2487 struct futimes_args {
2488 	int	fd;
2489 	struct	timeval *tptr;
2490 };
2491 #endif
2492 /* ARGSUSED */
2493 int
2494 futimes(td, uap)
2495 	struct thread *td;
2496 	register struct futimes_args /* {
2497 		int  fd;
2498 		struct timeval *tptr;
2499 	} */ *uap;
2500 {
2501 
2502 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2503 }
2504 
2505 int
2506 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2507     enum uio_seg tptrseg)
2508 {
2509 	struct timespec ts[2];
2510 	struct file *fp;
2511 	int error;
2512 
2513 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2514 		return (error);
2515 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2516 		return (error);
2517 	error = setutimes(td, fp->f_data, ts, 2, tptr == NULL);
2518 	fdrop(fp, td);
2519 	return (error);
2520 }
2521 
2522 /*
2523  * Truncate a file given its path name.
2524  */
2525 #ifndef _SYS_SYSPROTO_H_
2526 struct truncate_args {
2527 	char	*path;
2528 	int	pad;
2529 	off_t	length;
2530 };
2531 #endif
2532 /* ARGSUSED */
2533 int
2534 truncate(td, uap)
2535 	struct thread *td;
2536 	register struct truncate_args /* {
2537 		char *path;
2538 		int pad;
2539 		off_t length;
2540 	} */ *uap;
2541 {
2542 
2543 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2544 }
2545 
2546 int
2547 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2548 {
2549 	struct mount *mp;
2550 	struct vnode *vp;
2551 	struct vattr vattr;
2552 	int error;
2553 	struct nameidata nd;
2554 
2555 	if (length < 0)
2556 		return(EINVAL);
2557 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2558 	if ((error = namei(&nd)) != 0)
2559 		return (error);
2560 	vp = nd.ni_vp;
2561 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2562 		vrele(vp);
2563 		return (error);
2564 	}
2565 	NDFREE(&nd, NDF_ONLY_PNBUF);
2566 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2567 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2568 	if (vp->v_type == VDIR)
2569 		error = EISDIR;
2570 #ifdef MAC
2571 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2572 	}
2573 #endif
2574 	else if ((error = vn_writechk(vp)) == 0 &&
2575 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2576 		VATTR_NULL(&vattr);
2577 		vattr.va_size = length;
2578 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2579 	}
2580 	vput(vp);
2581 	vn_finished_write(mp);
2582 	return (error);
2583 }
2584 
2585 /*
2586  * Truncate a file given a file descriptor.
2587  */
2588 #ifndef _SYS_SYSPROTO_H_
2589 struct ftruncate_args {
2590 	int	fd;
2591 	int	pad;
2592 	off_t	length;
2593 };
2594 #endif
2595 /* ARGSUSED */
2596 int
2597 ftruncate(td, uap)
2598 	struct thread *td;
2599 	register struct ftruncate_args /* {
2600 		int fd;
2601 		int pad;
2602 		off_t length;
2603 	} */ *uap;
2604 {
2605 	struct mount *mp;
2606 	struct vattr vattr;
2607 	struct vnode *vp;
2608 	struct file *fp;
2609 	int error;
2610 
2611 	if (uap->length < 0)
2612 		return(EINVAL);
2613 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2614 		return (error);
2615 	if ((fp->f_flag & FWRITE) == 0) {
2616 		fdrop(fp, td);
2617 		return (EINVAL);
2618 	}
2619 	vp = fp->f_data;
2620 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2621 		fdrop(fp, td);
2622 		return (error);
2623 	}
2624 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2625 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2626 	if (vp->v_type == VDIR)
2627 		error = EISDIR;
2628 #ifdef MAC
2629 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
2630 	    vp))) {
2631 	}
2632 #endif
2633 	else if ((error = vn_writechk(vp)) == 0) {
2634 		VATTR_NULL(&vattr);
2635 		vattr.va_size = uap->length;
2636 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2637 	}
2638 	VOP_UNLOCK(vp, 0, td);
2639 	vn_finished_write(mp);
2640 	fdrop(fp, td);
2641 	return (error);
2642 }
2643 
2644 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2645 /*
2646  * Truncate a file given its path name.
2647  */
2648 #ifndef _SYS_SYSPROTO_H_
2649 struct otruncate_args {
2650 	char	*path;
2651 	long	length;
2652 };
2653 #endif
2654 /* ARGSUSED */
2655 int
2656 otruncate(td, uap)
2657 	struct thread *td;
2658 	register struct otruncate_args /* {
2659 		char *path;
2660 		long length;
2661 	} */ *uap;
2662 {
2663 	struct truncate_args /* {
2664 		char *path;
2665 		int pad;
2666 		off_t length;
2667 	} */ nuap;
2668 
2669 	nuap.path = uap->path;
2670 	nuap.length = uap->length;
2671 	return (truncate(td, &nuap));
2672 }
2673 
2674 /*
2675  * Truncate a file given a file descriptor.
2676  */
2677 #ifndef _SYS_SYSPROTO_H_
2678 struct oftruncate_args {
2679 	int	fd;
2680 	long	length;
2681 };
2682 #endif
2683 /* ARGSUSED */
2684 int
2685 oftruncate(td, uap)
2686 	struct thread *td;
2687 	register struct oftruncate_args /* {
2688 		int fd;
2689 		long length;
2690 	} */ *uap;
2691 {
2692 	struct ftruncate_args /* {
2693 		int fd;
2694 		int pad;
2695 		off_t length;
2696 	} */ nuap;
2697 
2698 	nuap.fd = uap->fd;
2699 	nuap.length = uap->length;
2700 	return (ftruncate(td, &nuap));
2701 }
2702 #endif /* COMPAT_43 || COMPAT_SUNOS */
2703 
2704 /*
2705  * Sync an open file.
2706  */
2707 #ifndef _SYS_SYSPROTO_H_
2708 struct fsync_args {
2709 	int	fd;
2710 };
2711 #endif
2712 /* ARGSUSED */
2713 int
2714 fsync(td, uap)
2715 	struct thread *td;
2716 	struct fsync_args /* {
2717 		int fd;
2718 	} */ *uap;
2719 {
2720 	struct vnode *vp;
2721 	struct mount *mp;
2722 	struct file *fp;
2723 	vm_object_t obj;
2724 	int error;
2725 
2726 	GIANT_REQUIRED;
2727 
2728 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2729 		return (error);
2730 	vp = fp->f_data;
2731 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2732 		fdrop(fp, td);
2733 		return (error);
2734 	}
2735 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2736 	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2737 		VM_OBJECT_LOCK(obj);
2738 		vm_object_page_clean(obj, 0, 0, 0);
2739 		VM_OBJECT_UNLOCK(obj);
2740 	}
2741 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2742 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)
2743 	    && softdep_fsync_hook != NULL)
2744 		error = (*softdep_fsync_hook)(vp);
2745 
2746 	VOP_UNLOCK(vp, 0, td);
2747 	vn_finished_write(mp);
2748 	fdrop(fp, td);
2749 	return (error);
2750 }
2751 
2752 /*
2753  * Rename files.  Source and destination must either both be directories,
2754  * or both not be directories.  If target is a directory, it must be empty.
2755  */
2756 #ifndef _SYS_SYSPROTO_H_
2757 struct rename_args {
2758 	char	*from;
2759 	char	*to;
2760 };
2761 #endif
2762 /* ARGSUSED */
2763 int
2764 rename(td, uap)
2765 	struct thread *td;
2766 	register struct rename_args /* {
2767 		char *from;
2768 		char *to;
2769 	} */ *uap;
2770 {
2771 
2772 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
2773 }
2774 
2775 int
2776 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
2777 {
2778 	struct mount *mp = NULL;
2779 	struct vnode *tvp, *fvp, *tdvp;
2780 	struct nameidata fromnd, tond;
2781 	int error;
2782 
2783 	bwillwrite();
2784 #ifdef MAC
2785 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART, pathseg,
2786 	    from, td);
2787 #else
2788 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, pathseg, from, td);
2789 #endif
2790 	if ((error = namei(&fromnd)) != 0)
2791 		return (error);
2792 #ifdef MAC
2793 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
2794 	    fromnd.ni_vp, &fromnd.ni_cnd);
2795 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
2796 	VOP_UNLOCK(fromnd.ni_vp, 0, td);
2797 #endif
2798 	fvp = fromnd.ni_vp;
2799 	if (error == 0)
2800 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
2801 	if (error != 0) {
2802 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2803 		vrele(fromnd.ni_dvp);
2804 		vrele(fvp);
2805 		goto out1;
2806 	}
2807 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
2808 	    NOOBJ, pathseg, to, td);
2809 	if (fromnd.ni_vp->v_type == VDIR)
2810 		tond.ni_cnd.cn_flags |= WILLBEDIR;
2811 	if ((error = namei(&tond)) != 0) {
2812 		/* Translate error code for rename("dir1", "dir2/."). */
2813 		if (error == EISDIR && fvp->v_type == VDIR)
2814 			error = EINVAL;
2815 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2816 		vrele(fromnd.ni_dvp);
2817 		vrele(fvp);
2818 		goto out1;
2819 	}
2820 	tdvp = tond.ni_dvp;
2821 	tvp = tond.ni_vp;
2822 	if (tvp != NULL) {
2823 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2824 			error = ENOTDIR;
2825 			goto out;
2826 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2827 			error = EISDIR;
2828 			goto out;
2829 		}
2830 	}
2831 	if (fvp == tdvp)
2832 		error = EINVAL;
2833 	/*
2834 	 * If the source is the same as the destination (that is, if they
2835 	 * are links to the same vnode), then there is nothing to do.
2836 	 */
2837 	if (fvp == tvp)
2838 		error = -1;
2839 #ifdef MAC
2840 	else
2841 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
2842 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
2843 #endif
2844 out:
2845 	if (!error) {
2846 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
2847 		if (fromnd.ni_dvp != tdvp) {
2848 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2849 		}
2850 		if (tvp) {
2851 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
2852 		}
2853 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2854 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2855 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2856 		NDFREE(&tond, NDF_ONLY_PNBUF);
2857 	} else {
2858 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2859 		NDFREE(&tond, NDF_ONLY_PNBUF);
2860 		if (tdvp == tvp)
2861 			vrele(tdvp);
2862 		else
2863 			vput(tdvp);
2864 		if (tvp)
2865 			vput(tvp);
2866 		vrele(fromnd.ni_dvp);
2867 		vrele(fvp);
2868 	}
2869 	vrele(tond.ni_startdir);
2870 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2871 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2872 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2873 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2874 out1:
2875 	vn_finished_write(mp);
2876 	if (fromnd.ni_startdir)
2877 		vrele(fromnd.ni_startdir);
2878 	if (error == -1)
2879 		return (0);
2880 	return (error);
2881 }
2882 
2883 /*
2884  * Make a directory file.
2885  */
2886 #ifndef _SYS_SYSPROTO_H_
2887 struct mkdir_args {
2888 	char	*path;
2889 	int	mode;
2890 };
2891 #endif
2892 /* ARGSUSED */
2893 int
2894 mkdir(td, uap)
2895 	struct thread *td;
2896 	register struct mkdir_args /* {
2897 		char *path;
2898 		int mode;
2899 	} */ *uap;
2900 {
2901 
2902 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
2903 }
2904 
2905 int
2906 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
2907 {
2908 	struct mount *mp;
2909 	struct vnode *vp;
2910 	struct vattr vattr;
2911 	int error;
2912 	struct nameidata nd;
2913 
2914 restart:
2915 	bwillwrite();
2916 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, path, td);
2917 	nd.ni_cnd.cn_flags |= WILLBEDIR;
2918 	if ((error = namei(&nd)) != 0)
2919 		return (error);
2920 	vp = nd.ni_vp;
2921 	if (vp != NULL) {
2922 		NDFREE(&nd, NDF_ONLY_PNBUF);
2923 		vrele(vp);
2924 		/*
2925 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
2926 		 * the strange behaviour of leaving the vnode unlocked
2927 		 * if the target is the same vnode as the parent.
2928 		 */
2929 		if (vp == nd.ni_dvp)
2930 			vrele(nd.ni_dvp);
2931 		else
2932 			vput(nd.ni_dvp);
2933 		return (EEXIST);
2934 	}
2935 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2936 		NDFREE(&nd, NDF_ONLY_PNBUF);
2937 		vput(nd.ni_dvp);
2938 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2939 			return (error);
2940 		goto restart;
2941 	}
2942 	VATTR_NULL(&vattr);
2943 	vattr.va_type = VDIR;
2944 	FILEDESC_LOCK(td->td_proc->p_fd);
2945 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
2946 	FILEDESC_UNLOCK(td->td_proc->p_fd);
2947 #ifdef MAC
2948 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
2949 	    &vattr);
2950 	if (error)
2951 		goto out;
2952 #endif
2953 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2954 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2955 #ifdef MAC
2956 out:
2957 #endif
2958 	NDFREE(&nd, NDF_ONLY_PNBUF);
2959 	vput(nd.ni_dvp);
2960 	if (!error)
2961 		vput(nd.ni_vp);
2962 	vn_finished_write(mp);
2963 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2964 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2965 	return (error);
2966 }
2967 
2968 /*
2969  * Remove a directory file.
2970  */
2971 #ifndef _SYS_SYSPROTO_H_
2972 struct rmdir_args {
2973 	char	*path;
2974 };
2975 #endif
2976 /* ARGSUSED */
2977 int
2978 rmdir(td, uap)
2979 	struct thread *td;
2980 	struct rmdir_args /* {
2981 		char *path;
2982 	} */ *uap;
2983 {
2984 
2985 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
2986 }
2987 
2988 int
2989 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
2990 {
2991 	struct mount *mp;
2992 	struct vnode *vp;
2993 	int error;
2994 	struct nameidata nd;
2995 
2996 restart:
2997 	bwillwrite();
2998 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, pathseg, path, td);
2999 	if ((error = namei(&nd)) != 0)
3000 		return (error);
3001 	vp = nd.ni_vp;
3002 	if (vp->v_type != VDIR) {
3003 		error = ENOTDIR;
3004 		goto out;
3005 	}
3006 	/*
3007 	 * No rmdir "." please.
3008 	 */
3009 	if (nd.ni_dvp == vp) {
3010 		error = EINVAL;
3011 		goto out;
3012 	}
3013 	/*
3014 	 * The root of a mounted filesystem cannot be deleted.
3015 	 */
3016 	if (vp->v_vflag & VV_ROOT) {
3017 		error = EBUSY;
3018 		goto out;
3019 	}
3020 #ifdef MAC
3021 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3022 	    &nd.ni_cnd);
3023 	if (error)
3024 		goto out;
3025 #endif
3026 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3027 		NDFREE(&nd, NDF_ONLY_PNBUF);
3028 		if (nd.ni_dvp == vp)
3029 			vrele(nd.ni_dvp);
3030 		else
3031 			vput(nd.ni_dvp);
3032 		vput(vp);
3033 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3034 			return (error);
3035 		goto restart;
3036 	}
3037 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3038 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3039 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3040 	vn_finished_write(mp);
3041 out:
3042 	NDFREE(&nd, NDF_ONLY_PNBUF);
3043 	if (nd.ni_dvp == vp)
3044 		vrele(nd.ni_dvp);
3045 	else
3046 		vput(nd.ni_dvp);
3047 	vput(vp);
3048 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3049 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3050 	return (error);
3051 }
3052 
3053 #ifdef COMPAT_43
3054 /*
3055  * Read a block of directory entries in a filesystem independent format.
3056  */
3057 #ifndef _SYS_SYSPROTO_H_
3058 struct ogetdirentries_args {
3059 	int	fd;
3060 	char	*buf;
3061 	u_int	count;
3062 	long	*basep;
3063 };
3064 #endif
/*
 * COMPAT_43 getdirentries.  Reads directory entries from the open
 * directory `fd' into the user buffer `buf' (at most `count' bytes),
 * copies the file offset the read started at out through `basep', and
 * reports the number of bytes transferred in td_retval[0].
 *
 * Unless the non-little-endian shortcut below applies, entries are read
 * into a temporary kernel buffer, every record has its d_type/d_namlen
 * fields rewritten, and the result is then moved to the user buffer.
 *
 * Returns EINVAL for count > 64K or a non-directory vnode, EBADF if the
 * descriptor lacks FREAD, EIO if a record with a non-positive d_reclen is
 * met during the rewrite, otherwise 0 or an error from the calls below.
 */
3065 int
3066 ogetdirentries(td, uap)
3067 	struct thread *td;
3068 	register struct ogetdirentries_args /* {
3069 		int fd;
3070 		char *buf;
3071 		u_int count;
3072 		long *basep;
3073 	} */ *uap;
3074 {
3075 	struct vnode *vp;
3076 	struct file *fp;
3077 	struct uio auio, kuio;
3078 	struct iovec aiov, kiov;
3079 	struct dirent *dp, *edp;
3080 	caddr_t dirbuf;
3081 	int error, eofflag, readcnt;
3082 	long loff;
3083 
3084 	/* XXX arbitrary sanity limit on `count'. */
3085 	if (uap->count > 64 * 1024)
3086 		return (EINVAL);
3087 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3088 		return (error);
	/* The descriptor must have been opened for reading. */
3089 	if ((fp->f_flag & FREAD) == 0) {
3090 		fdrop(fp, td);
3091 		return (EBADF);
3092 	}
3093 	vp = fp->f_data;
	/* Re-entered whenever the union handling below switches to another vnode. */
3094 unionread:
3095 	if (vp->v_type != VDIR) {
3096 		fdrop(fp, td);
3097 		return (EINVAL);
3098 	}
	/* Describe the caller's buffer as a single-segment user-space read. */
3099 	aiov.iov_base = uap->buf;
3100 	aiov.iov_len = uap->count;
3101 	auio.uio_iov = &aiov;
3102 	auio.uio_iovcnt = 1;
3103 	auio.uio_rw = UIO_READ;
3104 	auio.uio_segflg = UIO_USERSPACE;
3105 	auio.uio_td = td;
3106 	auio.uio_resid = uap->count;
3107 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember where this read starts; it is what gets copied to basep. */
3108 	loff = auio.uio_offset = fp->f_offset;
3109 #ifdef MAC
3110 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3111 	if (error) {
3112 		VOP_UNLOCK(vp, 0, td);
3113 		fdrop(fp, td);
3114 		return (error);
3115 	}
3116 #endif
	/*
	 * On non-little-endian builds, mounts with mnt_maxsymlinklen <= 0
	 * are read straight into the user buffer with no rewriting; in every
	 * other case the braced block below runs.
	 */
3117 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3118 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3119 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3120 			    NULL, NULL);
3121 			fp->f_offset = auio.uio_offset;
3122 		} else
3123 #	endif
3124 	{
		/* Clone the user uio but aim it at a kernel bounce buffer. */
3125 		kuio = auio;
3126 		kuio.uio_iov = &kiov;
3127 		kuio.uio_segflg = UIO_SYSSPACE;
3128 		kiov.iov_len = uap->count;
3129 		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3130 		kiov.iov_base = dirbuf;
3131 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3132 			    NULL, NULL);
3133 		fp->f_offset = kuio.uio_offset;
3134 		if (error == 0) {
			/* Walk the records that were read and patch each one. */
3135 			readcnt = uap->count - kuio.uio_resid;
3136 			edp = (struct dirent *)&dirbuf[readcnt];
3137 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3138 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3139 					/*
3140 					 * The expected low byte of
3141 					 * dp->d_namlen is our dp->d_type.
3142 					 * The high MBZ byte of dp->d_namlen
3143 					 * is our dp->d_namlen.
3144 					 */
3145 					dp->d_type = dp->d_namlen;
3146 					dp->d_namlen = 0;
3147 #				else
3148 					/*
3149 					 * The dp->d_type is the high byte
3150 					 * of the expected dp->d_namlen,
3151 					 * so must be zero'ed.
3152 					 */
3153 					dp->d_type = 0;
3154 #				endif
3155 				if (dp->d_reclen > 0) {
3156 					dp = (struct dirent *)
3157 					    ((char *)dp + dp->d_reclen);
3158 				} else {
3159 					error = EIO;
3160 					break;
3161 				}
3162 			}
			/*
			 * Only move data to the user buffer when the walk ran
			 * to the end; after the EIO break above dp is still
			 * below edp and nothing is copied.
			 */
3163 			if (dp >= edp)
3164 				error = uiomove(dirbuf, readcnt, &auio);
3165 		}
3166 		FREE(dirbuf, M_TEMP);
3167 	}
3168 	VOP_UNLOCK(vp, 0, td);
3169 	if (error) {
3170 		fdrop(fp, td);
3171 		return (error);
3172 	}
	/* Nothing was transferred: consider the union-mount fallbacks. */
3173 	if (uap->count == auio.uio_resid) {
3174 		if (union_dircheckp) {
			/* A return of -1 asks for the read to be redone with vp as updated by the hook. */
3175 			error = union_dircheckp(td, &vp, fp);
3176 			if (error == -1)
3177 				goto unionread;
3178 			if (error) {
3179 				fdrop(fp, td);
3180 				return (error);
3181 			}
3182 		}
3183 		/*
3184 		 * XXX We could delay dropping the lock above but
3185 		 * union_dircheckp complicates things.
3186 		 */
3187 		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
		/*
		 * At the root of an MNT_UNION mount, switch the file over to
		 * the covered vnode, reset the offset and read again.
		 */
3188 		if ((vp->v_vflag & VV_ROOT) &&
3189 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3190 			struct vnode *tvp = vp;
3191 			vp = vp->v_mount->mnt_vnodecovered;
3192 			VREF(vp);
3193 			fp->f_data = vp;
3194 			fp->f_offset = 0;
3195 			vput(tvp);
3196 			goto unionread;
3197 		}
3198 		VOP_UNLOCK(vp, 0, td);
3199 	}
	/* Unlike getdirentries(), basep is copied out unconditionally here. */
3200 	error = copyout(&loff, uap->basep, sizeof(long));
3201 	fdrop(fp, td);
3202 	td->td_retval[0] = uap->count - auio.uio_resid;
3203 	return (error);
3204 }
3205 #endif /* COMPAT_43 */
3206 
3207 /*
3208  * Read a block of directory entries in a filesystem independent format.
3209  */
3210 #ifndef _SYS_SYSPROTO_H_
3211 struct getdirentries_args {
3212 	int	fd;
3213 	char	*buf;
3214 	u_int	count;
3215 	long	*basep;
3216 };
3217 #endif
3218 int
3219 getdirentries(td, uap)
3220 	struct thread *td;
3221 	register struct getdirentries_args /* {
3222 		int fd;
3223 		char *buf;
3224 		u_int count;
3225 		long *basep;
3226 	} */ *uap;
3227 {
3228 	struct vnode *vp;
3229 	struct file *fp;
3230 	struct uio auio;
3231 	struct iovec aiov;
3232 	long loff;
3233 	int error, eofflag;
3234 
3235 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3236 		return (error);
3237 	if ((fp->f_flag & FREAD) == 0) {
3238 		fdrop(fp, td);
3239 		return (EBADF);
3240 	}
3241 	vp = fp->f_data;
3242 unionread:
3243 	if (vp->v_type != VDIR) {
3244 		fdrop(fp, td);
3245 		return (EINVAL);
3246 	}
3247 	aiov.iov_base = uap->buf;
3248 	aiov.iov_len = uap->count;
3249 	auio.uio_iov = &aiov;
3250 	auio.uio_iovcnt = 1;
3251 	auio.uio_rw = UIO_READ;
3252 	auio.uio_segflg = UIO_USERSPACE;
3253 	auio.uio_td = td;
3254 	auio.uio_resid = uap->count;
3255 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3256 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3257 	loff = auio.uio_offset = fp->f_offset;
3258 #ifdef MAC
3259 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3260 	if (error == 0)
3261 #endif
3262 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3263 		    NULL);
3264 	fp->f_offset = auio.uio_offset;
3265 	VOP_UNLOCK(vp, 0, td);
3266 	if (error) {
3267 		fdrop(fp, td);
3268 		return (error);
3269 	}
3270 	if (uap->count == auio.uio_resid) {
3271 		if (union_dircheckp) {
3272 			error = union_dircheckp(td, &vp, fp);
3273 			if (error == -1)
3274 				goto unionread;
3275 			if (error) {
3276 				fdrop(fp, td);
3277 				return (error);
3278 			}
3279 		}
3280 		/*
3281 		 * XXX We could delay dropping the lock above but
3282 		 * union_dircheckp complicates things.
3283 		 */
3284 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3285 		if ((vp->v_vflag & VV_ROOT) &&
3286 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3287 			struct vnode *tvp = vp;
3288 			vp = vp->v_mount->mnt_vnodecovered;
3289 			VREF(vp);
3290 			fp->f_data = vp;
3291 			fp->f_offset = 0;
3292 			vput(tvp);
3293 			goto unionread;
3294 		}
3295 		VOP_UNLOCK(vp, 0, td);
3296 	}
3297 	if (uap->basep != NULL) {
3298 		error = copyout(&loff, uap->basep, sizeof(long));
3299 	}
3300 	td->td_retval[0] = uap->count - auio.uio_resid;
3301 	fdrop(fp, td);
3302 	return (error);
3303 }
3304 #ifndef _SYS_SYSPROTO_H_
3305 struct getdents_args {
3306 	int fd;
3307 	char *buf;
3308 	size_t count;
3309 };
3310 #endif
3311 int
3312 getdents(td, uap)
3313 	struct thread *td;
3314 	register struct getdents_args /* {
3315 		int fd;
3316 		char *buf;
3317 		u_int count;
3318 	} */ *uap;
3319 {
3320 	struct getdirentries_args ap;
3321 	ap.fd = uap->fd;
3322 	ap.buf = uap->buf;
3323 	ap.count = uap->count;
3324 	ap.basep = NULL;
3325 	return getdirentries(td, &ap);
3326 }
3327 
3328 /*
3329  * Set the mode mask for creation of filesystem nodes.
3330  *
3331  * MP SAFE
3332  */
3333 #ifndef _SYS_SYSPROTO_H_
3334 struct umask_args {
3335 	int	newmask;
3336 };
3337 #endif
3338 int
3339 umask(td, uap)
3340 	struct thread *td;
3341 	struct umask_args /* {
3342 		int newmask;
3343 	} */ *uap;
3344 {
3345 	register struct filedesc *fdp;
3346 
3347 	FILEDESC_LOCK(td->td_proc->p_fd);
3348 	fdp = td->td_proc->p_fd;
3349 	td->td_retval[0] = fdp->fd_cmask;
3350 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3351 	FILEDESC_UNLOCK(td->td_proc->p_fd);
3352 	return (0);
3353 }
3354 
3355 /*
3356  * Void all references to file by ripping underlying filesystem
3357  * away from vnode.
3358  */
3359 #ifndef _SYS_SYSPROTO_H_
3360 struct revoke_args {
3361 	char	*path;
3362 };
3363 #endif
3364 /* ARGSUSED */
3365 int
3366 revoke(td, uap)
3367 	struct thread *td;
3368 	register struct revoke_args /* {
3369 		char *path;
3370 	} */ *uap;
3371 {
3372 	struct mount *mp;
3373 	struct vnode *vp;
3374 	struct vattr vattr;
3375 	int error;
3376 	struct nameidata nd;
3377 
3378 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
3379 	if ((error = namei(&nd)) != 0)
3380 		return (error);
3381 	vp = nd.ni_vp;
3382 	NDFREE(&nd, NDF_ONLY_PNBUF);
3383 	if (vp->v_type != VCHR) {
3384 		vput(vp);
3385 		return (EINVAL);
3386 	}
3387 #ifdef MAC
3388 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3389 	if (error) {
3390 		vput(vp);
3391 		return (error);
3392 	}
3393 #endif
3394 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3395 	if (error) {
3396 		vput(vp);
3397 		return (error);
3398 	}
3399 	VOP_UNLOCK(vp, 0, td);
3400 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3401 		error = suser_cred(td->td_ucred, PRISON_ROOT);
3402 		if (error)
3403 			goto out;
3404 	}
3405 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3406 		goto out;
3407 	if (vcount(vp) > 1)
3408 		VOP_REVOKE(vp, REVOKEALL);
3409 	vn_finished_write(mp);
3410 out:
3411 	vrele(vp);
3412 	return (error);
3413 }
3414 
3415 /*
3416  * Convert a user file descriptor to a kernel file entry.
3417  * The file entry is locked upon returning.
3418  */
3419 int
3420 getvnode(fdp, fd, fpp)
3421 	struct filedesc *fdp;
3422 	int fd;
3423 	struct file **fpp;
3424 {
3425 	int error;
3426 	struct file *fp;
3427 
3428 	fp = NULL;
3429 	if (fdp == NULL)
3430 		error = EBADF;
3431 	else {
3432 		FILEDESC_LOCK(fdp);
3433 		if ((u_int)fd >= fdp->fd_nfiles ||
3434 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3435 			error = EBADF;
3436 		else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
3437 			fp = NULL;
3438 			error = EINVAL;
3439 		} else {
3440 			fhold(fp);
3441 			error = 0;
3442 		}
3443 		FILEDESC_UNLOCK(fdp);
3444 	}
3445 	*fpp = fp;
3446 	return (error);
3447 }
3448 /*
3449  * Get (NFS) file handle
3450  */
3451 #ifndef _SYS_SYSPROTO_H_
3452 struct getfh_args {
3453 	char	*fname;
3454 	fhandle_t *fhp;
3455 };
3456 #endif
3457 int
3458 getfh(td, uap)
3459 	struct thread *td;
3460 	register struct getfh_args *uap;
3461 {
3462 	struct nameidata nd;
3463 	fhandle_t fh;
3464 	register struct vnode *vp;
3465 	int error;
3466 
3467 	/*
3468 	 * Must be super user
3469 	 */
3470 	error = suser(td);
3471 	if (error)
3472 		return (error);
3473 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3474 	error = namei(&nd);
3475 	if (error)
3476 		return (error);
3477 	NDFREE(&nd, NDF_ONLY_PNBUF);
3478 	vp = nd.ni_vp;
3479 	bzero(&fh, sizeof(fh));
3480 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3481 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3482 	vput(vp);
3483 	if (error)
3484 		return (error);
3485 	error = copyout(&fh, uap->fhp, sizeof (fh));
3486 	return (error);
3487 }
3488 
3489 /*
3490  * syscall for the rpc.lockd to use to translate a NFS file handle into
3491  * an open descriptor.
3492  *
3493  * warning: do not remove the suser() call or this becomes one giant
3494  * security hole.
3495  */
3496 #ifndef _SYS_SYSPROTO_H_
3497 struct fhopen_args {
3498 	const struct fhandle *u_fhp;
3499 	int flags;
3500 };
3501 #endif
/*
 * Open the file identified by the file handle at uap->u_fhp using the
 * open(2)-style flags in uap->flags; on success the new descriptor index
 * is returned in td_retval[0].
 *
 * The part between the "from vn_open" and "end of vn_open code" markers
 * performs open-time checks on the vnode obtained from VFS_FHTOVP(); the
 * remainder installs that vnode in a file allocated with falloc() and,
 * for O_EXLOCK/O_SHLOCK, acquires the requested advisory lock.
 */
3502 int
3503 fhopen(td, uap)
3504 	struct thread *td;
3505 	struct fhopen_args /* {
3506 		const struct fhandle *u_fhp;
3507 		int flags;
3508 	} */ *uap;
3509 {
3510 	struct proc *p = td->td_proc;
3511 	struct mount *mp;
3512 	struct vnode *vp;
3513 	struct fhandle fhp;
3514 	struct vattr vat;
3515 	struct vattr *vap = &vat;
3516 	struct flock lf;
3517 	struct file *fp;
3518 	register struct filedesc *fdp = p->p_fd;
3519 	int fmode, mode, error, type;
3520 	struct file *nfp;
3521 	int indx;
3522 
3523 	/*
3524 	 * Must be super user
3525 	 */
3526 	error = suser(td);
3527 	if (error)
3528 		return (error);
3529 
3530 	fmode = FFLAGS(uap->flags);
	/* At least one of FREAD/FWRITE is required and O_CREAT is refused. */
3531 	/* why not allow a non-read/write open for our lockd? */
3532 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3533 		return (EINVAL);
3534 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
3535 	if (error)
3536 		return(error);
3537 	/* find the mount point */
3538 	mp = vfs_getvfs(&fhp.fh_fsid);
3539 	if (mp == NULL)
3540 		return (ESTALE);
3541 	/* now give me my vnode, it gets returned to me locked */
3542 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3543 	if (error)
3544 		return (error);
3545  	/*
3546 	 * from now on we have to make sure not
3547 	 * to forget about the vnode
3548 	 * any error that causes an abort must vput(vp)
3549 	 * just set error = err and 'goto bad;'.
3550 	 */
3551 
3552 	/*
3553 	 * from vn_open
3554 	 */
3555 	if (vp->v_type == VLNK) {
3556 		error = EMLINK;
3557 		goto bad;
3558 	}
3559 	if (vp->v_type == VSOCK) {
3560 		error = EOPNOTSUPP;
3561 		goto bad;
3562 	}
	/* Build the VREAD/VWRITE/VAPPEND mask checked by VOP_ACCESS() below. */
3563 	mode = 0;
3564 	if (fmode & (FWRITE | O_TRUNC)) {
3565 		if (vp->v_type == VDIR) {
3566 			error = EISDIR;
3567 			goto bad;
3568 		}
3569 		error = vn_writechk(vp);
3570 		if (error)
3571 			goto bad;
3572 		mode |= VWRITE;
3573 	}
3574 	if (fmode & FREAD)
3575 		mode |= VREAD;
3576 	if (fmode & O_APPEND)
3577 		mode |= VAPPEND;
3578 #ifdef MAC
3579 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
3580 	if (error)
3581 		goto bad;
3582 #endif
3583 	if (mode) {
3584 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
3585 		if (error)
3586 			goto bad;
3587 	}
3588 	if (fmode & O_TRUNC) {
3589 		VOP_UNLOCK(vp, 0, td);				/* XXX */
3590 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
			/* vp was unlocked just above, so only the reference is dropped. */
3591 			vrele(vp);
3592 			return (error);
3593 		}
3594 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3595 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
3596 #ifdef MAC
3597 		/*
3598 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
3599 		 * should be right.
3600 		 */
3601 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
3602 		if (error == 0) {
3603 #endif
			/* Truncate by setting the size attribute to zero. */
3604 			VATTR_NULL(vap);
3605 			vap->va_size = 0;
3606 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
3607 #ifdef MAC
3608 		}
3609 #endif
3610 		vn_finished_write(mp);
3611 		if (error)
3612 			goto bad;
3613 	}
3614 	error = VOP_OPEN(vp, fmode, td->td_ucred, td);
3615 	if (error)
3616 		goto bad;
	/*
	 * NOTE(review): from here to the successful falloc() below, VOP_OPEN()
	 * has already succeeded, yet the `bad' path only vput()s the vnode and
	 * no matching VOP_CLOSE() is visible -- confirm whether that is
	 * intended.
	 */
3617 	/*
3618 	 * Make sure that a VM object is created for VMIO support.
3619 	 */
3620 	if (vn_canvmio(vp) == TRUE) {
3621 		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
3622 			goto bad;
3623 	}
3624 	if (fmode & FWRITE)
3625 		vp->v_writecount++;
3626 
3627 	/*
3628 	 * end of vn_open code
3629 	 */
3630 
3631 	if ((error = falloc(td, &nfp, &indx)) != 0) {
		/* Undo the writer count taken above before bailing out. */
3632 		if (fmode & FWRITE)
3633 			vp->v_writecount--;
3634 		goto bad;
3635 	}
3636 	fp = nfp;
3637 
3638 	/*
3639 	 * Hold an extra reference to avoid having fp ripped out
3640 	 * from under us while we block in the lock op
3641 	 */
3642 	fhold(fp);
3643 	nfp->f_data = vp;
3644 	nfp->f_flag = fmode & FMASK;
3645 	nfp->f_ops = &vnops;
3646 	nfp->f_type = DTYPE_VNODE;
3647 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file lock request: start 0 and length 0 from SEEK_SET. */
3648 		lf.l_whence = SEEK_SET;
3649 		lf.l_start = 0;
3650 		lf.l_len = 0;
3651 		if (fmode & O_EXLOCK)
3652 			lf.l_type = F_WRLCK;
3653 		else
3654 			lf.l_type = F_RDLCK;
3655 		type = F_FLOCK;
3656 		if ((fmode & FNONBLOCK) == 0)
3657 			type |= F_WAIT;
		/* The vnode lock is dropped around the advisory lock call. */
3658 		VOP_UNLOCK(vp, 0, td);
3659 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
3660 			    type)) != 0) {
3661 			/*
3662 			 * The lock request failed.  Normally close the
3663 			 * descriptor but handle the case where someone might
3664 			 * have dup()d or close()d it when we weren't looking.
3665 			 */
3666 			FILEDESC_LOCK(fdp);
3667 			if (fdp->fd_ofiles[indx] == fp) {
3668 				fdp->fd_ofiles[indx] = NULL;
3669 				FILEDESC_UNLOCK(fdp);
3670 				fdrop(fp, td);
3671 			} else
3672 				FILEDESC_UNLOCK(fdp);
3673 			/*
3674 			 * release our private reference
3675 			 */
3676 			fdrop(fp, td);
3677 			return(error);
3678 		}
3679 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3680 		fp->f_flag |= FHASLOCK;
3681 	}
3682 	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
3683 		vfs_object_create(vp, td, td->td_ucred);
3684 
	/* Success: unlock the vnode and drop the extra hold taken above. */
3685 	VOP_UNLOCK(vp, 0, td);
3686 	fdrop(fp, td);
3687 	td->td_retval[0] = indx;
3688 	return (0);
3689 
3690 bad:
3691 	vput(vp);
3692 	return (error);
3693 }
3694 
3695 /*
3696  * Stat an (NFS) file handle.
3697  */
3698 #ifndef _SYS_SYSPROTO_H_
3699 struct fhstat_args {
3700 	struct fhandle *u_fhp;
3701 	struct stat *sb;
3702 };
3703 #endif
3704 int
3705 fhstat(td, uap)
3706 	struct thread *td;
3707 	register struct fhstat_args /* {
3708 		struct fhandle *u_fhp;
3709 		struct stat *sb;
3710 	} */ *uap;
3711 {
3712 	struct stat sb;
3713 	fhandle_t fh;
3714 	struct mount *mp;
3715 	struct vnode *vp;
3716 	int error;
3717 
3718 	/*
3719 	 * Must be super user
3720 	 */
3721 	error = suser(td);
3722 	if (error)
3723 		return (error);
3724 
3725 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
3726 	if (error)
3727 		return (error);
3728 
3729 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3730 		return (ESTALE);
3731 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3732 		return (error);
3733 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
3734 	vput(vp);
3735 	if (error)
3736 		return (error);
3737 	error = copyout(&sb, uap->sb, sizeof(sb));
3738 	return (error);
3739 }
3740 
3741 /*
3742  * Implement fstatfs() for (NFS) file handles.
3743  */
3744 #ifndef _SYS_SYSPROTO_H_
3745 struct fhstatfs_args {
3746 	struct fhandle *u_fhp;
3747 	struct statfs *buf;
3748 };
3749 #endif
3750 int
3751 fhstatfs(td, uap)
3752 	struct thread *td;
3753 	struct fhstatfs_args /* {
3754 		struct fhandle *u_fhp;
3755 		struct statfs *buf;
3756 	} */ *uap;
3757 {
3758 	struct statfs *sp;
3759 	struct mount *mp;
3760 	struct vnode *vp;
3761 	struct statfs sb;
3762 	fhandle_t fh;
3763 	int error;
3764 
3765 	/*
3766 	 * Must be super user
3767 	 */
3768 	error = suser(td);
3769 	if (error)
3770 		return (error);
3771 
3772 	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
3773 		return (error);
3774 
3775 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3776 		return (ESTALE);
3777 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3778 		return (error);
3779 	mp = vp->v_mount;
3780 	sp = &mp->mnt_stat;
3781 	vput(vp);
3782 #ifdef MAC
3783 	error = mac_check_mount_stat(td->td_ucred, mp);
3784 	if (error)
3785 		return (error);
3786 #endif
3787 	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3788 		return (error);
3789 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3790 	if (suser(td)) {
3791 		bcopy(sp, &sb, sizeof(sb));
3792 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3793 		sp = &sb;
3794 	}
3795 	return (copyout(sp, uap->buf, sizeof(*sp)));
3796 }
3797 
3798 /*
3799  * Syscall to push extended attribute configuration information into the
3800  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3801  * a command (int cmd), and attribute name and misc data.  For now, the
3802  * attribute name is left in userspace for consumption by the VFS_op.
3803  * It will probably be changed to be copied into sysspace by the
3804  * syscall in the future, once issues with various consumers of the
3805  * attribute code have raised their hands.
3806  *
3807  * Currently this is used only by UFS Extended Attributes.
3808  */
3809 int
3810 extattrctl(td, uap)
3811 	struct thread *td;
3812 	struct extattrctl_args /* {
3813 		const char *path;
3814 		int cmd;
3815 		const char *filename;
3816 		int attrnamespace;
3817 		const char *attrname;
3818 	} */ *uap;
3819 {
3820 	struct vnode *filename_vp;
3821 	struct nameidata nd;
3822 	struct mount *mp, *mp_writable;
3823 	char attrname[EXTATTR_MAXNAMELEN];
3824 	int error;
3825 
3826 	/*
3827 	 * uap->attrname is not always defined.  We check again later when we
3828 	 * invoke the VFS call so as to pass in NULL there if needed.
3829 	 */
3830 	if (uap->attrname != NULL) {
3831 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
3832 		    NULL);
3833 		if (error)
3834 			return (error);
3835 	}
3836 
3837 	/*
3838 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
3839 	 * which VFS_EXTATTRCTL() will later release.
3840 	 */
3841 	filename_vp = NULL;
3842 	if (uap->filename != NULL) {
3843 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3844 		    uap->filename, td);
3845 		error = namei(&nd);
3846 		if (error)
3847 			return (error);
3848 		filename_vp = nd.ni_vp;
3849 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3850 	}
3851 
3852 	/* uap->path is always defined. */
3853 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3854 	error = namei(&nd);
3855 	if (error) {
3856 		if (filename_vp != NULL)
3857 			vput(filename_vp);
3858 		return (error);
3859 	}
3860 	mp = nd.ni_vp->v_mount;
3861 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
3862 	NDFREE(&nd, 0);
3863 	if (error) {
3864 		if (filename_vp != NULL)
3865 			vput(filename_vp);
3866 		return (error);
3867 	}
3868 
3869 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
3870 	    uap->attrname != NULL ? attrname : NULL, td);
3871 
3872 	vn_finished_write(mp_writable);
3873 	/*
3874 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3875 	 * filename_vp, so vrele it if it is defined.
3876 	 */
3877 	if (filename_vp != NULL)
3878 		vrele(filename_vp);
3879 	return (error);
3880 }
3881 
3882 /*-
3883  * Set a named extended attribute on a file or directory
3884  *
3885  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3886  *            kernelspace string pointer "attrname", userspace buffer
3887  *            pointer "data", buffer length "nbytes", thread "td".
3888  * Returns: 0 on success, an error number otherwise
3889  * Locks: none
3890  * References: vp must be a valid reference for the duration of the call
3891  */
3892 static int
3893 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3894     void *data, size_t nbytes, struct thread *td)
3895 {
3896 	struct mount *mp;
3897 	struct uio auio;
3898 	struct iovec aiov;
3899 	ssize_t cnt;
3900 	int error;
3901 
3902 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
3903 	if (error)
3904 		return (error);
3905 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3906 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3907 
3908 	aiov.iov_base = data;
3909 	aiov.iov_len = nbytes;
3910 	auio.uio_iov = &aiov;
3911 	auio.uio_iovcnt = 1;
3912 	auio.uio_offset = 0;
3913 	if (nbytes > INT_MAX) {
3914 		error = EINVAL;
3915 		goto done;
3916 	}
3917 	auio.uio_resid = nbytes;
3918 	auio.uio_rw = UIO_WRITE;
3919 	auio.uio_segflg = UIO_USERSPACE;
3920 	auio.uio_td = td;
3921 	cnt = nbytes;
3922 
3923 #ifdef MAC
3924 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
3925 	    attrname, &auio);
3926 	if (error)
3927 		goto done;
3928 #endif
3929 
3930 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3931 	    td->td_ucred, td);
3932 	cnt -= auio.uio_resid;
3933 	td->td_retval[0] = cnt;
3934 
3935 done:
3936 	VOP_UNLOCK(vp, 0, td);
3937 	vn_finished_write(mp);
3938 	return (error);
3939 }
3940 
3941 int
3942 extattr_set_fd(td, uap)
3943 	struct thread *td;
3944 	struct extattr_set_fd_args /* {
3945 		int fd;
3946 		int attrnamespace;
3947 		const char *attrname;
3948 		void *data;
3949 		size_t nbytes;
3950 	} */ *uap;
3951 {
3952 	struct file *fp;
3953 	char attrname[EXTATTR_MAXNAMELEN];
3954 	int error;
3955 
3956 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3957 	if (error)
3958 		return (error);
3959 
3960 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
3961 	if (error)
3962 		return (error);
3963 
3964 	error = extattr_set_vp(fp->f_data, uap->attrnamespace,
3965 	    attrname, uap->data, uap->nbytes, td);
3966 	fdrop(fp, td);
3967 
3968 	return (error);
3969 }
3970 
3971 int
3972 extattr_set_file(td, uap)
3973 	struct thread *td;
3974 	struct extattr_set_file_args /* {
3975 		const char *path;
3976 		int attrnamespace;
3977 		const char *attrname;
3978 		void *data;
3979 		size_t nbytes;
3980 	} */ *uap;
3981 {
3982 	struct nameidata nd;
3983 	char attrname[EXTATTR_MAXNAMELEN];
3984 	int error;
3985 
3986 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3987 	if (error)
3988 		return (error);
3989 
3990 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3991 	error = namei(&nd);
3992 	if (error)
3993 		return (error);
3994 	NDFREE(&nd, NDF_ONLY_PNBUF);
3995 
3996 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
3997 	    uap->data, uap->nbytes, td);
3998 
3999 	vrele(nd.ni_vp);
4000 	return (error);
4001 }
4002 
4003 int
4004 extattr_set_link(td, uap)
4005 	struct thread *td;
4006 	struct extattr_set_link_args /* {
4007 		const char *path;
4008 		int attrnamespace;
4009 		const char *attrname;
4010 		void *data;
4011 		size_t nbytes;
4012 	} */ *uap;
4013 {
4014 	struct nameidata nd;
4015 	char attrname[EXTATTR_MAXNAMELEN];
4016 	int error;
4017 
4018 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4019 	if (error)
4020 		return (error);
4021 
4022 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4023 	error = namei(&nd);
4024 	if (error)
4025 		return (error);
4026 	NDFREE(&nd, NDF_ONLY_PNBUF);
4027 
4028 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4029 	    uap->data, uap->nbytes, td);
4030 
4031 	vrele(nd.ni_vp);
4032 	return (error);
4033 }
4034 
4035 /*-
4036  * Get a named extended attribute on a file or directory
4037  *
4038  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4039  *            kernelspace string pointer "attrname", userspace buffer
4040  *            pointer "data", buffer length "nbytes", thread "td".
4041  * Returns: 0 on success, an error number otherwise
4042  * Locks: none
4043  * References: vp must be a valid reference for the duration of the call
4044  */
4045 static int
4046 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4047     void *data, size_t nbytes, struct thread *td)
4048 {
4049 	struct uio auio, *auiop;
4050 	struct iovec aiov;
4051 	ssize_t cnt;
4052 	size_t size, *sizep;
4053 	int error;
4054 
4055 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4056 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4057 
4058 	/*
4059 	 * Slightly unusual semantics: if the user provides a NULL data
4060 	 * pointer, they don't want to receive the data, just the
4061 	 * maximum read length.
4062 	 */
4063 	auiop = NULL;
4064 	sizep = NULL;
4065 	cnt = 0;
4066 	if (data != NULL) {
4067 		aiov.iov_base = data;
4068 		aiov.iov_len = nbytes;
4069 		auio.uio_iov = &aiov;
4070 		auio.uio_offset = 0;
4071 		if (nbytes > INT_MAX) {
4072 			error = EINVAL;
4073 			goto done;
4074 		}
4075 		auio.uio_resid = nbytes;
4076 		auio.uio_rw = UIO_READ;
4077 		auio.uio_segflg = UIO_USERSPACE;
4078 		auio.uio_td = td;
4079 		auiop = &auio;
4080 		cnt = nbytes;
4081 	} else
4082 		sizep = &size;
4083 
4084 #ifdef MAC
4085 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4086 	    attrname, &auio);
4087 	if (error)
4088 		goto done;
4089 #endif
4090 
4091 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4092 	    td->td_ucred, td);
4093 
4094 	if (auiop != NULL) {
4095 		cnt -= auio.uio_resid;
4096 		td->td_retval[0] = cnt;
4097 	} else
4098 		td->td_retval[0] = size;
4099 
4100 done:
4101 	VOP_UNLOCK(vp, 0, td);
4102 	return (error);
4103 }
4104 
4105 int
4106 extattr_get_fd(td, uap)
4107 	struct thread *td;
4108 	struct extattr_get_fd_args /* {
4109 		int fd;
4110 		int attrnamespace;
4111 		const char *attrname;
4112 		void *data;
4113 		size_t nbytes;
4114 	} */ *uap;
4115 {
4116 	struct file *fp;
4117 	char attrname[EXTATTR_MAXNAMELEN];
4118 	int error;
4119 
4120 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4121 	if (error)
4122 		return (error);
4123 
4124 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4125 	if (error)
4126 		return (error);
4127 
4128 	error = extattr_get_vp(fp->f_data, uap->attrnamespace,
4129 	    attrname, uap->data, uap->nbytes, td);
4130 
4131 	fdrop(fp, td);
4132 	return (error);
4133 }
4134 
4135 int
4136 extattr_get_file(td, uap)
4137 	struct thread *td;
4138 	struct extattr_get_file_args /* {
4139 		const char *path;
4140 		int attrnamespace;
4141 		const char *attrname;
4142 		void *data;
4143 		size_t nbytes;
4144 	} */ *uap;
4145 {
4146 	struct nameidata nd;
4147 	char attrname[EXTATTR_MAXNAMELEN];
4148 	int error;
4149 
4150 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4151 	if (error)
4152 		return (error);
4153 
4154 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4155 	error = namei(&nd);
4156 	if (error)
4157 		return (error);
4158 	NDFREE(&nd, NDF_ONLY_PNBUF);
4159 
4160 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4161 	    uap->data, uap->nbytes, td);
4162 
4163 	vrele(nd.ni_vp);
4164 	return (error);
4165 }
4166 
4167 int
4168 extattr_get_link(td, uap)
4169 	struct thread *td;
4170 	struct extattr_get_link_args /* {
4171 		const char *path;
4172 		int attrnamespace;
4173 		const char *attrname;
4174 		void *data;
4175 		size_t nbytes;
4176 	} */ *uap;
4177 {
4178 	struct nameidata nd;
4179 	char attrname[EXTATTR_MAXNAMELEN];
4180 	int error;
4181 
4182 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4183 	if (error)
4184 		return (error);
4185 
4186 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4187 	error = namei(&nd);
4188 	if (error)
4189 		return (error);
4190 	NDFREE(&nd, NDF_ONLY_PNBUF);
4191 
4192 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4193 	    uap->data, uap->nbytes, td);
4194 
4195 	vrele(nd.ni_vp);
4196 	return (error);
4197 }
4198 
4199 /*
4200  * extattr_delete_vp(): Delete a named extended attribute on a file or
4201  *                      directory
4202  *
4203  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4204  *            kernelspace string pointer "attrname", proc "p"
4205  * Returns: 0 on success, an error number otherwise
4206  * Locks: none
4207  * References: vp must be a valid reference for the duration of the call
4208  */
4209 static int
4210 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4211     struct thread *td)
4212 {
4213 	struct mount *mp;
4214 	int error;
4215 
4216 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4217 	if (error)
4218 		return (error);
4219 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4220 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4221 
4222 #ifdef MAC
4223 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4224 	    attrname, NULL);
4225 	if (error)
4226 		goto done;
4227 #endif
4228 
4229 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred,
4230 	    td);
4231 #ifdef MAC
4232 done:
4233 #endif
4234 	VOP_UNLOCK(vp, 0, td);
4235 	vn_finished_write(mp);
4236 	return (error);
4237 }
4238 
4239 int
4240 extattr_delete_fd(td, uap)
4241 	struct thread *td;
4242 	struct extattr_delete_fd_args /* {
4243 		int fd;
4244 		int attrnamespace;
4245 		const char *attrname;
4246 	} */ *uap;
4247 {
4248 	struct file *fp;
4249 	struct vnode *vp;
4250 	char attrname[EXTATTR_MAXNAMELEN];
4251 	int error;
4252 
4253 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4254 	if (error)
4255 		return (error);
4256 
4257 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4258 	if (error)
4259 		return (error);
4260 	vp = fp->f_data;
4261 
4262 	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
4263 	fdrop(fp, td);
4264 	return (error);
4265 }
4266 
4267 int
4268 extattr_delete_file(td, uap)
4269 	struct thread *td;
4270 	struct extattr_delete_file_args /* {
4271 		const char *path;
4272 		int attrnamespace;
4273 		const char *attrname;
4274 	} */ *uap;
4275 {
4276 	struct nameidata nd;
4277 	char attrname[EXTATTR_MAXNAMELEN];
4278 	int error;
4279 
4280 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4281 	if (error)
4282 		return(error);
4283 
4284 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4285 	error = namei(&nd);
4286 	if (error)
4287 		return(error);
4288 	NDFREE(&nd, NDF_ONLY_PNBUF);
4289 
4290 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4291 	vrele(nd.ni_vp);
4292 	return(error);
4293 }
4294 
4295 int
4296 extattr_delete_link(td, uap)
4297 	struct thread *td;
4298 	struct extattr_delete_link_args /* {
4299 		const char *path;
4300 		int attrnamespace;
4301 		const char *attrname;
4302 	} */ *uap;
4303 {
4304 	struct nameidata nd;
4305 	char attrname[EXTATTR_MAXNAMELEN];
4306 	int error;
4307 
4308 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4309 	if (error)
4310 		return(error);
4311 
4312 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4313 	error = namei(&nd);
4314 	if (error)
4315 		return(error);
4316 	NDFREE(&nd, NDF_ONLY_PNBUF);
4317 
4318 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4319 	vrele(nd.ni_vp);
4320 	return(error);
4321 }
4322 
4323 /*-
4324  * Retrieve a list of extended attributes on a file or directory.
4325  *
4326  * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4327  *            userspace buffer pointer "data", buffer length "nbytes",
4328  *            thread "td".
4329  * Returns: 0 on success, an error number otherwise
4330  * Locks: none
4331  * References: vp must be a valid reference for the duration of the call
4332  */
4333 static int
4334 extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4335     size_t nbytes, struct thread *td)
4336 {
4337 	struct uio auio, *auiop;
4338 	size_t size, *sizep;
4339 	struct iovec aiov;
4340 	ssize_t cnt;
4341 	int error;
4342 
4343 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4344 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4345 
4346 	auiop = NULL;
4347 	sizep = NULL;
4348 	cnt = 0;
4349 	if (data != NULL) {
4350 		aiov.iov_base = data;
4351 		aiov.iov_len = nbytes;
4352 		auio.uio_iov = &aiov;
4353 		auio.uio_offset = 0;
4354 		if (nbytes > INT_MAX) {
4355 			error = EINVAL;
4356 			goto done;
4357 		}
4358 		auio.uio_resid = nbytes;
4359 		auio.uio_rw = UIO_READ;
4360 		auio.uio_segflg = UIO_USERSPACE;
4361 		auio.uio_td = td;
4362 		auiop = &auio;
4363 		cnt = nbytes;
4364 	} else
4365 		sizep = &size;
4366 
4367 #ifdef MAC
4368 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4369 	    "", &auio);
4370 	if (error)
4371 		goto done;
4372 #endif
4373 
4374 	error = VOP_GETEXTATTR(vp, attrnamespace, "", auiop, sizep,
4375 	    td->td_ucred, td);
4376 
4377 	if (auiop != NULL) {
4378 		cnt -= auio.uio_resid;
4379 		td->td_retval[0] = cnt;
4380 	} else
4381 		td->td_retval[0] = size;
4382 
4383 done:
4384 	VOP_UNLOCK(vp, 0, td);
4385 	return (error);
4386 }
4387 
4388 
4389 int
4390 extattr_list_fd(td, uap)
4391 	struct thread *td;
4392 	struct extattr_list_fd_args /* {
4393 		int fd;
4394 		int attrnamespace;
4395 		void *data;
4396 		size_t nbytes;
4397 	} */ *uap;
4398 {
4399 	struct file *fp;
4400 	int error;
4401 
4402 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4403 	if (error)
4404 		return (error);
4405 
4406 	error = extattr_list_vp(fp->f_data, uap->attrnamespace, uap->data,
4407 	    uap->nbytes, td);
4408 
4409 	fdrop(fp, td);
4410 	return (error);
4411 }
4412 
4413 int
4414 extattr_list_file(td, uap)
4415 	struct thread*td;
4416 	struct extattr_list_file_args /* {
4417 		const char *path;
4418 		int attrnamespace;
4419 		void *data;
4420 		size_t nbytes;
4421 	} */ *uap;
4422 {
4423 	struct nameidata nd;
4424 	int error;
4425 
4426 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4427 	error = namei(&nd);
4428 	if (error)
4429 		return (error);
4430 	NDFREE(&nd, NDF_ONLY_PNBUF);
4431 
4432 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4433 	    uap->nbytes, td);
4434 
4435 	vrele(nd.ni_vp);
4436 	return (error);
4437 }
4438 
4439 int
4440 extattr_list_link(td, uap)
4441 	struct thread*td;
4442 	struct extattr_list_link_args /* {
4443 		const char *path;
4444 		int attrnamespace;
4445 		void *data;
4446 		size_t nbytes;
4447 	} */ *uap;
4448 {
4449 	struct nameidata nd;
4450 	int error;
4451 
4452 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4453 	error = namei(&nd);
4454 	if (error)
4455 		return (error);
4456 	NDFREE(&nd, NDF_ONLY_PNBUF);
4457 
4458 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4459 	    uap->nbytes, td);
4460 
4461 	vrele(nd.ni_vp);
4462 	return (error);
4463 }
4464 
4465