xref: /freebsd/sys/kern/vfs_extattr.c (revision 41466b50c1d5bfd1cf6adaae547a579a75d7c04e)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  * $FreeBSD$
40  */
41 
42 /* For 4.3 integer FS ID compatibility */
43 #include "opt_compat.h"
44 #include "opt_ffs.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/sysent.h>
51 #include <sys/malloc.h>
52 #include <sys/mount.h>
53 #include <sys/mutex.h>
54 #include <sys/sysproto.h>
55 #include <sys/namei.h>
56 #include <sys/filedesc.h>
57 #include <sys/kernel.h>
58 #include <sys/fcntl.h>
59 #include <sys/file.h>
60 #include <sys/linker.h>
61 #include <sys/stat.h>
62 #include <sys/sx.h>
63 #include <sys/unistd.h>
64 #include <sys/vnode.h>
65 #include <sys/proc.h>
66 #include <sys/dirent.h>
67 #include <sys/extattr.h>
68 #include <sys/jail.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/limits.h>
72 
73 #include <vm/vm.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_zone.h>
76 #include <vm/vm_page.h>
77 
/*
 * Forward declarations of the file-local helpers.  change_dir(),
 * checkdirs() and chroot_refuse_vdir_fds() are defined further down in
 * this part of the file; the remaining helpers are not defined in the
 * portion of the file visible here.
 */
78 static int change_dir __P((struct nameidata *ndp, struct thread *td));
79 static void checkdirs __P((struct vnode *olddp, struct vnode *newdp));
80 static int chroot_refuse_vdir_fds __P((struct filedesc *fdp));
81 static int getutimes __P((const struct timeval *, struct timespec *));
82 static int setfown __P((struct thread *td, struct vnode *, uid_t, gid_t));
83 static int setfmode __P((struct thread *td, struct vnode *, int));
84 static int setfflags __P((struct thread *td, struct vnode *, int));
85 static int setutimes __P((struct thread *td, struct vnode *,
86     const struct timespec *, int));
87 static int vn_access __P((struct vnode *vp, int user_flags, struct ucred *cred,
88     struct thread *td));
89 
/*
 * Mount policy knob.  vfs_mount() demands superuser privilege up front
 * whenever this is zero.
 */
90 static int	usermount = 0;	/* if 1, non-root can mount fs. */
91 
/*
 * Globally visible function pointer.
 * NOTE(review): presumably a hook installed by the union file system;
 * neither the assignment nor the call site is visible here -- confirm.
 */
92 int (*union_dircheckp) __P((struct thread *td, struct vnode **, struct file *));
93 
/* Export `usermount' as a read-write integer sysctl under the _vfs node. */
94 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
95 
96 /*
97  * Virtual File System System Calls
98  */
99 
100 /*
101  * Mount a file system.
102  */
103 #ifndef _SYS_SYSPROTO_H_
104 struct mount_args {
105 	char	*type;
106 	char	*path;
107 	int	flags;
108 	caddr_t	data;
109 };
110 #endif
111 /* ARGSUSED */
112 int
113 mount(td, uap)
114 	struct thread *td;
115 	struct mount_args /* {
116 		syscallarg(char *) type;
117 		syscallarg(char *) path;
118 		syscallarg(int) flags;
119 		syscallarg(caddr_t) data;
120 	} */ *uap;
121 {
122 	char *fstype;
123 	char *fspath;
124 	int error;
125 
126 	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK | M_ZERO);
127 	fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK | M_ZERO);
128 
129 	/*
130 	 * vfs_mount() actually takes a kernel string for `type' and
131 	 * `path' now, so extract them.
132 	 */
133 	error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL);
134 	if (error)
135 		goto finish;
136 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
137 	if (error)
138 		goto finish;
139 	error = vfs_mount(td, fstype, fspath, SCARG(uap, flags),
140 	    SCARG(uap, data));
141 finish:
142 	free(fstype, M_TEMP);
143 	free(fspath, M_TEMP);
144 	return (error);
145 }
146 
147 /*
148  * vfs_mount(): actually attempt a filesystem mount.
149  *
150  * This routine is designed to be a "generic" entry point for routines
151  * that wish to mount a filesystem. All parameters except `fsdata' are
152  * pointers into kernel space. `fsdata' is currently still a pointer
153  * into userspace.
154  */
155 int
156 vfs_mount(td, fstype, fspath, fsflags, fsdata)
157 	struct thread *td;
158 	const char *fstype;
159 	char *fspath;
160 	int fsflags;
161 	void *fsdata;
162 {
163 	struct vnode *vp;
164 	struct mount *mp;
165 	struct vfsconf *vfsp;
166 	int error, flag = 0, flag2 = 0;
167 	struct vattr va;
168 	struct nameidata nd;
169 	struct proc *p = td->td_proc;
170 
171 	/*
172 	 * Be ultra-paranoid about making sure the type and fspath
173 	 * variables will fit in our mp buffers, including the
174 	 * terminating NUL.
175 	 */
176 	if ((strlen(fstype) >= MFSNAMELEN - 1) ||
177 	    (strlen(fspath) >= MNAMELEN - 1))
178 		return (ENAMETOOLONG);
179 
180 	if (usermount == 0) {
181 		error = suser_td(td);
182 		if (error)
183 			return (error);
184 	}
185 	/*
186 	 * Do not allow NFS export by non-root users.
187 	 */
188 	if (fsflags & MNT_EXPORTED) {
189 		error = suser_td(td);
190 		if (error)
191 			return (error);
192 	}
193 	/*
194 	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
195 	 */
196 	if (suser_xxx(p->p_ucred, 0, 0))
197 		fsflags |= MNT_NOSUID | MNT_NODEV;
198 	/*
199 	 * Get vnode to be covered
200 	 */
201 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
202 	if ((error = namei(&nd)) != 0)
203 		return (error);
204 	NDFREE(&nd, NDF_ONLY_PNBUF);
205 	vp = nd.ni_vp;
206 	if (fsflags & MNT_UPDATE) {
207 		if ((vp->v_flag & VROOT) == 0) {
208 			vput(vp);
209 			return (EINVAL);
210 		}
211 		mp = vp->v_mount;
212 		flag = mp->mnt_flag;
213 		flag2 = mp->mnt_kern_flag;
214 		/*
215 		 * We only allow the filesystem to be reloaded if it
216 		 * is currently mounted read-only.
217 		 */
218 		if ((fsflags & MNT_RELOAD) &&
219 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
220 			vput(vp);
221 			return (EOPNOTSUPP);	/* Needs translation */
222 		}
223 		/*
224 		 * Only root, or the user that did the original mount is
225 		 * permitted to update it.
226 		 */
227 		if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid) {
228 			error = suser_td(td);
229 			if (error) {
230 				vput(vp);
231 				return (error);
232 			}
233 		}
234 		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
235 			vput(vp);
236 			return (EBUSY);
237 		}
238 		mtx_lock(&vp->v_interlock);
239 		if ((vp->v_flag & VMOUNT) != 0 ||
240 		    vp->v_mountedhere != NULL) {
241 			mtx_unlock(&vp->v_interlock);
242 			vfs_unbusy(mp, td);
243 			vput(vp);
244 			return (EBUSY);
245 		}
246 		vp->v_flag |= VMOUNT;
247 		mtx_unlock(&vp->v_interlock);
248 		mp->mnt_flag |= fsflags &
249 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
250 		VOP_UNLOCK(vp, 0, td);
251 		goto update;
252 	}
253 	/*
254 	 * If the user is not root, ensure that they own the directory
255 	 * onto which we are attempting to mount.
256 	 */
257 	error = VOP_GETATTR(vp, &va, p->p_ucred, td);
258 	if (error) {
259 		vput(vp);
260 		return (error);
261 	}
262 	if (va.va_uid != p->p_ucred->cr_uid) {
263 		error = suser_td(td);
264 		if (error) {
265 			vput(vp);
266 			return (error);
267 		}
268 	}
269 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, td, 0, 0)) != 0) {
270 		vput(vp);
271 		return (error);
272 	}
273 	if (vp->v_type != VDIR) {
274 		vput(vp);
275 		return (ENOTDIR);
276 	}
277 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
278 		if (!strcmp(vfsp->vfc_name, fstype))
279 			break;
280 	if (vfsp == NULL) {
281 		linker_file_t lf;
282 
283 		/* Only load modules for root (very important!) */
284 		error = suser_td(td);
285 		if (error) {
286 			vput(vp);
287 			return error;
288 		}
289 		error = linker_load_file(fstype, &lf);
290 		if (error || lf == NULL) {
291 			vput(vp);
292 			if (lf == NULL)
293 				error = ENODEV;
294 			return error;
295 		}
296 		lf->userrefs++;
297 		/* lookup again, see if the VFS was loaded */
298 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
299 			if (!strcmp(vfsp->vfc_name, fstype))
300 				break;
301 		if (vfsp == NULL) {
302 			lf->userrefs--;
303 			linker_file_unload(lf);
304 			vput(vp);
305 			return (ENODEV);
306 		}
307 	}
308 	mtx_lock(&vp->v_interlock);
309 	if ((vp->v_flag & VMOUNT) != 0 ||
310 	    vp->v_mountedhere != NULL) {
311 		mtx_unlock(&vp->v_interlock);
312 		vput(vp);
313 		return (EBUSY);
314 	}
315 	vp->v_flag |= VMOUNT;
316 	mtx_unlock(&vp->v_interlock);
317 
318 	/*
319 	 * Allocate and initialize the filesystem.
320 	 */
321 	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
322 	TAILQ_INIT(&mp->mnt_nvnodelist);
323 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
324 	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
325 	mp->mnt_op = vfsp->vfc_vfsops;
326 	mp->mnt_vfc = vfsp;
327 	vfsp->vfc_refcount++;
328 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
329 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
330 	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
331 	mp->mnt_stat.f_fstypename[MFSNAMELEN - 1] = '\0';
332 	mp->mnt_vnodecovered = vp;
333 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
334 	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
335 	mp->mnt_stat.f_mntonname[MNAMELEN - 1] = '\0';
336 	mp->mnt_iosize_max = DFLTPHYS;
337 	VOP_UNLOCK(vp, 0, td);
338 update:
339 	/*
340 	 * Set the mount level flags.
341 	 */
342 	if (fsflags & MNT_RDONLY)
343 		mp->mnt_flag |= MNT_RDONLY;
344 	else if (mp->mnt_flag & MNT_RDONLY)
345 		mp->mnt_kern_flag |= MNTK_WANTRDWR;
346 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
347 	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
348 	    MNT_NOSYMFOLLOW | MNT_IGNORE |
349 	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
350 	mp->mnt_flag |= fsflags & (MNT_NOSUID | MNT_NOEXEC |
351 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
352 	    MNT_NOSYMFOLLOW | MNT_IGNORE |
353 	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
354 	/*
355 	 * Mount the filesystem.
356 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
357 	 * get.  No freeing of cn_pnbuf.
358 	 */
359 	error = VFS_MOUNT(mp, fspath, fsdata, &nd, td);
360 	if (mp->mnt_flag & MNT_UPDATE) {
361 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
362 			mp->mnt_flag &= ~MNT_RDONLY;
363 		mp->mnt_flag &=~
364 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
365 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
366 		if (error) {
367 			mp->mnt_flag = flag;
368 			mp->mnt_kern_flag = flag2;
369 		}
370 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
371 			if (mp->mnt_syncer == NULL)
372 				error = vfs_allocate_syncvnode(mp);
373 		} else {
374 			if (mp->mnt_syncer != NULL)
375 				vrele(mp->mnt_syncer);
376 			mp->mnt_syncer = NULL;
377 		}
378 		vfs_unbusy(mp, td);
379 		mtx_lock(&vp->v_interlock);
380 		vp->v_flag &= ~VMOUNT;
381 		mtx_unlock(&vp->v_interlock);
382 		vrele(vp);
383 		return (error);
384 	}
385 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
386 	/*
387 	 * Put the new filesystem on the mount list after root.
388 	 */
389 	cache_purge(vp);
390 	if (!error) {
391 		struct vnode *newdp;
392 
393 		mtx_lock(&vp->v_interlock);
394 		vp->v_flag &= ~VMOUNT;
395 		vp->v_mountedhere = mp;
396 		mtx_unlock(&vp->v_interlock);
397 		mtx_lock(&mountlist_mtx);
398 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
399 		mtx_unlock(&mountlist_mtx);
400 		if (VFS_ROOT(mp, &newdp))
401 			panic("mount: lost mount");
402 		checkdirs(vp, newdp);
403 		vput(newdp);
404 		VOP_UNLOCK(vp, 0, td);
405 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
406 			error = vfs_allocate_syncvnode(mp);
407 		vfs_unbusy(mp, td);
408 		if ((error = VFS_START(mp, 0, td)) != 0)
409 			vrele(vp);
410 	} else {
411 		mtx_lock(&vp->v_interlock);
412 		vp->v_flag &= ~VMOUNT;
413 		mtx_unlock(&vp->v_interlock);
414 		mp->mnt_vfc->vfc_refcount--;
415 		vfs_unbusy(mp, td);
416 		free((caddr_t)mp, M_MOUNT);
417 		vput(vp);
418 	}
419 	return (error);
420 }
421 
422 /*
423  * Scan all active processes to see if any of them have a current
424  * or root directory of `olddp'. If so, replace them with the new
425  * mount point.
426  */
427 static void
428 checkdirs(olddp, newdp)
429 	struct vnode *olddp, *newdp;
430 {
431 	struct filedesc *fdp;
432 	struct proc *p;
433 
434 	if (olddp->v_usecount == 1)
435 		return;
436 	sx_slock(&allproc_lock);
437 	LIST_FOREACH(p, &allproc, p_list) {
438 		fdp = p->p_fd;
439 		if (fdp == NULL)
440 			continue;
441 		if (fdp->fd_cdir == olddp) {
442 			vrele(fdp->fd_cdir);
443 			VREF(newdp);
444 			fdp->fd_cdir = newdp;
445 		}
446 		if (fdp->fd_rdir == olddp) {
447 			vrele(fdp->fd_rdir);
448 			VREF(newdp);
449 			fdp->fd_rdir = newdp;
450 		}
451 	}
452 	sx_sunlock(&allproc_lock);
453 	if (rootvnode == olddp) {
454 		vrele(rootvnode);
455 		VREF(newdp);
456 		rootvnode = newdp;
457 	}
458 }
459 
460 /*
461  * Unmount a file system.
462  *
463  * Note: unmount takes a path to the vnode mounted on as argument,
464  * not special file (as before).
465  */
466 #ifndef _SYS_SYSPROTO_H_
467 struct unmount_args {
468 	char	*path;
469 	int	flags;
470 };
471 #endif
472 /* ARGSUSED */
473 int
474 unmount(td, uap)
475 	struct thread *td;
476 	register struct unmount_args /* {
477 		syscallarg(char *) path;
478 		syscallarg(int) flags;
479 	} */ *uap;
480 {
481 	register struct vnode *vp;
482 	struct mount *mp;
483 	int error;
484 	struct nameidata nd;
485 
486 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
487 	    SCARG(uap, path), td);
488 	if ((error = namei(&nd)) != 0)
489 		return (error);
490 	vp = nd.ni_vp;
491 	NDFREE(&nd, NDF_ONLY_PNBUF);
492 	mp = vp->v_mount;
493 
494 	/*
495 	 * Only root, or the user that did the original mount is
496 	 * permitted to unmount this filesystem.
497 	 */
498 	if (mp->mnt_stat.f_owner != td->td_proc->p_ucred->cr_uid) {
499 		error = suser_td(td);
500 		if (error) {
501 			vput(vp);
502 			return (error);
503 		}
504 	}
505 
506 	/*
507 	 * Don't allow unmounting the root file system.
508 	 */
509 	if (mp->mnt_flag & MNT_ROOTFS) {
510 		vput(vp);
511 		return (EINVAL);
512 	}
513 
514 	/*
515 	 * Must be the root of the filesystem
516 	 */
517 	if ((vp->v_flag & VROOT) == 0) {
518 		vput(vp);
519 		return (EINVAL);
520 	}
521 	vput(vp);
522 	return (dounmount(mp, SCARG(uap, flags), td));
523 }
524 
525 /*
526  * Do the actual file system unmount.
527  */
528 int
529 dounmount(mp, flags, td)
530 	struct mount *mp;
531 	int flags;
532 	struct thread *td;
533 {
534 	struct vnode *coveredvp, *fsrootvp;
535 	int error;
536 	int async_flag;
537 
538 	mtx_lock(&mountlist_mtx);
539 	mp->mnt_kern_flag |= MNTK_UNMOUNT;
540 	lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_mtx, td);
541 	vn_start_write(NULL, &mp, V_WAIT);
542 
543 	if (mp->mnt_flag & MNT_EXPUBLIC)
544 		vfs_setpublicfs(NULL, NULL, NULL);
545 
546 	vfs_msync(mp, MNT_WAIT);
547 	async_flag = mp->mnt_flag & MNT_ASYNC;
548 	mp->mnt_flag &=~ MNT_ASYNC;
549 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
550 	if (mp->mnt_syncer != NULL)
551 		vrele(mp->mnt_syncer);
552 	/* Move process cdir/rdir refs on fs root to underlying vnode. */
553 	if (VFS_ROOT(mp, &fsrootvp) == 0) {
554 		if (mp->mnt_vnodecovered != NULL)
555 			checkdirs(fsrootvp, mp->mnt_vnodecovered);
556 		if (fsrootvp == rootvnode) {
557 			vrele(rootvnode);
558 			rootvnode = NULL;
559 		}
560 		vput(fsrootvp);
561 	}
562 	if (((mp->mnt_flag & MNT_RDONLY) ||
563 	     (error = VFS_SYNC(mp, MNT_WAIT, td->td_proc->p_ucred, td)) == 0) ||
564 	    (flags & MNT_FORCE)) {
565 		error = VFS_UNMOUNT(mp, flags, td);
566 	}
567 	vn_finished_write(mp);
568 	if (error) {
569 		/* Undo cdir/rdir and rootvnode changes made above. */
570 		if (VFS_ROOT(mp, &fsrootvp) == 0) {
571 			if (mp->mnt_vnodecovered != NULL)
572 				checkdirs(mp->mnt_vnodecovered, fsrootvp);
573 			if (rootvnode == NULL) {
574 				rootvnode = fsrootvp;
575 				vref(rootvnode);
576 			}
577 			vput(fsrootvp);
578 		}
579 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
580 			(void) vfs_allocate_syncvnode(mp);
581 		mtx_lock(&mountlist_mtx);
582 		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
583 		mp->mnt_flag |= async_flag;
584 		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
585 		    &mountlist_mtx, td);
586 		if (mp->mnt_kern_flag & MNTK_MWAIT)
587 			wakeup((caddr_t)mp);
588 		return (error);
589 	}
590 	mtx_lock(&mountlist_mtx);
591 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
592 	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
593 		coveredvp->v_mountedhere = NULL;
594 	mp->mnt_vfc->vfc_refcount--;
595 	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
596 		panic("unmount: dangling vnode");
597 	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td);
598 	lockdestroy(&mp->mnt_lock);
599 	if (coveredvp != NULL)
600 		vrele(coveredvp);
601 	if (mp->mnt_kern_flag & MNTK_MWAIT)
602 		wakeup((caddr_t)mp);
603 	free((caddr_t)mp, M_MOUNT);
604 	return (0);
605 }
606 
607 /*
608  * Sync each mounted filesystem.
609  */
610 #ifndef _SYS_SYSPROTO_H_
611 struct sync_args {
612         int     dummy;
613 };
614 #endif
615 
616 #ifdef DEBUG
617 static int syncprt = 0;
618 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
619 #endif
620 
621 /* ARGSUSED */
622 int
623 sync(td, uap)
624 	struct thread *td;
625 	struct sync_args *uap;
626 {
627 	struct mount *mp, *nmp;
628 	int asyncflag;
629 
630 	mtx_lock(&mountlist_mtx);
631 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
632 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
633 			nmp = TAILQ_NEXT(mp, mnt_list);
634 			continue;
635 		}
636 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
637 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
638 			asyncflag = mp->mnt_flag & MNT_ASYNC;
639 			mp->mnt_flag &= ~MNT_ASYNC;
640 			vfs_msync(mp, MNT_NOWAIT);
641 			VFS_SYNC(mp, MNT_NOWAIT,
642 			    ((td != NULL) ? td->td_proc->p_ucred : NOCRED), td);
643 			mp->mnt_flag |= asyncflag;
644 			vn_finished_write(mp);
645 		}
646 		mtx_lock(&mountlist_mtx);
647 		nmp = TAILQ_NEXT(mp, mnt_list);
648 		vfs_unbusy(mp, td);
649 	}
650 	mtx_unlock(&mountlist_mtx);
651 #if 0
652 /*
653  * XXX don't call vfs_bufstats() yet because that routine
654  * was not imported in the Lite2 merge.
655  */
656 #ifdef DIAGNOSTIC
657 	if (syncprt)
658 		vfs_bufstats();
659 #endif /* DIAGNOSTIC */
660 #endif
661 	return (0);
662 }
663 
664 /* XXX PRISON: could be per prison flag */
665 static int prison_quotas;
666 #if 0
667 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
668 #endif
669 
670 /*
671  * Change filesystem quotas.
672  */
673 #ifndef _SYS_SYSPROTO_H_
674 struct quotactl_args {
675 	char *path;
676 	int cmd;
677 	int uid;
678 	caddr_t arg;
679 };
680 #endif
681 /* ARGSUSED */
682 int
683 quotactl(td, uap)
684 	struct thread *td;
685 	register struct quotactl_args /* {
686 		syscallarg(char *) path;
687 		syscallarg(int) cmd;
688 		syscallarg(int) uid;
689 		syscallarg(caddr_t) arg;
690 	} */ *uap;
691 {
692 	struct mount *mp;
693 	int error;
694 	struct nameidata nd;
695 
696 	if (jailed(td->td_proc->p_ucred) && !prison_quotas)
697 		return (EPERM);
698 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
699 	if ((error = namei(&nd)) != 0)
700 		return (error);
701 	NDFREE(&nd, NDF_ONLY_PNBUF);
702 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
703 	vrele(nd.ni_vp);
704 	if (error)
705 		return (error);
706 	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
707 	    SCARG(uap, arg), td);
708 	vn_finished_write(mp);
709 	return (error);
710 }
711 
712 /*
713  * Get filesystem statistics.
714  */
715 #ifndef _SYS_SYSPROTO_H_
716 struct statfs_args {
717 	char *path;
718 	struct statfs *buf;
719 };
720 #endif
721 /* ARGSUSED */
722 int
723 statfs(td, uap)
724 	struct thread *td;
725 	register struct statfs_args /* {
726 		syscallarg(char *) path;
727 		syscallarg(struct statfs *) buf;
728 	} */ *uap;
729 {
730 	register struct mount *mp;
731 	register struct statfs *sp;
732 	int error;
733 	struct nameidata nd;
734 	struct statfs sb;
735 
736 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
737 	if ((error = namei(&nd)) != 0)
738 		return (error);
739 	mp = nd.ni_vp->v_mount;
740 	sp = &mp->mnt_stat;
741 	NDFREE(&nd, NDF_ONLY_PNBUF);
742 	vrele(nd.ni_vp);
743 	error = VFS_STATFS(mp, sp, td);
744 	if (error)
745 		return (error);
746 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
747 	if (suser_xxx(td->td_proc->p_ucred, 0, 0)) {
748 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
749 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
750 		sp = &sb;
751 	}
752 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
753 }
754 
755 /*
756  * Get filesystem statistics.
757  */
758 #ifndef _SYS_SYSPROTO_H_
759 struct fstatfs_args {
760 	int fd;
761 	struct statfs *buf;
762 };
763 #endif
764 /* ARGSUSED */
765 int
766 fstatfs(td, uap)
767 	struct thread *td;
768 	register struct fstatfs_args /* {
769 		syscallarg(int) fd;
770 		syscallarg(struct statfs *) buf;
771 	} */ *uap;
772 {
773 	struct file *fp;
774 	struct mount *mp;
775 	register struct statfs *sp;
776 	int error;
777 	struct statfs sb;
778 
779 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
780 		return (error);
781 	mp = ((struct vnode *)fp->f_data)->v_mount;
782 	sp = &mp->mnt_stat;
783 	error = VFS_STATFS(mp, sp, td);
784 	if (error)
785 		return (error);
786 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
787 	if (suser_xxx(td->td_proc->p_ucred, 0, 0)) {
788 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
789 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
790 		sp = &sb;
791 	}
792 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
793 }
794 
795 /*
796  * Get statistics on all filesystems.
797  */
798 #ifndef _SYS_SYSPROTO_H_
799 struct getfsstat_args {
800 	struct statfs *buf;
801 	long bufsize;
802 	int flags;
803 };
804 #endif
805 int
806 getfsstat(td, uap)
807 	struct thread *td;
808 	register struct getfsstat_args /* {
809 		syscallarg(struct statfs *) buf;
810 		syscallarg(long) bufsize;
811 		syscallarg(int) flags;
812 	} */ *uap;
813 {
814 	register struct mount *mp, *nmp;
815 	register struct statfs *sp;
816 	caddr_t sfsp;
817 	long count, maxcount, error;
818 
819 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
820 	sfsp = (caddr_t)SCARG(uap, buf);
821 	count = 0;
822 	mtx_lock(&mountlist_mtx);
823 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
824 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
825 			nmp = TAILQ_NEXT(mp, mnt_list);
826 			continue;
827 		}
828 		if (sfsp && count < maxcount) {
829 			sp = &mp->mnt_stat;
830 			/*
831 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
832 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
833 			 * overrides MNT_WAIT.
834 			 */
835 			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
836 			    (SCARG(uap, flags) & MNT_WAIT)) &&
837 			    (error = VFS_STATFS(mp, sp, td))) {
838 				mtx_lock(&mountlist_mtx);
839 				nmp = TAILQ_NEXT(mp, mnt_list);
840 				vfs_unbusy(mp, td);
841 				continue;
842 			}
843 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
844 			error = copyout((caddr_t)sp, sfsp, sizeof(*sp));
845 			if (error) {
846 				vfs_unbusy(mp, td);
847 				return (error);
848 			}
849 			sfsp += sizeof(*sp);
850 		}
851 		count++;
852 		mtx_lock(&mountlist_mtx);
853 		nmp = TAILQ_NEXT(mp, mnt_list);
854 		vfs_unbusy(mp, td);
855 	}
856 	mtx_unlock(&mountlist_mtx);
857 	if (sfsp && count > maxcount)
858 		td->td_retval[0] = maxcount;
859 	else
860 		td->td_retval[0] = count;
861 	return (0);
862 }
863 
864 /*
865  * Change current working directory to a given file descriptor.
866  */
867 #ifndef _SYS_SYSPROTO_H_
868 struct fchdir_args {
869 	int	fd;
870 };
871 #endif
872 /* ARGSUSED */
873 int
874 fchdir(td, uap)
875 	struct thread *td;
876 	struct fchdir_args /* {
877 		syscallarg(int) fd;
878 	} */ *uap;
879 {
880 	register struct filedesc *fdp = td->td_proc->p_fd;
881 	struct vnode *vp, *tdp;
882 	struct mount *mp;
883 	struct file *fp;
884 	int error;
885 
886 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
887 		return (error);
888 	vp = (struct vnode *)fp->f_data;
889 	VREF(vp);
890 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
891 	if (vp->v_type != VDIR)
892 		error = ENOTDIR;
893 	else
894 		error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred, td);
895 	while (!error && (mp = vp->v_mountedhere) != NULL) {
896 		if (vfs_busy(mp, 0, 0, td))
897 			continue;
898 		error = VFS_ROOT(mp, &tdp);
899 		vfs_unbusy(mp, td);
900 		if (error)
901 			break;
902 		vput(vp);
903 		vp = tdp;
904 	}
905 	if (error) {
906 		vput(vp);
907 		return (error);
908 	}
909 	VOP_UNLOCK(vp, 0, td);
910 	vrele(fdp->fd_cdir);
911 	fdp->fd_cdir = vp;
912 	return (0);
913 }
914 
915 /*
916  * Change current working directory (``.'').
917  */
918 #ifndef _SYS_SYSPROTO_H_
919 struct chdir_args {
920 	char	*path;
921 };
922 #endif
923 /* ARGSUSED */
924 int
925 chdir(td, uap)
926 	struct thread *td;
927 	struct chdir_args /* {
928 		syscallarg(char *) path;
929 	} */ *uap;
930 {
931 	register struct filedesc *fdp = td->td_proc->p_fd;
932 	int error;
933 	struct nameidata nd;
934 
935 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
936 	    SCARG(uap, path), td);
937 	if ((error = change_dir(&nd, td)) != 0)
938 		return (error);
939 	NDFREE(&nd, NDF_ONLY_PNBUF);
940 	vrele(fdp->fd_cdir);
941 	fdp->fd_cdir = nd.ni_vp;
942 	return (0);
943 }
944 
945 /*
946  * Helper function for raised chroot(2) security function:  Refuse if
947  * any filedescriptors are open directories.
948  */
949 static int
950 chroot_refuse_vdir_fds(fdp)
951 	struct filedesc *fdp;
952 {
953 	struct vnode *vp;
954 	struct file *fp;
955 	int error;
956 	int fd;
957 
958 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
959 		error = getvnode(fdp, fd, &fp);
960 		if (error)
961 			continue;
962 		vp = (struct vnode *)fp->f_data;
963 		if (vp->v_type != VDIR)
964 			continue;
965 		return(EPERM);
966 	}
967 	return (0);
968 }
969 
970 /*
971  * This sysctl determines if we will allow a process to chroot(2) if it
972  * has a directory open:
973  *	0: disallowed for all processes.
974  *	1: allowed for processes that were not already chroot(2)'ed.
975  *	2: allowed for all processes.
976  */
977 
978 static int chroot_allow_open_directories = 1;
979 
980 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
981      &chroot_allow_open_directories, 0, "");
982 
983 /*
984  * Change notion of root (``/'') directory.
985  */
986 #ifndef _SYS_SYSPROTO_H_
987 struct chroot_args {
988 	char	*path;
989 };
990 #endif
991 /* ARGSUSED */
992 int
993 chroot(td, uap)
994 	struct thread *td;
995 	struct chroot_args /* {
996 		syscallarg(char *) path;
997 	} */ *uap;
998 {
999 	register struct filedesc *fdp = td->td_proc->p_fd;
1000 	int error;
1001 	struct nameidata nd;
1002 
1003 	error = suser_xxx(0, td->td_proc, PRISON_ROOT);
1004 	if (error)
1005 		return (error);
1006 	if (chroot_allow_open_directories == 0 ||
1007 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode))
1008 		error = chroot_refuse_vdir_fds(fdp);
1009 	if (error)
1010 		return (error);
1011 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1012 	    SCARG(uap, path), td);
1013 	if ((error = change_dir(&nd, td)) != 0)
1014 		return (error);
1015 	NDFREE(&nd, NDF_ONLY_PNBUF);
1016 	vrele(fdp->fd_rdir);
1017 	fdp->fd_rdir = nd.ni_vp;
1018 	if (!fdp->fd_jdir) {
1019 		fdp->fd_jdir = nd.ni_vp;
1020                 VREF(fdp->fd_jdir);
1021 	}
1022 	return (0);
1023 }
1024 
1025 /*
1026  * Common routine for chroot and chdir.
1027  */
1028 static int
1029 change_dir(ndp, td)
1030 	register struct nameidata *ndp;
1031 	struct thread *td;
1032 {
1033 	struct vnode *vp;
1034 	int error;
1035 
1036 	error = namei(ndp);
1037 	if (error)
1038 		return (error);
1039 	vp = ndp->ni_vp;
1040 	if (vp->v_type != VDIR)
1041 		error = ENOTDIR;
1042 	else
1043 		error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred, td);
1044 	if (error)
1045 		vput(vp);
1046 	else
1047 		VOP_UNLOCK(vp, 0, td);
1048 	return (error);
1049 }
1050 
1051 /*
1052  * Check permissions, allocate an open file structure,
1053  * and call the device open routine if any.
1054  */
1055 #ifndef _SYS_SYSPROTO_H_
1056 struct open_args {
1057 	char	*path;
1058 	int	flags;
1059 	int	mode;
1060 };
1061 #endif
1062 int
1063 open(td, uap)
1064 	struct thread *td;
1065 	register struct open_args /* {
1066 		syscallarg(char *) path;
1067 		syscallarg(int) flags;
1068 		syscallarg(int) mode;
1069 	} */ *uap;
1070 {
1071 	struct proc *p = td->td_proc;
1072 	struct filedesc *fdp = p->p_fd;
1073 	struct file *fp;
1074 	struct vnode *vp;
1075 	struct vattr vat;
1076 	struct mount *mp;
1077 	int cmode, flags, oflags;
1078 	struct file *nfp;
1079 	int type, indx, error;
1080 	struct flock lf;
1081 	struct nameidata nd;
1082 
1083 	oflags = SCARG(uap, flags);
1084 	if ((oflags & O_ACCMODE) == O_ACCMODE)
1085 		return (EINVAL);
1086 	flags = FFLAGS(oflags);
1087 	error = falloc(td, &nfp, &indx);
1088 	if (error)
1089 		return (error);
1090 	fp = nfp;
1091 	cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1092 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1093 	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
1094 	/*
1095 	 * Bump the ref count to prevent another process from closing
1096 	 * the descriptor while we are blocked in vn_open()
1097 	 */
1098 	fhold(fp);
1099 	error = vn_open(&nd, &flags, cmode);
1100 	if (error) {
1101 		/*
1102 		 * release our own reference
1103 		 */
1104 		fdrop(fp, td);
1105 
1106 		/*
1107 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1108 		 * responsible for dropping the old contents of ofiles[indx]
1109 		 * if it succeeds.
1110 		 */
1111 		if ((error == ENODEV || error == ENXIO) &&
1112 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1113 		    (error =
1114 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1115 			td->td_retval[0] = indx;
1116 			return (0);
1117 		}
1118 		/*
1119 		 * Clean up the descriptor, but only if another thread hadn't
1120 		 * replaced or closed it.
1121 		 */
1122 		if (fdp->fd_ofiles[indx] == fp) {
1123 			fdp->fd_ofiles[indx] = NULL;
1124 			fdrop(fp, td);
1125 		}
1126 
1127 		if (error == ERESTART)
1128 			error = EINTR;
1129 		return (error);
1130 	}
1131 	td->td_dupfd = 0;
1132 	NDFREE(&nd, NDF_ONLY_PNBUF);
1133 	vp = nd.ni_vp;
1134 
1135 	/*
1136 	 * There should be 2 references on the file, one from the descriptor
1137 	 * table, and one for us.
1138 	 *
1139 	 * Handle the case where someone closed the file (via its file
1140 	 * descriptor) while we were blocked.  The end result should look
1141 	 * like opening the file succeeded but it was immediately closed.
1142 	 */
1143 	if (fp->f_count == 1) {
1144 		KASSERT(fdp->fd_ofiles[indx] != fp,
1145 		    ("Open file descriptor lost all refs"));
1146 		VOP_UNLOCK(vp, 0, td);
1147 		vn_close(vp, flags & FMASK, fp->f_cred, td);
1148 		fdrop(fp, td);
1149 		td->td_retval[0] = indx;
1150 		return 0;
1151 	}
1152 
1153 	fp->f_data = (caddr_t)vp;
1154 	fp->f_flag = flags & FMASK;
1155 	fp->f_ops = &vnops;
1156 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1157 	VOP_UNLOCK(vp, 0, td);
1158 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1159 		lf.l_whence = SEEK_SET;
1160 		lf.l_start = 0;
1161 		lf.l_len = 0;
1162 		if (flags & O_EXLOCK)
1163 			lf.l_type = F_WRLCK;
1164 		else
1165 			lf.l_type = F_RDLCK;
1166 		type = F_FLOCK;
1167 		if ((flags & FNONBLOCK) == 0)
1168 			type |= F_WAIT;
1169 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0)
1170 			goto bad;
1171 		fp->f_flag |= FHASLOCK;
1172 	}
1173 	if (flags & O_TRUNC) {
1174 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1175 			goto bad;
1176 		VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE);
1177 		VATTR_NULL(&vat);
1178 		vat.va_size = 0;
1179 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1180 		error = VOP_SETATTR(vp, &vat, p->p_ucred, td);
1181 		VOP_UNLOCK(vp, 0, td);
1182 		vn_finished_write(mp);
1183 		if (error)
1184 			goto bad;
1185 	}
1186 	/* assert that vn_open created a backing object if one is needed */
1187 	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
1188 		("open: vmio vnode has no backing object after vn_open"));
1189 	/*
1190 	 * Release our private reference, leaving the one associated with
1191 	 * the descriptor table intact.
1192 	 */
1193 	fdrop(fp, td);
1194 	td->td_retval[0] = indx;
1195 	return (0);
1196 bad:
1197 	if (fdp->fd_ofiles[indx] == fp) {
1198 		fdp->fd_ofiles[indx] = NULL;
1199 		fdrop(fp, td);
1200 	}
1201 	fdrop(fp, td);
1202 	return (error);
1203 }
1204 
1205 #ifdef COMPAT_43
1206 /*
1207  * Create a file.
1208  */
1209 #ifndef _SYS_SYSPROTO_H_
1210 struct ocreat_args {
1211 	char	*path;
1212 	int	mode;
1213 };
1214 #endif
1215 int
1216 ocreat(td, uap)
1217 	struct thread *td;
1218 	register struct ocreat_args /* {
1219 		syscallarg(char *) path;
1220 		syscallarg(int) mode;
1221 	} */ *uap;
1222 {
1223 	struct open_args /* {
1224 		syscallarg(char *) path;
1225 		syscallarg(int) flags;
1226 		syscallarg(int) mode;
1227 	} */ nuap;
1228 
1229 	SCARG(&nuap, path) = SCARG(uap, path);
1230 	SCARG(&nuap, mode) = SCARG(uap, mode);
1231 	SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC;
1232 	return (open(td, &nuap));
1233 }
1234 #endif /* COMPAT_43 */
1235 
1236 /*
1237  * Create a special file.
1238  */
1239 #ifndef _SYS_SYSPROTO_H_
1240 struct mknod_args {
1241 	char	*path;
1242 	int	mode;
1243 	int	dev;
1244 };
1245 #endif
1246 /* ARGSUSED */
1247 int
1248 mknod(td, uap)
1249 	struct thread *td;
1250 	register struct mknod_args /* {
1251 		syscallarg(char *) path;
1252 		syscallarg(int) mode;
1253 		syscallarg(int) dev;
1254 	} */ *uap;
1255 {
1256 	struct vnode *vp;
1257 	struct mount *mp;
1258 	struct vattr vattr;
1259 	int error;
1260 	int whiteout = 0;
1261 	struct nameidata nd;
1262 
1263 	switch (SCARG(uap, mode) & S_IFMT) {
1264 	case S_IFCHR:
1265 	case S_IFBLK:
1266 		error = suser_td(td);
1267 		break;
1268 	default:
1269 		error = suser_xxx(0, td->td_proc, PRISON_ROOT);
1270 		break;
1271 	}
1272 	if (error)
1273 		return (error);
1274 restart:
1275 	bwillwrite();
1276 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1277 	if ((error = namei(&nd)) != 0)
1278 		return (error);
1279 	vp = nd.ni_vp;
1280 	if (vp != NULL) {
1281 		vrele(vp);
1282 		error = EEXIST;
1283 	} else {
1284 		VATTR_NULL(&vattr);
1285 		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
1286 		vattr.va_rdev = SCARG(uap, dev);
1287 		whiteout = 0;
1288 
1289 		switch (SCARG(uap, mode) & S_IFMT) {
1290 		case S_IFMT:	/* used by badsect to flag bad sectors */
1291 			vattr.va_type = VBAD;
1292 			break;
1293 		case S_IFCHR:
1294 			vattr.va_type = VCHR;
1295 			break;
1296 		case S_IFBLK:
1297 			vattr.va_type = VBLK;
1298 			break;
1299 		case S_IFWHT:
1300 			whiteout = 1;
1301 			break;
1302 		default:
1303 			error = EINVAL;
1304 			break;
1305 		}
1306 	}
1307 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1308 		NDFREE(&nd, NDF_ONLY_PNBUF);
1309 		vput(nd.ni_dvp);
1310 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1311 			return (error);
1312 		goto restart;
1313 	}
1314 	if (!error) {
1315 		VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE);
1316 		if (whiteout)
1317 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1318 		else {
1319 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1320 						&nd.ni_cnd, &vattr);
1321 			if (error == 0)
1322 				vput(nd.ni_vp);
1323 		}
1324 	}
1325 	NDFREE(&nd, NDF_ONLY_PNBUF);
1326 	vput(nd.ni_dvp);
1327 	vn_finished_write(mp);
1328 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1329 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1330 	return (error);
1331 }
1332 
1333 /*
1334  * Create a named pipe.
1335  */
1336 #ifndef _SYS_SYSPROTO_H_
1337 struct mkfifo_args {
1338 	char	*path;
1339 	int	mode;
1340 };
1341 #endif
1342 /* ARGSUSED */
1343 int
1344 mkfifo(td, uap)
1345 	struct thread *td;
1346 	register struct mkfifo_args /* {
1347 		syscallarg(char *) path;
1348 		syscallarg(int) mode;
1349 	} */ *uap;
1350 {
1351 	struct mount *mp;
1352 	struct vattr vattr;
1353 	int error;
1354 	struct nameidata nd;
1355 
1356 restart:
1357 	bwillwrite();
1358 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1359 	if ((error = namei(&nd)) != 0)
1360 		return (error);
1361 	if (nd.ni_vp != NULL) {
1362 		NDFREE(&nd, NDF_ONLY_PNBUF);
1363 		vrele(nd.ni_vp);
1364 		vput(nd.ni_dvp);
1365 		return (EEXIST);
1366 	}
1367 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1368 		NDFREE(&nd, NDF_ONLY_PNBUF);
1369 		vput(nd.ni_dvp);
1370 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1371 			return (error);
1372 		goto restart;
1373 	}
1374 	VATTR_NULL(&vattr);
1375 	vattr.va_type = VFIFO;
1376 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
1377 	VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE);
1378 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1379 	if (error == 0)
1380 		vput(nd.ni_vp);
1381 	NDFREE(&nd, NDF_ONLY_PNBUF);
1382 	vput(nd.ni_dvp);
1383 	vn_finished_write(mp);
1384 	return (error);
1385 }
1386 
1387 /*
1388  * Make a hard file link.
1389  */
1390 #ifndef _SYS_SYSPROTO_H_
1391 struct link_args {
1392 	char	*path;
1393 	char	*link;
1394 };
1395 #endif
1396 /* ARGSUSED */
1397 int
1398 link(td, uap)
1399 	struct thread *td;
1400 	register struct link_args /* {
1401 		syscallarg(char *) path;
1402 		syscallarg(char *) link;
1403 	} */ *uap;
1404 {
1405 	struct vnode *vp;
1406 	struct mount *mp;
1407 	struct nameidata nd;
1408 	int error;
1409 
1410 	bwillwrite();
1411 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), td);
1412 	if ((error = namei(&nd)) != 0)
1413 		return (error);
1414 	NDFREE(&nd, NDF_ONLY_PNBUF);
1415 	vp = nd.ni_vp;
1416 	if (vp->v_type == VDIR) {
1417 		vrele(vp);
1418 		return (EPERM);		/* POSIX */
1419 	}
1420 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1421 		vrele(vp);
1422 		return (error);
1423 	}
1424 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
1425 	if ((error = namei(&nd)) == 0) {
1426 		if (nd.ni_vp != NULL) {
1427 			vrele(nd.ni_vp);
1428 			error = EEXIST;
1429 		} else {
1430 			VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE);
1431 			VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
1432 			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1433 		}
1434 		NDFREE(&nd, NDF_ONLY_PNBUF);
1435 		vput(nd.ni_dvp);
1436 	}
1437 	vrele(vp);
1438 	vn_finished_write(mp);
1439 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1440 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1441 	return (error);
1442 }
1443 
1444 /*
1445  * Make a symbolic link.
1446  */
1447 #ifndef _SYS_SYSPROTO_H_
1448 struct symlink_args {
1449 	char	*path;
1450 	char	*link;
1451 };
1452 #endif
1453 /* ARGSUSED */
1454 int
1455 symlink(td, uap)
1456 	struct thread *td;
1457 	register struct symlink_args /* {
1458 		syscallarg(char *) path;
1459 		syscallarg(char *) link;
1460 	} */ *uap;
1461 {
1462 	struct mount *mp;
1463 	struct vattr vattr;
1464 	char *path;
1465 	int error;
1466 	struct nameidata nd;
1467 
1468 	path = zalloc(namei_zone);
1469 	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
1470 		goto out;
1471 restart:
1472 	bwillwrite();
1473 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
1474 	if ((error = namei(&nd)) != 0)
1475 		goto out;
1476 	if (nd.ni_vp) {
1477 		NDFREE(&nd, NDF_ONLY_PNBUF);
1478 		vrele(nd.ni_vp);
1479 		vput(nd.ni_dvp);
1480 		error = EEXIST;
1481 		goto out;
1482 	}
1483 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1484 		NDFREE(&nd, NDF_ONLY_PNBUF);
1485 		vput(nd.ni_dvp);
1486 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1487 			return (error);
1488 		goto restart;
1489 	}
1490 	VATTR_NULL(&vattr);
1491 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1492 	VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE);
1493 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1494 	NDFREE(&nd, NDF_ONLY_PNBUF);
1495 	if (error == 0)
1496 		vput(nd.ni_vp);
1497 	vput(nd.ni_dvp);
1498 	vn_finished_write(mp);
1499 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1500 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1501 out:
1502 	zfree(namei_zone, path);
1503 	return (error);
1504 }
1505 
1506 /*
1507  * Delete a whiteout from the filesystem.
1508  */
1509 /* ARGSUSED */
1510 int
1511 undelete(td, uap)
1512 	struct thread *td;
1513 	register struct undelete_args /* {
1514 		syscallarg(char *) path;
1515 	} */ *uap;
1516 {
1517 	int error;
1518 	struct mount *mp;
1519 	struct nameidata nd;
1520 
1521 restart:
1522 	bwillwrite();
1523 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1524 	    SCARG(uap, path), td);
1525 	error = namei(&nd);
1526 	if (error)
1527 		return (error);
1528 
1529 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1530 		NDFREE(&nd, NDF_ONLY_PNBUF);
1531 		if (nd.ni_vp)
1532 			vrele(nd.ni_vp);
1533 		vput(nd.ni_dvp);
1534 		return (EEXIST);
1535 	}
1536 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1537 		NDFREE(&nd, NDF_ONLY_PNBUF);
1538 		vput(nd.ni_dvp);
1539 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1540 			return (error);
1541 		goto restart;
1542 	}
1543 	VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE);
1544 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1545 	NDFREE(&nd, NDF_ONLY_PNBUF);
1546 	vput(nd.ni_dvp);
1547 	vn_finished_write(mp);
1548 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1549 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1550 	return (error);
1551 }
1552 
1553 /*
1554  * Delete a name from the filesystem.
1555  */
1556 #ifndef _SYS_SYSPROTO_H_
1557 struct unlink_args {
1558 	char	*path;
1559 };
1560 #endif
1561 /* ARGSUSED */
1562 int
1563 unlink(td, uap)
1564 	struct thread *td;
1565 	struct unlink_args /* {
1566 		syscallarg(char *) path;
1567 	} */ *uap;
1568 {
1569 	struct mount *mp;
1570 	struct vnode *vp;
1571 	int error;
1572 	struct nameidata nd;
1573 
1574 restart:
1575 	bwillwrite();
1576 	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1577 	if ((error = namei(&nd)) != 0)
1578 		return (error);
1579 	vp = nd.ni_vp;
1580 	if (vp->v_type == VDIR)
1581 		error = EPERM;		/* POSIX */
1582 	else {
1583 		/*
1584 		 * The root of a mounted filesystem cannot be deleted.
1585 		 *
1586 		 * XXX: can this only be a VDIR case?
1587 		 */
1588 		if (vp->v_flag & VROOT)
1589 			error = EBUSY;
1590 	}
1591 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1592 		NDFREE(&nd, NDF_ONLY_PNBUF);
1593 		vrele(vp);
1594 		vput(nd.ni_dvp);
1595 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1596 			return (error);
1597 		goto restart;
1598 	}
1599 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
1600 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1601 	if (!error) {
1602 		VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE);
1603 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1604 	}
1605 	NDFREE(&nd, NDF_ONLY_PNBUF);
1606 	vput(nd.ni_dvp);
1607 	vput(vp);
1608 	vn_finished_write(mp);
1609 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1610 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1611 	return (error);
1612 }
1613 
1614 /*
1615  * Reposition read/write file offset.
1616  */
1617 #ifndef _SYS_SYSPROTO_H_
1618 struct lseek_args {
1619 	int	fd;
1620 	int	pad;
1621 	off_t	offset;
1622 	int	whence;
1623 };
1624 #endif
1625 int
1626 lseek(td, uap)
1627 	struct thread *td;
1628 	register struct lseek_args /* {
1629 		syscallarg(int) fd;
1630 		syscallarg(int) pad;
1631 		syscallarg(off_t) offset;
1632 		syscallarg(int) whence;
1633 	} */ *uap;
1634 {
1635 	struct ucred *cred = td->td_proc->p_ucred;
1636 	register struct filedesc *fdp = td->td_proc->p_fd;
1637 	register struct file *fp;
1638 	struct vattr vattr;
1639 	struct vnode *vp;
1640 	off_t offset;
1641 	int error, noneg;
1642 
1643 	if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
1644 	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
1645 		return (EBADF);
1646 	if (fp->f_type != DTYPE_VNODE)
1647 		return (ESPIPE);
1648 	vp = (struct vnode *)fp->f_data;
1649 	noneg = (vp->v_type != VCHR);
1650 	offset = SCARG(uap, offset);
1651 	switch (SCARG(uap, whence)) {
1652 	case L_INCR:
1653 		if (noneg &&
1654 		    (fp->f_offset < 0 ||
1655 		     (offset > 0 && fp->f_offset > OFF_MAX - offset)))
1656 			return (EOVERFLOW);
1657 		offset += fp->f_offset;
1658 		break;
1659 	case L_XTND:
1660 		error = VOP_GETATTR(vp, &vattr, cred, td);
1661 		if (error)
1662 			return (error);
1663 		if (noneg &&
1664 		    (vattr.va_size > OFF_MAX ||
1665 		     (offset > 0 && vattr.va_size > OFF_MAX - offset)))
1666 			return (EOVERFLOW);
1667 		offset += vattr.va_size;
1668 		break;
1669 	case L_SET:
1670 		break;
1671 	default:
1672 		return (EINVAL);
1673 	}
1674 	if (noneg && offset < 0)
1675 		return (EINVAL);
1676 	fp->f_offset = offset;
1677 	*(off_t *)(td->td_retval) = fp->f_offset;
1678 	return (0);
1679 }
1680 
1681 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1682 /*
1683  * Reposition read/write file offset.
1684  */
1685 #ifndef _SYS_SYSPROTO_H_
1686 struct olseek_args {
1687 	int	fd;
1688 	long	offset;
1689 	int	whence;
1690 };
1691 #endif
1692 int
1693 olseek(td, uap)
1694 	struct thread *td;
1695 	register struct olseek_args /* {
1696 		syscallarg(int) fd;
1697 		syscallarg(long) offset;
1698 		syscallarg(int) whence;
1699 	} */ *uap;
1700 {
1701 	struct lseek_args /* {
1702 		syscallarg(int) fd;
1703 		syscallarg(int) pad;
1704 		syscallarg(off_t) offset;
1705 		syscallarg(int) whence;
1706 	} */ nuap;
1707 	int error;
1708 
1709 	SCARG(&nuap, fd) = SCARG(uap, fd);
1710 	SCARG(&nuap, offset) = SCARG(uap, offset);
1711 	SCARG(&nuap, whence) = SCARG(uap, whence);
1712 	error = lseek(td, &nuap);
1713 	return (error);
1714 }
1715 #endif /* COMPAT_43 */
1716 
1717 /*
1718  * Check access permissions using passed credentials.
1719  */
1720 static int
1721 vn_access(vp, user_flags, cred, td)
1722 	struct vnode	*vp;
1723 	int		user_flags;
1724 	struct ucred	*cred;
1725 	struct thread	*td;
1726 {
1727 	int error, flags;
1728 
1729 	/* Flags == 0 means only check for existence. */
1730 	error = 0;
1731 	if (user_flags) {
1732 		flags = 0;
1733 		if (user_flags & R_OK)
1734 			flags |= VREAD;
1735 		if (user_flags & W_OK)
1736 			flags |= VWRITE;
1737 		if (user_flags & X_OK)
1738 			flags |= VEXEC;
1739 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1740 			error = VOP_ACCESS(vp, flags, cred, td);
1741 	}
1742 	return (error);
1743 }
1744 
1745 /*
1746  * Check access permissions using "real" credentials.
1747  */
1748 #ifndef _SYS_SYSPROTO_H_
1749 struct access_args {
1750 	char	*path;
1751 	int	flags;
1752 };
1753 #endif
1754 int
1755 access(td, uap)
1756 	struct thread *td;
1757 	register struct access_args /* {
1758 		syscallarg(char *) path;
1759 		syscallarg(int) flags;
1760 	} */ *uap;
1761 {
1762 	struct ucred *cred, *tmpcred;
1763 	register struct vnode *vp;
1764 	int error;
1765 	struct nameidata nd;
1766 
1767 	cred = td->td_proc->p_ucred;
1768 	/*
1769 	 * Create and modify a temporary credential instead of one that
1770 	 * is potentially shared.  This could also mess up socket
1771 	 * buffer accounting which can run in an interrupt context.
1772 	 *
1773 	 * XXX - Depending on how "threads" are finally implemented, it
1774 	 * may be better to explicitly pass the credential to namei()
1775 	 * rather than to modify the potentially shared process structure.
1776 	 */
1777 	tmpcred = crdup(cred);
1778 	tmpcred->cr_uid = cred->cr_ruid;
1779 	tmpcred->cr_groups[0] = cred->cr_rgid;
1780 	td->td_proc->p_ucred = tmpcred;
1781 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1782 	    SCARG(uap, path), td);
1783 	if ((error = namei(&nd)) != 0)
1784 		goto out1;
1785 	vp = nd.ni_vp;
1786 
1787 	error = vn_access(vp, SCARG(uap, flags), tmpcred, td);
1788 	NDFREE(&nd, NDF_ONLY_PNBUF);
1789 	vput(vp);
1790 out1:
1791 	td->td_proc->p_ucred = cred;
1792 	crfree(tmpcred);
1793 	return (error);
1794 }
1795 
1796 /*
1797  * Check access permissions using "effective" credentials.
1798  */
1799 #ifndef _SYS_SYSPROTO_H_
1800 struct eaccess_args {
1801 	char	*path;
1802 	int	flags;
1803 };
1804 #endif
1805 int
1806 eaccess(td, uap)
1807 	struct thread *td;
1808 	register struct eaccess_args /* {
1809 		syscallarg(char *) path;
1810 		syscallarg(int) flags;
1811 	} */ *uap;
1812 {
1813 	struct nameidata nd;
1814 	struct vnode *vp;
1815 	int error;
1816 
1817 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1818 	    SCARG(uap, path), td);
1819 	if ((error = namei(&nd)) != 0)
1820 		return (error);
1821 	vp = nd.ni_vp;
1822 
1823 	error = vn_access(vp, SCARG(uap, flags), td->td_proc->p_ucred, td);
1824 	NDFREE(&nd, NDF_ONLY_PNBUF);
1825 	vput(vp);
1826 	return (error);
1827 }
1828 
1829 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1830 /*
1831  * Get file status; this version follows links.
1832  */
1833 #ifndef _SYS_SYSPROTO_H_
1834 struct ostat_args {
1835 	char	*path;
1836 	struct ostat *ub;
1837 };
1838 #endif
1839 /* ARGSUSED */
1840 int
1841 ostat(td, uap)
1842 	struct thread *td;
1843 	register struct ostat_args /* {
1844 		syscallarg(char *) path;
1845 		syscallarg(struct ostat *) ub;
1846 	} */ *uap;
1847 {
1848 	struct stat sb;
1849 	struct ostat osb;
1850 	int error;
1851 	struct nameidata nd;
1852 
1853 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1854 	    SCARG(uap, path), td);
1855 	if ((error = namei(&nd)) != 0)
1856 		return (error);
1857 	NDFREE(&nd, NDF_ONLY_PNBUF);
1858 	error = vn_stat(nd.ni_vp, &sb, td);
1859 	vput(nd.ni_vp);
1860 	if (error)
1861 		return (error);
1862 	cvtstat(&sb, &osb);
1863 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1864 	return (error);
1865 }
1866 
1867 /*
1868  * Get file status; this version does not follow links.
1869  */
1870 #ifndef _SYS_SYSPROTO_H_
1871 struct olstat_args {
1872 	char	*path;
1873 	struct ostat *ub;
1874 };
1875 #endif
1876 /* ARGSUSED */
1877 int
1878 olstat(td, uap)
1879 	struct thread *td;
1880 	register struct olstat_args /* {
1881 		syscallarg(char *) path;
1882 		syscallarg(struct ostat *) ub;
1883 	} */ *uap;
1884 {
1885 	struct vnode *vp;
1886 	struct stat sb;
1887 	struct ostat osb;
1888 	int error;
1889 	struct nameidata nd;
1890 
1891 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1892 	    SCARG(uap, path), td);
1893 	if ((error = namei(&nd)) != 0)
1894 		return (error);
1895 	vp = nd.ni_vp;
1896 	error = vn_stat(vp, &sb, td);
1897 	NDFREE(&nd, NDF_ONLY_PNBUF);
1898 	vput(vp);
1899 	if (error)
1900 		return (error);
1901 	cvtstat(&sb, &osb);
1902 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1903 	return (error);
1904 }
1905 
/*
 * Convert from a new (struct stat) to an old (struct ostat) stat structure.
 */
1909 void
1910 cvtstat(st, ost)
1911 	struct stat *st;
1912 	struct ostat *ost;
1913 {
1914 
1915 	ost->st_dev = st->st_dev;
1916 	ost->st_ino = st->st_ino;
1917 	ost->st_mode = st->st_mode;
1918 	ost->st_nlink = st->st_nlink;
1919 	ost->st_uid = st->st_uid;
1920 	ost->st_gid = st->st_gid;
1921 	ost->st_rdev = st->st_rdev;
1922 	if (st->st_size < (quad_t)1 << 32)
1923 		ost->st_size = st->st_size;
1924 	else
1925 		ost->st_size = -2;
1926 	ost->st_atime = st->st_atime;
1927 	ost->st_mtime = st->st_mtime;
1928 	ost->st_ctime = st->st_ctime;
1929 	ost->st_blksize = st->st_blksize;
1930 	ost->st_blocks = st->st_blocks;
1931 	ost->st_flags = st->st_flags;
1932 	ost->st_gen = st->st_gen;
1933 }
1934 #endif /* COMPAT_43 || COMPAT_SUNOS */
1935 
1936 /*
1937  * Get file status; this version follows links.
1938  */
1939 #ifndef _SYS_SYSPROTO_H_
1940 struct stat_args {
1941 	char	*path;
1942 	struct stat *ub;
1943 };
1944 #endif
1945 /* ARGSUSED */
1946 int
1947 stat(td, uap)
1948 	struct thread *td;
1949 	register struct stat_args /* {
1950 		syscallarg(char *) path;
1951 		syscallarg(struct stat *) ub;
1952 	} */ *uap;
1953 {
1954 	struct stat sb;
1955 	int error;
1956 	struct nameidata nd;
1957 
1958 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1959 	    SCARG(uap, path), td);
1960 	if ((error = namei(&nd)) != 0)
1961 		return (error);
1962 	error = vn_stat(nd.ni_vp, &sb, td);
1963 	NDFREE(&nd, NDF_ONLY_PNBUF);
1964 	vput(nd.ni_vp);
1965 	if (error)
1966 		return (error);
1967 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
1968 	return (error);
1969 }
1970 
1971 /*
1972  * Get file status; this version does not follow links.
1973  */
1974 #ifndef _SYS_SYSPROTO_H_
1975 struct lstat_args {
1976 	char	*path;
1977 	struct stat *ub;
1978 };
1979 #endif
1980 /* ARGSUSED */
1981 int
1982 lstat(td, uap)
1983 	struct thread *td;
1984 	register struct lstat_args /* {
1985 		syscallarg(char *) path;
1986 		syscallarg(struct stat *) ub;
1987 	} */ *uap;
1988 {
1989 	int error;
1990 	struct vnode *vp;
1991 	struct stat sb;
1992 	struct nameidata nd;
1993 
1994 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1995 	    SCARG(uap, path), td);
1996 	if ((error = namei(&nd)) != 0)
1997 		return (error);
1998 	vp = nd.ni_vp;
1999 	error = vn_stat(vp, &sb, td);
2000 	NDFREE(&nd, NDF_ONLY_PNBUF);
2001 	vput(vp);
2002 	if (error)
2003 		return (error);
2004 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
2005 	return (error);
2006 }
2007 
2008 /*
2009  * Implementation of the NetBSD stat() function.
2010  * XXX This should probably be collapsed with the FreeBSD version,
2011  * as the differences are only due to vn_stat() clearing spares at
2012  * the end of the structures.  vn_stat could be split to avoid this,
2013  * and thus collapse the following to close to zero code.
2014  */
2015 void
2016 cvtnstat(sb, nsb)
2017 	struct stat *sb;
2018 	struct nstat *nsb;
2019 {
2020 	nsb->st_dev = sb->st_dev;
2021 	nsb->st_ino = sb->st_ino;
2022 	nsb->st_mode = sb->st_mode;
2023 	nsb->st_nlink = sb->st_nlink;
2024 	nsb->st_uid = sb->st_uid;
2025 	nsb->st_gid = sb->st_gid;
2026 	nsb->st_rdev = sb->st_rdev;
2027 	nsb->st_atimespec = sb->st_atimespec;
2028 	nsb->st_mtimespec = sb->st_mtimespec;
2029 	nsb->st_ctimespec = sb->st_ctimespec;
2030 	nsb->st_size = sb->st_size;
2031 	nsb->st_blocks = sb->st_blocks;
2032 	nsb->st_blksize = sb->st_blksize;
2033 	nsb->st_flags = sb->st_flags;
2034 	nsb->st_gen = sb->st_gen;
2035 	nsb->st_qspare[0] = sb->st_qspare[0];
2036 	nsb->st_qspare[1] = sb->st_qspare[1];
2037 }
2038 
2039 #ifndef _SYS_SYSPROTO_H_
2040 struct nstat_args {
2041 	char	*path;
2042 	struct nstat *ub;
2043 };
2044 #endif
2045 /* ARGSUSED */
2046 int
2047 nstat(td, uap)
2048 	struct thread *td;
2049 	register struct nstat_args /* {
2050 		syscallarg(char *) path;
2051 		syscallarg(struct nstat *) ub;
2052 	} */ *uap;
2053 {
2054 	struct stat sb;
2055 	struct nstat nsb;
2056 	int error;
2057 	struct nameidata nd;
2058 
2059 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2060 	    SCARG(uap, path), td);
2061 	if ((error = namei(&nd)) != 0)
2062 		return (error);
2063 	NDFREE(&nd, NDF_ONLY_PNBUF);
2064 	error = vn_stat(nd.ni_vp, &sb, td);
2065 	vput(nd.ni_vp);
2066 	if (error)
2067 		return (error);
2068 	cvtnstat(&sb, &nsb);
2069 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2070 	return (error);
2071 }
2072 
2073 /*
2074  * NetBSD lstat.  Get file status; this version does not follow links.
2075  */
2076 #ifndef _SYS_SYSPROTO_H_
2077 struct lstat_args {
2078 	char	*path;
2079 	struct stat *ub;
2080 };
2081 #endif
2082 /* ARGSUSED */
2083 int
2084 nlstat(td, uap)
2085 	struct thread *td;
2086 	register struct nlstat_args /* {
2087 		syscallarg(char *) path;
2088 		syscallarg(struct nstat *) ub;
2089 	} */ *uap;
2090 {
2091 	int error;
2092 	struct vnode *vp;
2093 	struct stat sb;
2094 	struct nstat nsb;
2095 	struct nameidata nd;
2096 
2097 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2098 	    SCARG(uap, path), td);
2099 	if ((error = namei(&nd)) != 0)
2100 		return (error);
2101 	vp = nd.ni_vp;
2102 	NDFREE(&nd, NDF_ONLY_PNBUF);
2103 	error = vn_stat(vp, &sb, td);
2104 	vput(vp);
2105 	if (error)
2106 		return (error);
2107 	cvtnstat(&sb, &nsb);
2108 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2109 	return (error);
2110 }
2111 
2112 /*
2113  * Get configurable pathname variables.
2114  */
2115 #ifndef _SYS_SYSPROTO_H_
2116 struct pathconf_args {
2117 	char	*path;
2118 	int	name;
2119 };
2120 #endif
2121 /* ARGSUSED */
2122 int
2123 pathconf(td, uap)
2124 	struct thread *td;
2125 	register struct pathconf_args /* {
2126 		syscallarg(char *) path;
2127 		syscallarg(int) name;
2128 	} */ *uap;
2129 {
2130 	int error;
2131 	struct nameidata nd;
2132 
2133 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2134 	    SCARG(uap, path), td);
2135 	if ((error = namei(&nd)) != 0)
2136 		return (error);
2137 	NDFREE(&nd, NDF_ONLY_PNBUF);
2138 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), td->td_retval);
2139 	vput(nd.ni_vp);
2140 	return (error);
2141 }
2142 
2143 /*
2144  * Return target name of a symbolic link.
2145  */
2146 #ifndef _SYS_SYSPROTO_H_
2147 struct readlink_args {
2148 	char	*path;
2149 	char	*buf;
2150 	int	count;
2151 };
2152 #endif
2153 /* ARGSUSED */
2154 int
2155 readlink(td, uap)
2156 	struct thread *td;
2157 	register struct readlink_args /* {
2158 		syscallarg(char *) path;
2159 		syscallarg(char *) buf;
2160 		syscallarg(int) count;
2161 	} */ *uap;
2162 {
2163 	register struct vnode *vp;
2164 	struct iovec aiov;
2165 	struct uio auio;
2166 	int error;
2167 	struct nameidata nd;
2168 
2169 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2170 	    SCARG(uap, path), td);
2171 	if ((error = namei(&nd)) != 0)
2172 		return (error);
2173 	NDFREE(&nd, NDF_ONLY_PNBUF);
2174 	vp = nd.ni_vp;
2175 	if (vp->v_type != VLNK)
2176 		error = EINVAL;
2177 	else {
2178 		aiov.iov_base = SCARG(uap, buf);
2179 		aiov.iov_len = SCARG(uap, count);
2180 		auio.uio_iov = &aiov;
2181 		auio.uio_iovcnt = 1;
2182 		auio.uio_offset = 0;
2183 		auio.uio_rw = UIO_READ;
2184 		auio.uio_segflg = UIO_USERSPACE;
2185 		auio.uio_td = td;
2186 		auio.uio_resid = SCARG(uap, count);
2187 		error = VOP_READLINK(vp, &auio, td->td_proc->p_ucred);
2188 	}
2189 	vput(vp);
2190 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
2191 	return (error);
2192 }
2193 
2194 /*
2195  * Common implementation code for chflags() and fchflags().
2196  */
2197 static int
2198 setfflags(td, vp, flags)
2199 	struct thread *td;
2200 	struct vnode *vp;
2201 	int flags;
2202 {
2203 	int error;
2204 	struct mount *mp;
2205 	struct vattr vattr;
2206 
2207 	/*
2208 	 * Prevent non-root users from setting flags on devices.  When
2209 	 * a device is reused, users can retain ownership of the device
2210 	 * if they are allowed to set flags and programs assume that
2211 	 * chown can't fail when done as root.
2212 	 */
2213 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2214 		error = suser_xxx(td->td_proc->p_ucred, td->td_proc,
2215 		    PRISON_ROOT);
2216 		if (error)
2217 			return (error);
2218 	}
2219 
2220 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2221 		return (error);
2222 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
2223 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2224 	VATTR_NULL(&vattr);
2225 	vattr.va_flags = flags;
2226 	error = VOP_SETATTR(vp, &vattr, td->td_proc->p_ucred, td);
2227 	VOP_UNLOCK(vp, 0, td);
2228 	vn_finished_write(mp);
2229 	return (error);
2230 }
2231 
2232 /*
2233  * Change flags of a file given a path name.
2234  */
2235 #ifndef _SYS_SYSPROTO_H_
2236 struct chflags_args {
2237 	char	*path;
2238 	int	flags;
2239 };
2240 #endif
2241 /* ARGSUSED */
2242 int
2243 chflags(td, uap)
2244 	struct thread *td;
2245 	register struct chflags_args /* {
2246 		syscallarg(char *) path;
2247 		syscallarg(int) flags;
2248 	} */ *uap;
2249 {
2250 	int error;
2251 	struct nameidata nd;
2252 
2253 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2254 	if ((error = namei(&nd)) != 0)
2255 		return (error);
2256 	NDFREE(&nd, NDF_ONLY_PNBUF);
2257 	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
2258 	vrele(nd.ni_vp);
2259 	return error;
2260 }
2261 
2262 /*
2263  * Change flags of a file given a file descriptor.
2264  */
2265 #ifndef _SYS_SYSPROTO_H_
2266 struct fchflags_args {
2267 	int	fd;
2268 	int	flags;
2269 };
2270 #endif
2271 /* ARGSUSED */
2272 int
2273 fchflags(td, uap)
2274 	struct thread *td;
2275 	register struct fchflags_args /* {
2276 		syscallarg(int) fd;
2277 		syscallarg(int) flags;
2278 	} */ *uap;
2279 {
2280 	struct file *fp;
2281 	int error;
2282 
2283 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2284 		return (error);
2285 	return setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags));
2286 }
2287 
2288 /*
2289  * Common implementation code for chmod(), lchmod() and fchmod().
2290  */
2291 static int
2292 setfmode(td, vp, mode)
2293 	struct thread *td;
2294 	struct vnode *vp;
2295 	int mode;
2296 {
2297 	int error;
2298 	struct mount *mp;
2299 	struct vattr vattr;
2300 
2301 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2302 		return (error);
2303 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
2304 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2305 	VATTR_NULL(&vattr);
2306 	vattr.va_mode = mode & ALLPERMS;
2307 	error = VOP_SETATTR(vp, &vattr, td->td_proc->p_ucred, td);
2308 	VOP_UNLOCK(vp, 0, td);
2309 	vn_finished_write(mp);
2310 	return error;
2311 }
2312 
2313 /*
2314  * Change mode of a file given path name.
2315  */
2316 #ifndef _SYS_SYSPROTO_H_
2317 struct chmod_args {
2318 	char	*path;
2319 	int	mode;
2320 };
2321 #endif
2322 /* ARGSUSED */
2323 int
2324 chmod(td, uap)
2325 	struct thread *td;
2326 	register struct chmod_args /* {
2327 		syscallarg(char *) path;
2328 		syscallarg(int) mode;
2329 	} */ *uap;
2330 {
2331 	int error;
2332 	struct nameidata nd;
2333 
2334 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2335 	if ((error = namei(&nd)) != 0)
2336 		return (error);
2337 	NDFREE(&nd, NDF_ONLY_PNBUF);
2338 	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
2339 	vrele(nd.ni_vp);
2340 	return error;
2341 }
2342 
2343 /*
2344  * Change mode of a file given path name (don't follow links.)
2345  */
2346 #ifndef _SYS_SYSPROTO_H_
2347 struct lchmod_args {
2348 	char	*path;
2349 	int	mode;
2350 };
2351 #endif
2352 /* ARGSUSED */
2353 int
2354 lchmod(td, uap)
2355 	struct thread *td;
2356 	register struct lchmod_args /* {
2357 		syscallarg(char *) path;
2358 		syscallarg(int) mode;
2359 	} */ *uap;
2360 {
2361 	int error;
2362 	struct nameidata nd;
2363 
2364 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2365 	if ((error = namei(&nd)) != 0)
2366 		return (error);
2367 	NDFREE(&nd, NDF_ONLY_PNBUF);
2368 	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
2369 	vrele(nd.ni_vp);
2370 	return error;
2371 }
2372 
2373 /*
2374  * Change mode of a file given a file descriptor.
2375  */
2376 #ifndef _SYS_SYSPROTO_H_
2377 struct fchmod_args {
2378 	int	fd;
2379 	int	mode;
2380 };
2381 #endif
2382 /* ARGSUSED */
2383 int
2384 fchmod(td, uap)
2385 	struct thread *td;
2386 	register struct fchmod_args /* {
2387 		syscallarg(int) fd;
2388 		syscallarg(int) mode;
2389 	} */ *uap;
2390 {
2391 	struct file *fp;
2392 	int error;
2393 
2394 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2395 		return (error);
2396 	return setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode));
2397 }
2398 
2399 /*
2400  * Common implementation for chown(), lchown(), and fchown()
2401  */
2402 static int
2403 setfown(td, vp, uid, gid)
2404 	struct thread *td;
2405 	struct vnode *vp;
2406 	uid_t uid;
2407 	gid_t gid;
2408 {
2409 	int error;
2410 	struct mount *mp;
2411 	struct vattr vattr;
2412 
2413 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2414 		return (error);
2415 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
2416 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2417 	VATTR_NULL(&vattr);
2418 	vattr.va_uid = uid;
2419 	vattr.va_gid = gid;
2420 	error = VOP_SETATTR(vp, &vattr, td->td_proc->p_ucred, td);
2421 	VOP_UNLOCK(vp, 0, td);
2422 	vn_finished_write(mp);
2423 	return error;
2424 }
2425 
2426 /*
2427  * Set ownership given a path name.
2428  */
2429 #ifndef _SYS_SYSPROTO_H_
2430 struct chown_args {
2431 	char	*path;
2432 	int	uid;
2433 	int	gid;
2434 };
2435 #endif
2436 /* ARGSUSED */
2437 int
2438 chown(td, uap)
2439 	struct thread *td;
2440 	register struct chown_args /* {
2441 		syscallarg(char *) path;
2442 		syscallarg(int) uid;
2443 		syscallarg(int) gid;
2444 	} */ *uap;
2445 {
2446 	int error;
2447 	struct nameidata nd;
2448 
2449 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2450 	if ((error = namei(&nd)) != 0)
2451 		return (error);
2452 	NDFREE(&nd, NDF_ONLY_PNBUF);
2453 	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2454 	vrele(nd.ni_vp);
2455 	return (error);
2456 }
2457 
2458 /*
2459  * Set ownership given a path name, do not cross symlinks.
2460  */
2461 #ifndef _SYS_SYSPROTO_H_
2462 struct lchown_args {
2463 	char	*path;
2464 	int	uid;
2465 	int	gid;
2466 };
2467 #endif
2468 /* ARGSUSED */
2469 int
2470 lchown(td, uap)
2471 	struct thread *td;
2472 	register struct lchown_args /* {
2473 		syscallarg(char *) path;
2474 		syscallarg(int) uid;
2475 		syscallarg(int) gid;
2476 	} */ *uap;
2477 {
2478 	int error;
2479 	struct nameidata nd;
2480 
2481 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2482 	if ((error = namei(&nd)) != 0)
2483 		return (error);
2484 	NDFREE(&nd, NDF_ONLY_PNBUF);
2485 	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2486 	vrele(nd.ni_vp);
2487 	return (error);
2488 }
2489 
2490 /*
2491  * Set ownership given a file descriptor.
2492  */
2493 #ifndef _SYS_SYSPROTO_H_
2494 struct fchown_args {
2495 	int	fd;
2496 	int	uid;
2497 	int	gid;
2498 };
2499 #endif
2500 /* ARGSUSED */
2501 int
2502 fchown(td, uap)
2503 	struct thread *td;
2504 	register struct fchown_args /* {
2505 		syscallarg(int) fd;
2506 		syscallarg(int) uid;
2507 		syscallarg(int) gid;
2508 	} */ *uap;
2509 {
2510 	struct file *fp;
2511 	int error;
2512 
2513 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2514 		return (error);
2515 	return setfown(td, (struct vnode *)fp->f_data,
2516 		SCARG(uap, uid), SCARG(uap, gid));
2517 }
2518 
2519 /*
2520  * Common implementation code for utimes(), lutimes(), and futimes().
2521  */
2522 static int
2523 getutimes(usrtvp, tsp)
2524 	const struct timeval *usrtvp;
2525 	struct timespec *tsp;
2526 {
2527 	struct timeval tv[2];
2528 	int error;
2529 
2530 	if (usrtvp == NULL) {
2531 		microtime(&tv[0]);
2532 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2533 		tsp[1] = tsp[0];
2534 	} else {
2535 		if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
2536 			return (error);
2537 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2538 		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
2539 	}
2540 	return 0;
2541 }
2542 
2543 /*
2544  * Common implementation code for utimes(), lutimes(), and futimes().
2545  */
2546 static int
2547 setutimes(td, vp, ts, nullflag)
2548 	struct thread *td;
2549 	struct vnode *vp;
2550 	const struct timespec *ts;
2551 	int nullflag;
2552 {
2553 	int error;
2554 	struct mount *mp;
2555 	struct vattr vattr;
2556 
2557 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2558 		return (error);
2559 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
2560 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2561 	VATTR_NULL(&vattr);
2562 	vattr.va_atime = ts[0];
2563 	vattr.va_mtime = ts[1];
2564 	if (nullflag)
2565 		vattr.va_vaflags |= VA_UTIMES_NULL;
2566 	error = VOP_SETATTR(vp, &vattr, td->td_proc->p_ucred, td);
2567 	VOP_UNLOCK(vp, 0, td);
2568 	vn_finished_write(mp);
2569 	return error;
2570 }
2571 
2572 /*
2573  * Set the access and modification times of a file.
2574  */
2575 #ifndef _SYS_SYSPROTO_H_
2576 struct utimes_args {
2577 	char	*path;
2578 	struct	timeval *tptr;
2579 };
2580 #endif
2581 /* ARGSUSED */
2582 int
2583 utimes(td, uap)
2584 	struct thread *td;
2585 	register struct utimes_args /* {
2586 		syscallarg(char *) path;
2587 		syscallarg(struct timeval *) tptr;
2588 	} */ *uap;
2589 {
2590 	struct timespec ts[2];
2591 	struct timeval *usrtvp;
2592 	int error;
2593 	struct nameidata nd;
2594 
2595 	usrtvp = SCARG(uap, tptr);
2596 	if ((error = getutimes(usrtvp, ts)) != 0)
2597 		return (error);
2598 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2599 	if ((error = namei(&nd)) != 0)
2600 		return (error);
2601 	NDFREE(&nd, NDF_ONLY_PNBUF);
2602 	error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
2603 	vrele(nd.ni_vp);
2604 	return (error);
2605 }
2606 
2607 /*
2608  * Set the access and modification times of a file.
2609  */
2610 #ifndef _SYS_SYSPROTO_H_
2611 struct lutimes_args {
2612 	char	*path;
2613 	struct	timeval *tptr;
2614 };
2615 #endif
2616 /* ARGSUSED */
2617 int
2618 lutimes(td, uap)
2619 	struct thread *td;
2620 	register struct lutimes_args /* {
2621 		syscallarg(char *) path;
2622 		syscallarg(struct timeval *) tptr;
2623 	} */ *uap;
2624 {
2625 	struct timespec ts[2];
2626 	struct timeval *usrtvp;
2627 	int error;
2628 	struct nameidata nd;
2629 
2630 	usrtvp = SCARG(uap, tptr);
2631 	if ((error = getutimes(usrtvp, ts)) != 0)
2632 		return (error);
2633 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2634 	if ((error = namei(&nd)) != 0)
2635 		return (error);
2636 	NDFREE(&nd, NDF_ONLY_PNBUF);
2637 	error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
2638 	vrele(nd.ni_vp);
2639 	return (error);
2640 }
2641 
2642 /*
2643  * Set the access and modification times of a file.
2644  */
2645 #ifndef _SYS_SYSPROTO_H_
2646 struct futimes_args {
2647 	int	fd;
2648 	struct	timeval *tptr;
2649 };
2650 #endif
2651 /* ARGSUSED */
2652 int
2653 futimes(td, uap)
2654 	struct thread *td;
2655 	register struct futimes_args /* {
2656 		syscallarg(int ) fd;
2657 		syscallarg(struct timeval *) tptr;
2658 	} */ *uap;
2659 {
2660 	struct timespec ts[2];
2661 	struct file *fp;
2662 	struct timeval *usrtvp;
2663 	int error;
2664 
2665 	usrtvp = SCARG(uap, tptr);
2666 	if ((error = getutimes(usrtvp, ts)) != 0)
2667 		return (error);
2668 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2669 		return (error);
2670 	return setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL);
2671 }
2672 
2673 /*
2674  * Truncate a file given its path name.
2675  */
2676 #ifndef _SYS_SYSPROTO_H_
2677 struct truncate_args {
2678 	char	*path;
2679 	int	pad;
2680 	off_t	length;
2681 };
2682 #endif
2683 /* ARGSUSED */
2684 int
2685 truncate(td, uap)
2686 	struct thread *td;
2687 	register struct truncate_args /* {
2688 		syscallarg(char *) path;
2689 		syscallarg(int) pad;
2690 		syscallarg(off_t) length;
2691 	} */ *uap;
2692 {
2693 	struct mount *mp;
2694 	struct vnode *vp;
2695 	struct vattr vattr;
2696 	int error;
2697 	struct nameidata nd;
2698 
2699 	if (uap->length < 0)
2700 		return(EINVAL);
2701 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2702 	if ((error = namei(&nd)) != 0)
2703 		return (error);
2704 	vp = nd.ni_vp;
2705 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2706 		vrele(vp);
2707 		return (error);
2708 	}
2709 	NDFREE(&nd, NDF_ONLY_PNBUF);
2710 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
2711 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2712 	if (vp->v_type == VDIR)
2713 		error = EISDIR;
2714 	else if ((error = vn_writechk(vp)) == 0 &&
2715 	    (error = VOP_ACCESS(vp, VWRITE, td->td_proc->p_ucred, td)) == 0) {
2716 		VATTR_NULL(&vattr);
2717 		vattr.va_size = SCARG(uap, length);
2718 		error = VOP_SETATTR(vp, &vattr, td->td_proc->p_ucred, td);
2719 	}
2720 	vput(vp);
2721 	vn_finished_write(mp);
2722 	return (error);
2723 }
2724 
2725 /*
2726  * Truncate a file given a file descriptor.
2727  */
2728 #ifndef _SYS_SYSPROTO_H_
2729 struct ftruncate_args {
2730 	int	fd;
2731 	int	pad;
2732 	off_t	length;
2733 };
2734 #endif
2735 /* ARGSUSED */
2736 int
2737 ftruncate(td, uap)
2738 	struct thread *td;
2739 	register struct ftruncate_args /* {
2740 		syscallarg(int) fd;
2741 		syscallarg(int) pad;
2742 		syscallarg(off_t) length;
2743 	} */ *uap;
2744 {
2745 	struct mount *mp;
2746 	struct vattr vattr;
2747 	struct vnode *vp;
2748 	struct file *fp;
2749 	int error;
2750 
2751 	if (uap->length < 0)
2752 		return(EINVAL);
2753 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2754 		return (error);
2755 	if ((fp->f_flag & FWRITE) == 0)
2756 		return (EINVAL);
2757 	vp = (struct vnode *)fp->f_data;
2758 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2759 		return (error);
2760 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
2761 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2762 	if (vp->v_type == VDIR)
2763 		error = EISDIR;
2764 	else if ((error = vn_writechk(vp)) == 0) {
2765 		VATTR_NULL(&vattr);
2766 		vattr.va_size = SCARG(uap, length);
2767 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2768 	}
2769 	VOP_UNLOCK(vp, 0, td);
2770 	vn_finished_write(mp);
2771 	return (error);
2772 }
2773 
2774 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2775 /*
2776  * Truncate a file given its path name.
2777  */
2778 #ifndef _SYS_SYSPROTO_H_
2779 struct otruncate_args {
2780 	char	*path;
2781 	long	length;
2782 };
2783 #endif
2784 /* ARGSUSED */
2785 int
2786 otruncate(td, uap)
2787 	struct thread *td;
2788 	register struct otruncate_args /* {
2789 		syscallarg(char *) path;
2790 		syscallarg(long) length;
2791 	} */ *uap;
2792 {
2793 	struct truncate_args /* {
2794 		syscallarg(char *) path;
2795 		syscallarg(int) pad;
2796 		syscallarg(off_t) length;
2797 	} */ nuap;
2798 
2799 	SCARG(&nuap, path) = SCARG(uap, path);
2800 	SCARG(&nuap, length) = SCARG(uap, length);
2801 	return (truncate(td, &nuap));
2802 }
2803 
2804 /*
2805  * Truncate a file given a file descriptor.
2806  */
2807 #ifndef _SYS_SYSPROTO_H_
2808 struct oftruncate_args {
2809 	int	fd;
2810 	long	length;
2811 };
2812 #endif
2813 /* ARGSUSED */
2814 int
2815 oftruncate(td, uap)
2816 	struct thread *td;
2817 	register struct oftruncate_args /* {
2818 		syscallarg(int) fd;
2819 		syscallarg(long) length;
2820 	} */ *uap;
2821 {
2822 	struct ftruncate_args /* {
2823 		syscallarg(int) fd;
2824 		syscallarg(int) pad;
2825 		syscallarg(off_t) length;
2826 	} */ nuap;
2827 
2828 	SCARG(&nuap, fd) = SCARG(uap, fd);
2829 	SCARG(&nuap, length) = SCARG(uap, length);
2830 	return (ftruncate(td, &nuap));
2831 }
2832 #endif /* COMPAT_43 || COMPAT_SUNOS */
2833 
2834 /*
2835  * Sync an open file.
2836  */
2837 #ifndef _SYS_SYSPROTO_H_
2838 struct fsync_args {
2839 	int	fd;
2840 };
2841 #endif
2842 /* ARGSUSED */
2843 int
2844 fsync(td, uap)
2845 	struct thread *td;
2846 	struct fsync_args /* {
2847 		syscallarg(int) fd;
2848 	} */ *uap;
2849 {
2850 	struct vnode *vp;
2851 	struct mount *mp;
2852 	struct file *fp;
2853 	vm_object_t obj;
2854 	int error;
2855 
2856 	GIANT_REQUIRED;
2857 
2858 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2859 		return (error);
2860 	vp = (struct vnode *)fp->f_data;
2861 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2862 		return (error);
2863 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2864 	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2865 		vm_object_page_clean(obj, 0, 0, 0);
2866 	}
2867 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2868 #ifdef SOFTUPDATES
2869 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2870 	    error = softdep_fsync(vp);
2871 #endif
2872 
2873 	VOP_UNLOCK(vp, 0, td);
2874 	vn_finished_write(mp);
2875 	return (error);
2876 }
2877 
2878 /*
2879  * Rename files.  Source and destination must either both be directories,
2880  * or both not be directories.  If target is a directory, it must be empty.
2881  */
/*
 * Flow: look up the source (DELETE, WANTPARENT | SAVESTART), start a write
 * on its mount, look up the destination (RENAME, LOCKPARENT | LOCKLEAF |
 * ...), validate the pair, then call VOP_RENAME().
 *
 * Internally error == -1 means "source and destination are the same
 * directory entry"; it takes the cleanup branch and is turned into a
 * return value of 0 at out1.
 */
2882 #ifndef _SYS_SYSPROTO_H_
2883 struct rename_args {
2884 	char	*from;
2885 	char	*to;
2886 };
2887 #endif
2888 /* ARGSUSED */
2889 int
2890 rename(td, uap)
2891 	struct thread *td;
2892 	register struct rename_args /* {
2893 		syscallarg(char *) from;
2894 		syscallarg(char *) to;
2895 	} */ *uap;
2896 {
2897 	struct mount *mp;
2898 	struct vnode *tvp, *fvp, *tdvp;
2899 	struct nameidata fromnd, tond;
2900 	int error;
2901 
2902 	bwillwrite();
2903 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2904 	    SCARG(uap, from), td);
2905 	if ((error = namei(&fromnd)) != 0)
2906 		return (error);
2907 	fvp = fromnd.ni_vp;
	/* Write could not be started: drop what the source lookup returned. */
2908 	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
2909 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2910 		vrele(fromnd.ni_dvp);
2911 		vrele(fvp);
2912 		goto out1;
2913 	}
2914 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
2915 	    UIO_USERSPACE, SCARG(uap, to), td);
2916 	if (fromnd.ni_vp->v_type == VDIR)
2917 		tond.ni_cnd.cn_flags |= WILLBEDIR;
	/*
	 * NOTE(review): this failure path jumps to out1 and therefore skips
	 * vn_finished_write(mp) although vn_start_write() succeeded above --
	 * confirm whether that is intended.
	 */
2918 	if ((error = namei(&tond)) != 0) {
2919 		/* Translate error code for rename("dir1", "dir2/."). */
2920 		if (error == EISDIR && fvp->v_type == VDIR)
2921 			error = EINVAL;
2922 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2923 		vrele(fromnd.ni_dvp);
2924 		vrele(fvp);
2925 		goto out1;
2926 	}
2927 	tdvp = tond.ni_dvp;
2928 	tvp = tond.ni_vp;
	/* An existing target must be of the same kind (dir / non-dir). */
2929 	if (tvp != NULL) {
2930 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2931 			error = ENOTDIR;
2932 			goto out;
2933 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2934 			error = EISDIR;
2935 			goto out;
2936 		}
2937 	}
	/*
	 * The source vnode is the destination's parent directory.
	 * NOTE(review): error is not tested before the next check, which
	 * can overwrite this EINVAL with -1 -- confirm that is intended.
	 */
2938 	if (fvp == tdvp)
2939 		error = EINVAL;
2940 	/*
2941 	 * If source is the same as the destination (that is the
2942 	 * same inode number with the same name in the same directory),
2943 	 * then there is nothing to do.
2944 	 */
2945 	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
2946 	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
2947 	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
2948 	      fromnd.ni_cnd.cn_namelen))
2949 		error = -1;
2950 out:
	/*
	 * Success branch: no vrele()/vput() of the four vnodes follows
	 * VOP_RENAME() here, so presumably the VOP releases them itself --
	 * TODO confirm against the VOP_RENAME contract.
	 */
2951 	if (!error) {
2952 		VOP_LEASE(tdvp, td, td->td_proc->p_ucred, LEASE_WRITE);
2953 		if (fromnd.ni_dvp != tdvp) {
2954 			VOP_LEASE(fromnd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE);
2955 		}
2956 		if (tvp) {
2957 			VOP_LEASE(tvp, td, td->td_proc->p_ucred, LEASE_WRITE);
2958 		}
2959 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2960 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2961 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2962 		NDFREE(&tond, NDF_ONLY_PNBUF);
2963 	} else {
		/*
		 * Failure (or the -1 "nothing to do" case): release what
		 * both lookups returned.  When tdvp == tvp only a single
		 * vput() is issued, on tvp; tdvp gets a plain vrele().
		 */
2964 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2965 		NDFREE(&tond, NDF_ONLY_PNBUF);
2966 		if (tdvp == tvp)
2967 			vrele(tdvp);
2968 		else
2969 			vput(tdvp);
2970 		if (tvp)
2971 			vput(tvp);
2972 		vrele(fromnd.ni_dvp);
2973 		vrele(fvp);
2974 	}
2975 	vrele(tond.ni_startdir);
2976 	vn_finished_write(mp);
	/* NOTE(review): these checks run after the vnodes were released. */
2977 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2978 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2979 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2980 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2981 out1:
2982 	if (fromnd.ni_startdir)
2983 		vrele(fromnd.ni_startdir);
	/* -1 is the internal "same entry" marker, reported as success. */
2984 	if (error == -1)
2985 		return (0);
2986 	return (error);
2987 }
2988 
2989 /*
2990  * Make a directory file.
2991  */
2992 #ifndef _SYS_SYSPROTO_H_
2993 struct mkdir_args {
2994 	char	*path;
2995 	int	mode;
2996 };
2997 #endif
2998 /* ARGSUSED */
2999 int
3000 mkdir(td, uap)
3001 	struct thread *td;
3002 	register struct mkdir_args /* {
3003 		syscallarg(char *) path;
3004 		syscallarg(int) mode;
3005 	} */ *uap;
3006 {
3007 
3008 	return vn_mkdir(uap->path, uap->mode, UIO_USERSPACE, td);
3009 }
3010 
3011 int
3012 vn_mkdir(path, mode, segflg, td)
3013 	char *path;
3014 	int mode;
3015 	enum uio_seg segflg;
3016 	struct thread *td;
3017 {
3018 	struct mount *mp;
3019 	struct vnode *vp;
3020 	struct vattr vattr;
3021 	int error;
3022 	struct nameidata nd;
3023 
3024 restart:
3025 	bwillwrite();
3026 	NDINIT(&nd, CREATE, LOCKPARENT, segflg, path, td);
3027 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3028 	if ((error = namei(&nd)) != 0)
3029 		return (error);
3030 	vp = nd.ni_vp;
3031 	if (vp != NULL) {
3032 		NDFREE(&nd, NDF_ONLY_PNBUF);
3033 		vrele(vp);
3034 		vput(nd.ni_dvp);
3035 		return (EEXIST);
3036 	}
3037 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3038 		NDFREE(&nd, NDF_ONLY_PNBUF);
3039 		vput(nd.ni_dvp);
3040 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3041 			return (error);
3042 		goto restart;
3043 	}
3044 	VATTR_NULL(&vattr);
3045 	vattr.va_type = VDIR;
3046 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3047 	VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE);
3048 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3049 	NDFREE(&nd, NDF_ONLY_PNBUF);
3050 	vput(nd.ni_dvp);
3051 	if (!error)
3052 		vput(nd.ni_vp);
3053 	vn_finished_write(mp);
3054 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
3055 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
3056 	return (error);
3057 }
3058 
3059 /*
3060  * Remove a directory file.
3061  */
3062 #ifndef _SYS_SYSPROTO_H_
3063 struct rmdir_args {
3064 	char	*path;
3065 };
3066 #endif
3067 /* ARGSUSED */
3068 int
3069 rmdir(td, uap)
3070 	struct thread *td;
3071 	struct rmdir_args /* {
3072 		syscallarg(char *) path;
3073 	} */ *uap;
3074 {
3075 	struct mount *mp;
3076 	struct vnode *vp;
3077 	int error;
3078 	struct nameidata nd;
3079 
3080 restart:
3081 	bwillwrite();
3082 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3083 	    SCARG(uap, path), td);
3084 	if ((error = namei(&nd)) != 0)
3085 		return (error);
3086 	vp = nd.ni_vp;
3087 	if (vp->v_type != VDIR) {
3088 		error = ENOTDIR;
3089 		goto out;
3090 	}
3091 	/*
3092 	 * No rmdir "." please.
3093 	 */
3094 	if (nd.ni_dvp == vp) {
3095 		error = EINVAL;
3096 		goto out;
3097 	}
3098 	/*
3099 	 * The root of a mounted filesystem cannot be deleted.
3100 	 */
3101 	if (vp->v_flag & VROOT) {
3102 		error = EBUSY;
3103 		goto out;
3104 	}
3105 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3106 		NDFREE(&nd, NDF_ONLY_PNBUF);
3107 		if (nd.ni_dvp == vp)
3108 			vrele(nd.ni_dvp);
3109 		else
3110 			vput(nd.ni_dvp);
3111 		vput(vp);
3112 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3113 			return (error);
3114 		goto restart;
3115 	}
3116 	VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE);
3117 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
3118 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3119 	vn_finished_write(mp);
3120 out:
3121 	NDFREE(&nd, NDF_ONLY_PNBUF);
3122 	if (nd.ni_dvp == vp)
3123 		vrele(nd.ni_dvp);
3124 	else
3125 		vput(nd.ni_dvp);
3126 	vput(vp);
3127 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3128 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3129 	return (error);
3130 }
3131 
3132 #ifdef COMPAT_43
3133 /*
3134  * Read a block of directory entries in a file system independent format.
3135  */
/*
 * COMPAT_43 variant of getdirentries().  Entries are read with
 * VOP_READDIR() and, except on the direct path taken on non-little-endian
 * machines when mnt_maxsymlinklen <= 0, staged in a kernel buffer where
 * the d_type / d_namlen bytes of every record are rewritten before the
 * data is copied to the caller.
 *
 * Returns EINVAL for count > 64K or a non-directory, EBADF for a
 * descriptor without FREAD, EIO for a record with d_reclen == 0, or the
 * error from getvnode(), VOP_READDIR(), uiomove(), union_dircheckp() or
 * the final copyout().  The byte count transferred is stored in
 * td_retval[0].
 */
3136 #ifndef _SYS_SYSPROTO_H_
3137 struct ogetdirentries_args {
3138 	int	fd;
3139 	char	*buf;
3140 	u_int	count;
3141 	long	*basep;
3142 };
3143 #endif
3144 int
3145 ogetdirentries(td, uap)
3146 	struct thread *td;
3147 	register struct ogetdirentries_args /* {
3148 		syscallarg(int) fd;
3149 		syscallarg(char *) buf;
3150 		syscallarg(u_int) count;
3151 		syscallarg(long *) basep;
3152 	} */ *uap;
3153 {
3154 	struct vnode *vp;
3155 	struct file *fp;
3156 	struct uio auio, kuio;
3157 	struct iovec aiov, kiov;
3158 	struct dirent *dp, *edp;
3159 	caddr_t dirbuf;
3160 	int error, eofflag, readcnt;
3161 	long loff;
3162 
	/* This limit also bounds the kernel staging buffer MALLOC'd below. */
3163 	/* XXX arbitrary sanity limit on `count'. */
3164 	if (SCARG(uap, count) > 64 * 1024)
3165 		return (EINVAL);
3166 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3167 		return (error);
3168 	if ((fp->f_flag & FREAD) == 0)
3169 		return (EBADF);
3170 	vp = (struct vnode *)fp->f_data;
	/* Re-entered with a new vp when a union layer below should be read. */
3171 unionread:
3172 	if (vp->v_type != VDIR)
3173 		return (EINVAL);
	/* auio describes the caller's buffer in user space. */
3174 	aiov.iov_base = SCARG(uap, buf);
3175 	aiov.iov_len = SCARG(uap, count);
3176 	auio.uio_iov = &aiov;
3177 	auio.uio_iovcnt = 1;
3178 	auio.uio_rw = UIO_READ;
3179 	auio.uio_segflg = UIO_USERSPACE;
3180 	auio.uio_td = td;
3181 	auio.uio_resid = SCARG(uap, count);
3182 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* loff keeps the offset the read started at; it is copied out below. */
3183 	loff = auio.uio_offset = fp->f_offset;
	/*
	 * On non-little-endian machines, when mnt_maxsymlinklen <= 0, the
	 * entries are read straight into the user buffer with no rewrite.
	 * Otherwise (and always on little-endian) the braced block below runs.
	 */
3184 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3185 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3186 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3187 			    NULL, NULL);
3188 			fp->f_offset = auio.uio_offset;
3189 		} else
3190 #	endif
3191 	{
		/* kuio is a copy of auio redirected at a kernel buffer. */
3192 		kuio = auio;
3193 		kuio.uio_iov = &kiov;
3194 		kuio.uio_segflg = UIO_SYSSPACE;
3195 		kiov.iov_len = SCARG(uap, count);
		/* NOTE(review): count may be 0 here -- confirm MALLOC(0) is fine. */
3196 		MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
3197 		kiov.iov_base = dirbuf;
3198 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3199 			    NULL, NULL);
3200 		fp->f_offset = kuio.uio_offset;
3201 		if (error == 0) {
			/* Walk the records that were read, rewriting each header. */
3202 			readcnt = SCARG(uap, count) - kuio.uio_resid;
3203 			edp = (struct dirent *)&dirbuf[readcnt];
3204 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3205 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3206 					/*
3207 					 * The expected low byte of
3208 					 * dp->d_namlen is our dp->d_type.
3209 					 * The high MBZ byte of dp->d_namlen
3210 					 * is our dp->d_namlen.
3211 					 */
3212 					dp->d_type = dp->d_namlen;
3213 					dp->d_namlen = 0;
3214 #				else
3215 					/*
3216 					 * The dp->d_type is the high byte
3217 					 * of the expected dp->d_namlen,
3218 					 * so must be zero'ed.
3219 					 */
3220 					dp->d_type = 0;
3221 #				endif
				/* A zero record length would never advance: fail. */
3222 				if (dp->d_reclen > 0) {
3223 					dp = (struct dirent *)
3224 					    ((char *)dp + dp->d_reclen);
3225 				} else {
3226 					error = EIO;
3227 					break;
3228 				}
3229 			}
			/* Copy out only if the walk completed (no EIO break). */
3230 			if (dp >= edp)
3231 				error = uiomove(dirbuf, readcnt, &auio);
3232 		}
3233 		FREE(dirbuf, M_TEMP);
3234 	}
3235 	VOP_UNLOCK(vp, 0, td);
3236 	if (error)
3237 		return (error);
	/* Nothing was transferred: see whether another layer should be read. */
3238 	if (SCARG(uap, count) == auio.uio_resid) {
3239 		if (union_dircheckp) {
			/* A return of -1 asks for a retry on the updated vp. */
3240 			error = union_dircheckp(td, &vp, fp);
3241 			if (error == -1)
3242 				goto unionread;
3243 			if (error)
3244 				return (error);
3245 		}
3246 		if ((vp->v_flag & VROOT) &&
3247 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			/*
			 * Switch the open file over to the covered vnode,
			 * restart at offset 0, and drop the old reference.
			 */
3248 			struct vnode *tvp = vp;
3249 			vp = vp->v_mount->mnt_vnodecovered;
3250 			VREF(vp);
3251 			fp->f_data = (caddr_t) vp;
3252 			fp->f_offset = 0;
3253 			vrele(tvp);
3254 			goto unionread;
3255 		}
3256 	}
	/* basep is written unconditionally; it is not checked for NULL. */
3257 	error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3258 	    sizeof(long));
3259 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
3260 	return (error);
3261 }
3262 #endif /* COMPAT_43 */
3263 
3264 /*
3265  * Read a block of directory entries in a file system independent format.
3266  */
3267 #ifndef _SYS_SYSPROTO_H_
3268 struct getdirentries_args {
3269 	int	fd;
3270 	char	*buf;
3271 	u_int	count;
3272 	long	*basep;
3273 };
3274 #endif
3275 int
3276 getdirentries(td, uap)
3277 	struct thread *td;
3278 	register struct getdirentries_args /* {
3279 		syscallarg(int) fd;
3280 		syscallarg(char *) buf;
3281 		syscallarg(u_int) count;
3282 		syscallarg(long *) basep;
3283 	} */ *uap;
3284 {
3285 	struct vnode *vp;
3286 	struct file *fp;
3287 	struct uio auio;
3288 	struct iovec aiov;
3289 	long loff;
3290 	int error, eofflag;
3291 
3292 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3293 		return (error);
3294 	if ((fp->f_flag & FREAD) == 0)
3295 		return (EBADF);
3296 	vp = (struct vnode *)fp->f_data;
3297 unionread:
3298 	if (vp->v_type != VDIR)
3299 		return (EINVAL);
3300 	aiov.iov_base = SCARG(uap, buf);
3301 	aiov.iov_len = SCARG(uap, count);
3302 	auio.uio_iov = &aiov;
3303 	auio.uio_iovcnt = 1;
3304 	auio.uio_rw = UIO_READ;
3305 	auio.uio_segflg = UIO_USERSPACE;
3306 	auio.uio_td = td;
3307 	auio.uio_resid = SCARG(uap, count);
3308 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3309 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3310 	loff = auio.uio_offset = fp->f_offset;
3311 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
3312 	fp->f_offset = auio.uio_offset;
3313 	VOP_UNLOCK(vp, 0, td);
3314 	if (error)
3315 		return (error);
3316 	if (SCARG(uap, count) == auio.uio_resid) {
3317 		if (union_dircheckp) {
3318 			error = union_dircheckp(td, &vp, fp);
3319 			if (error == -1)
3320 				goto unionread;
3321 			if (error)
3322 				return (error);
3323 		}
3324 		if ((vp->v_flag & VROOT) &&
3325 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3326 			struct vnode *tvp = vp;
3327 			vp = vp->v_mount->mnt_vnodecovered;
3328 			VREF(vp);
3329 			fp->f_data = (caddr_t) vp;
3330 			fp->f_offset = 0;
3331 			vrele(tvp);
3332 			goto unionread;
3333 		}
3334 	}
3335 	if (SCARG(uap, basep) != NULL) {
3336 		error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3337 		    sizeof(long));
3338 	}
3339 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
3340 	return (error);
3341 }
3342 #ifndef _SYS_SYSPROTO_H_
3343 struct getdents_args {
3344 	int fd;
3345 	char *buf;
3346 	size_t count;
3347 };
3348 #endif
3349 int
3350 getdents(td, uap)
3351 	struct thread *td;
3352 	register struct getdents_args /* {
3353 		syscallarg(int) fd;
3354 		syscallarg(char *) buf;
3355 		syscallarg(u_int) count;
3356 	} */ *uap;
3357 {
3358 	struct getdirentries_args ap;
3359 	ap.fd = uap->fd;
3360 	ap.buf = uap->buf;
3361 	ap.count = uap->count;
3362 	ap.basep = NULL;
3363 	return getdirentries(td, &ap);
3364 }
3365 
3366 /*
3367  * Set the mode mask for creation of filesystem nodes.
3368  *
3369  * MP SAFE
3370  */
3371 #ifndef _SYS_SYSPROTO_H_
3372 struct umask_args {
3373 	int	newmask;
3374 };
3375 #endif
3376 int
3377 umask(td, uap)
3378 	struct thread *td;
3379 	struct umask_args /* {
3380 		syscallarg(int) newmask;
3381 	} */ *uap;
3382 {
3383 	register struct filedesc *fdp;
3384 
3385 	fdp = td->td_proc->p_fd;
3386 	td->td_retval[0] = fdp->fd_cmask;
3387 	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
3388 	return (0);
3389 }
3390 
3391 /*
3392  * Void all references to file by ripping underlying filesystem
3393  * away from vnode.
3394  */
3395 #ifndef _SYS_SYSPROTO_H_
3396 struct revoke_args {
3397 	char	*path;
3398 };
3399 #endif
3400 /* ARGSUSED */
3401 int
3402 revoke(td, uap)
3403 	struct thread *td;
3404 	register struct revoke_args /* {
3405 		syscallarg(char *) path;
3406 	} */ *uap;
3407 {
3408 	struct mount *mp;
3409 	struct vnode *vp;
3410 	struct vattr vattr;
3411 	int error;
3412 	struct nameidata nd;
3413 
3414 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3415 	if ((error = namei(&nd)) != 0)
3416 		return (error);
3417 	vp = nd.ni_vp;
3418 	NDFREE(&nd, NDF_ONLY_PNBUF);
3419 	if (vp->v_type != VCHR) {
3420 		error = EINVAL;
3421 		goto out;
3422 	}
3423 	error = VOP_GETATTR(vp, &vattr, td->td_proc->p_ucred, td);
3424 	if (error)
3425 		goto out;
3426 	if (td->td_proc->p_ucred->cr_uid != vattr.va_uid) {
3427 		error = suser_xxx(0, td->td_proc, PRISON_ROOT);
3428 		if (error)
3429 			goto out;
3430 	}
3431 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3432 		goto out;
3433 	if (vcount(vp) > 1)
3434 		VOP_REVOKE(vp, REVOKEALL);
3435 	vn_finished_write(mp);
3436 out:
3437 	vrele(vp);
3438 	return (error);
3439 }
3440 
3441 /*
3442  * Convert a user file descriptor to a kernel file entry.
3443  */
3444 int
3445 getvnode(fdp, fd, fpp)
3446 	struct filedesc *fdp;
3447 	int fd;
3448 	struct file **fpp;
3449 {
3450 	struct file *fp;
3451 
3452 	if ((u_int)fd >= fdp->fd_nfiles ||
3453 	    (fp = fdp->fd_ofiles[fd]) == NULL)
3454 		return (EBADF);
3455 	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO)
3456 		return (EINVAL);
3457 	*fpp = fp;
3458 	return (0);
3459 }
3460 /*
3461  * Get (NFS) file handle
3462  */
3463 #ifndef _SYS_SYSPROTO_H_
3464 struct getfh_args {
3465 	char	*fname;
3466 	fhandle_t *fhp;
3467 };
3468 #endif
3469 int
3470 getfh(td, uap)
3471 	struct thread *td;
3472 	register struct getfh_args *uap;
3473 {
3474 	struct nameidata nd;
3475 	fhandle_t fh;
3476 	register struct vnode *vp;
3477 	int error;
3478 
3479 	/*
3480 	 * Must be super user
3481 	 */
3482 	error = suser_td(td);
3483 	if (error)
3484 		return (error);
3485 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3486 	error = namei(&nd);
3487 	if (error)
3488 		return (error);
3489 	NDFREE(&nd, NDF_ONLY_PNBUF);
3490 	vp = nd.ni_vp;
3491 	bzero(&fh, sizeof(fh));
3492 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3493 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3494 	vput(vp);
3495 	if (error)
3496 		return (error);
3497 	error = copyout(&fh, uap->fhp, sizeof (fh));
3498 	return (error);
3499 }
3500 
3501 /*
3502  * syscall for the rpc.lockd to use to translate a NFS file handle into
3503  * an open descriptor.
3504  *
3505  * warning: do not remove the suser() call or this becomes one giant
3506  * security hole.
3507  */
3508 #ifndef _SYS_SYSPROTO_H_
3509 struct fhopen_args {
3510 	const struct fhandle *u_fhp;
3511 	int flags;
3512 };
3513 #endif
3514 int
3515 fhopen(td, uap)
3516 	struct thread *td;
3517 	struct fhopen_args /* {
3518 		syscallarg(const struct fhandle *) u_fhp;
3519 		syscallarg(int) flags;
3520 	} */ *uap;
3521 {
3522 	struct proc *p = td->td_proc;
3523 	struct mount *mp;
3524 	struct vnode *vp;
3525 	struct fhandle fhp;
3526 	struct vattr vat;
3527 	struct vattr *vap = &vat;
3528 	struct flock lf;
3529 	struct file *fp;
3530 	register struct filedesc *fdp = p->p_fd;
3531 	int fmode, mode, error, type;
3532 	struct file *nfp;
3533 	int indx;
3534 
3535 	/*
3536 	 * Must be super user
3537 	 */
3538 	error = suser_td(td);
3539 	if (error)
3540 		return (error);
3541 
3542 	fmode = FFLAGS(SCARG(uap, flags));
3543 	/* why not allow a non-read/write open for our lockd? */
3544 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3545 		return (EINVAL);
3546 	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
3547 	if (error)
3548 		return(error);
3549 	/* find the mount point */
3550 	mp = vfs_getvfs(&fhp.fh_fsid);
3551 	if (mp == NULL)
3552 		return (ESTALE);
3553 	/* now give me my vnode, it gets returned to me locked */
3554 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3555 	if (error)
3556 		return (error);
3557  	/*
3558 	 * from now on we have to make sure not
3559 	 * to forget about the vnode
3560 	 * any error that causes an abort must vput(vp)
3561 	 * just set error = err and 'goto bad;'.
3562 	 */
3563 
3564 	/*
3565 	 * from vn_open
3566 	 */
3567 	if (vp->v_type == VLNK) {
3568 		error = EMLINK;
3569 		goto bad;
3570 	}
3571 	if (vp->v_type == VSOCK) {
3572 		error = EOPNOTSUPP;
3573 		goto bad;
3574 	}
3575 	mode = 0;
3576 	if (fmode & (FWRITE | O_TRUNC)) {
3577 		if (vp->v_type == VDIR) {
3578 			error = EISDIR;
3579 			goto bad;
3580 		}
3581 		error = vn_writechk(vp);
3582 		if (error)
3583 			goto bad;
3584 		mode |= VWRITE;
3585 	}
3586 	if (fmode & FREAD)
3587 		mode |= VREAD;
3588 	if (mode) {
3589 		error = VOP_ACCESS(vp, mode, p->p_ucred, td);
3590 		if (error)
3591 			goto bad;
3592 	}
3593 	if (fmode & O_TRUNC) {
3594 		VOP_UNLOCK(vp, 0, td);				/* XXX */
3595 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3596 			vrele(vp);
3597 			return (error);
3598 		}
3599 		VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE);
3600 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
3601 		VATTR_NULL(vap);
3602 		vap->va_size = 0;
3603 		error = VOP_SETATTR(vp, vap, p->p_ucred, td);
3604 		vn_finished_write(mp);
3605 		if (error)
3606 			goto bad;
3607 	}
3608 	error = VOP_OPEN(vp, fmode, p->p_ucred, td);
3609 	if (error)
3610 		goto bad;
3611 	/*
3612 	 * Make sure that a VM object is created for VMIO support.
3613 	 */
3614 	if (vn_canvmio(vp) == TRUE) {
3615 		if ((error = vfs_object_create(vp, td, p->p_ucred)) != 0)
3616 			goto bad;
3617 	}
3618 	if (fmode & FWRITE)
3619 		vp->v_writecount++;
3620 
3621 	/*
3622 	 * end of vn_open code
3623 	 */
3624 
3625 	if ((error = falloc(td, &nfp, &indx)) != 0) {
3626 		if (fmode & FWRITE)
3627 			vp->v_writecount--;
3628 		goto bad;
3629 	}
3630 	fp = nfp;
3631 
3632 	/*
3633 	 * Hold an extra reference to avoid having fp ripped out
3634 	 * from under us while we block in the lock op
3635 	 */
3636 	fhold(fp);
3637 	nfp->f_data = (caddr_t)vp;
3638 	nfp->f_flag = fmode & FMASK;
3639 	nfp->f_ops = &vnops;
3640 	nfp->f_type = DTYPE_VNODE;
3641 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
3642 		lf.l_whence = SEEK_SET;
3643 		lf.l_start = 0;
3644 		lf.l_len = 0;
3645 		if (fmode & O_EXLOCK)
3646 			lf.l_type = F_WRLCK;
3647 		else
3648 			lf.l_type = F_RDLCK;
3649 		type = F_FLOCK;
3650 		if ((fmode & FNONBLOCK) == 0)
3651 			type |= F_WAIT;
3652 		VOP_UNLOCK(vp, 0, td);
3653 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
3654 			/*
3655 			 * The lock request failed.  Normally close the
3656 			 * descriptor but handle the case where someone might
3657 			 * have dup()d or close()d it when we weren't looking.
3658 			 */
3659 			if (fdp->fd_ofiles[indx] == fp) {
3660 				fdp->fd_ofiles[indx] = NULL;
3661 				fdrop(fp, td);
3662 			}
3663 			/*
3664 			 * release our private reference
3665 			 */
3666 			fdrop(fp, td);
3667 			return(error);
3668 		}
3669 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3670 		fp->f_flag |= FHASLOCK;
3671 	}
3672 	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
3673 		vfs_object_create(vp, td, p->p_ucred);
3674 
3675 	VOP_UNLOCK(vp, 0, td);
3676 	fdrop(fp, td);
3677 	td->td_retval[0] = indx;
3678 	return (0);
3679 
3680 bad:
3681 	vput(vp);
3682 	return (error);
3683 }
3684 
3685 /*
3686  * Stat an (NFS) file handle.
3687  */
3688 #ifndef _SYS_SYSPROTO_H_
3689 struct fhstat_args {
3690 	struct fhandle *u_fhp;
3691 	struct stat *sb;
3692 };
3693 #endif
3694 int
3695 fhstat(td, uap)
3696 	struct thread *td;
3697 	register struct fhstat_args /* {
3698 		syscallarg(struct fhandle *) u_fhp;
3699 		syscallarg(struct stat *) sb;
3700 	} */ *uap;
3701 {
3702 	struct stat sb;
3703 	fhandle_t fh;
3704 	struct mount *mp;
3705 	struct vnode *vp;
3706 	int error;
3707 
3708 	/*
3709 	 * Must be super user
3710 	 */
3711 	error = suser_td(td);
3712 	if (error)
3713 		return (error);
3714 
3715 	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
3716 	if (error)
3717 		return (error);
3718 
3719 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3720 		return (ESTALE);
3721 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3722 		return (error);
3723 	error = vn_stat(vp, &sb, td);
3724 	vput(vp);
3725 	if (error)
3726 		return (error);
3727 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
3728 	return (error);
3729 }
3730 
3731 /*
3732  * Implement fstatfs() for (NFS) file handles.
3733  */
3734 #ifndef _SYS_SYSPROTO_H_
3735 struct fhstatfs_args {
3736 	struct fhandle *u_fhp;
3737 	struct statfs *buf;
3738 };
3739 #endif
3740 int
3741 fhstatfs(td, uap)
3742 	struct thread *td;
3743 	struct fhstatfs_args /* {
3744 		syscallarg(struct fhandle) *u_fhp;
3745 		syscallarg(struct statfs) *buf;
3746 	} */ *uap;
3747 {
3748 	struct statfs *sp;
3749 	struct mount *mp;
3750 	struct vnode *vp;
3751 	struct statfs sb;
3752 	fhandle_t fh;
3753 	int error;
3754 
3755 	/*
3756 	 * Must be super user
3757 	 */
3758 	error = suser_td(td);
3759 	if (error)
3760 		return (error);
3761 
3762 	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
3763 		return (error);
3764 
3765 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3766 		return (ESTALE);
3767 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3768 		return (error);
3769 	mp = vp->v_mount;
3770 	sp = &mp->mnt_stat;
3771 	vput(vp);
3772 	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3773 		return (error);
3774 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3775 	if (suser_xxx(td->td_proc->p_ucred, 0, 0)) {
3776 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
3777 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3778 		sp = &sb;
3779 	}
3780 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
3781 }
3782 
3783 /*
3784  * Syscall to push extended attribute configuration information into the
3785  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3786  * a command (int cmd), and attribute name and misc data.  For now, the
3787  * attribute name is left in userspace for consumption by the VFS_op.
3788  * It will probably be changed to be copied into sysspace by the
3789  * syscall in the future, once issues with various consumers of the
3790  * attribute code have raised their hands.
3791  *
3792  * Currently this is used only by UFS Extended Attributes.
3793  */
3794 int
3795 extattrctl(td, uap)
3796 	struct thread *td;
3797 	struct extattrctl_args *uap;
3798 {
3799 	struct vnode *filename_vp;
3800 	struct nameidata nd;
3801 	struct mount *mp;
3802 	char attrname[EXTATTR_MAXNAMELEN];
3803 	int error;
3804 
3805 	/*
3806 	 * SCARG(uap, attrname) not always defined.  We check again later
3807 	 * when we invoke the VFS call so as to pass in NULL there if needed.
3808 	 */
3809 	if (SCARG(uap, attrname) != NULL) {
3810 		error = copyinstr(SCARG(uap, attrname), attrname,
3811 		    EXTATTR_MAXNAMELEN, NULL);
3812 		if (error)
3813 			return (error);
3814 	}
3815 
3816 	/*
3817 	 * SCARG(uap, filename) not always defined.  If it is, grab
3818 	 * a vnode lock, which VFS_EXTATTRCTL() will later release.
3819 	 */
3820 	filename_vp = NULL;
3821 	if (SCARG(uap, filename) != NULL) {
3822 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3823 		    SCARG(uap, filename), td);
3824 		if ((error = namei(&nd)) != 0)
3825 			return (error);
3826 		filename_vp = nd.ni_vp;
3827 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3828 	}
3829 
3830 	/* SCARG(uap, path) always defined. */
3831 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3832 	if ((error = namei(&nd)) != 0)
3833 		return (error);
3834 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
3835 	NDFREE(&nd, 0);
3836 	if (error) {
3837 		if (filename_vp)
3838 			vrele(filename_vp);
3839 		return (error);
3840 	}
3841 
3842 	if (SCARG(uap, attrname) != NULL) {
3843 		error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
3844 		    SCARG(uap, attrnamespace), attrname, td);
3845 	} else {
3846 		error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
3847 		    SCARG(uap, attrnamespace), NULL, td);
3848 	}
3849 
3850 	vn_finished_write(mp);
3851 	/*
3852 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3853 	 * filename_vp, so vrele it if it is defined.
3854 	 */
3855 	if (filename_vp != NULL)
3856 		vrele(filename_vp);
3857 
3858 	return (error);
3859 }
3860 
3861 /*
3862  * extattr_set_vp(): Set a named extended attribute on a file or directory
3863  *
3864  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3865  *            kernelspace string pointer "attrname",
3866  *            userspace iovec array pointer "iovp", unsigned int iovcnt
3867  *            proc "p"
3868  * Returns: 0 on success, an error number otherwise
3869  * Locks: none
3870  * References: vp must be a valid reference for the duration of the call
3871  */
3872 static int
3873 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3874     struct iovec *iovp, unsigned iovcnt, struct thread *td)
3875 {
3876 	struct mount *mp;
3877 	struct uio auio;
3878 	struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
3879 	u_int iovlen, cnt;
3880 	int error, i;
3881 
3882 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3883 		return (error);
3884 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
3885 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3886 
3887 	iovlen = iovcnt * sizeof(struct iovec);
3888 	if (iovcnt > UIO_SMALLIOV) {
3889 		if (iovcnt > UIO_MAXIOV) {
3890 			error = EINVAL;
3891 			goto done;
3892 		}
3893 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3894 		needfree = iov;
3895 	} else
3896 		iov = aiov;
3897 	auio.uio_iov = iov;
3898 	auio.uio_iovcnt = iovcnt;
3899 	auio.uio_rw = UIO_WRITE;
3900 	auio.uio_segflg = UIO_USERSPACE;
3901 	auio.uio_td = td;
3902 	auio.uio_offset = 0;
3903 	if ((error = copyin((caddr_t)iovp, (caddr_t)iov, iovlen)))
3904 		goto done;
3905 	auio.uio_resid = 0;
3906 	for (i = 0; i < iovcnt; i++) {
3907 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
3908 			error = EINVAL;
3909 			goto done;
3910 		}
3911 		auio.uio_resid += iov->iov_len;
3912 		iov++;
3913 	}
3914 	cnt = auio.uio_resid;
3915 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3916 	    td->td_proc->p_ucred, td);
3917 	cnt -= auio.uio_resid;
3918 	td->td_retval[0] = cnt;
3919 done:
3920 	if (needfree)
3921 		FREE(needfree, M_IOV);
3922 	VOP_UNLOCK(vp, 0, td);
3923 	vn_finished_write(mp);
3924 	return (error);
3925 }
3926 
3927 int
3928 extattr_set_file(td, uap)
3929 	struct thread *td;
3930 	struct extattr_set_file_args *uap;
3931 {
3932 	struct nameidata nd;
3933 	char attrname[EXTATTR_MAXNAMELEN];
3934 	int error;
3935 
3936 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
3937 	    NULL);
3938 	if (error)
3939 		return (error);
3940 
3941 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3942 	if ((error = namei(&nd)) != 0)
3943 		return (error);
3944 	NDFREE(&nd, NDF_ONLY_PNBUF);
3945 
3946 	error = extattr_set_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
3947 	    SCARG(uap, iovp), SCARG(uap, iovcnt), td);
3948 
3949 	vrele(nd.ni_vp);
3950 	return (error);
3951 }
3952 
3953 int
3954 extattr_set_fd(td, uap)
3955 	struct thread *td;
3956 	struct extattr_set_fd_args *uap;
3957 {
3958 	struct file *fp;
3959 	char attrname[EXTATTR_MAXNAMELEN];
3960 	int error;
3961 
3962 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
3963 	    NULL);
3964 	if (error)
3965 		return (error);
3966 
3967 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3968 		return (error);
3969 
3970 	error = extattr_set_vp((struct vnode *)fp->f_data,
3971 	    SCARG(uap, attrnamespace), attrname, SCARG(uap, iovp),
3972 	    SCARG(uap, iovcnt), td);
3973 
3974 	return (error);
3975 }
3976 
3977 /*
3978  * extattr_get_vp(): Get a named extended attribute on a file or directory
3979  *
3980  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3981  *            kernelspace string pointer "attrname",
3982  *            userspace iovec array pointer "iovp", unsigned int iovcnt,
3983  *            proc "p"
3984  * Returns: 0 on success, an error number otherwise
3985  * Locks: none
3986  * References: vp must be a valid reference for the duration of the call
3987  */
3988 static int
3989 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3990     struct iovec *iovp, unsigned iovcnt, struct thread *td)
3991 {
3992 	struct uio auio;
3993 	struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
3994 	u_int iovlen, cnt;
3995 	int error, i;
3996 
3997 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_READ);
3998 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3999 
4000 	iovlen = iovcnt * sizeof (struct iovec);
4001 	if (iovcnt > UIO_SMALLIOV) {
4002 		if (iovcnt > UIO_MAXIOV) {
4003 			error = EINVAL;
4004 			goto done;
4005 		}
4006 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
4007 		needfree = iov;
4008 	} else
4009 		iov = aiov;
4010 	auio.uio_iov = iov;
4011 	auio.uio_iovcnt = iovcnt;
4012 	auio.uio_rw = UIO_READ;
4013 	auio.uio_segflg = UIO_USERSPACE;
4014 	auio.uio_td = td;
4015 	auio.uio_offset = 0;
4016 	if ((error = copyin((caddr_t)iovp, (caddr_t)iov, iovlen)))
4017 		goto done;
4018 	auio.uio_resid = 0;
4019 	for (i = 0; i < iovcnt; i++) {
4020 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
4021 			error = EINVAL;
4022 			goto done;
4023 		}
4024 		auio.uio_resid += iov->iov_len;
4025 		iov++;
4026 	}
4027 	cnt = auio.uio_resid;
4028 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio,
4029 	    td->td_proc->p_ucred, td);
4030 	cnt -= auio.uio_resid;
4031 	td->td_retval[0] = cnt;
4032 done:
4033 	if (needfree)
4034 		FREE(needfree, M_IOV);
4035 	VOP_UNLOCK(vp, 0, td);
4036 	return (error);
4037 }
4038 
4039 int
4040 extattr_get_file(td, uap)
4041 	struct thread *td;
4042 	struct extattr_get_file_args *uap;
4043 {
4044 	struct nameidata nd;
4045 	char attrname[EXTATTR_MAXNAMELEN];
4046 	int error;
4047 
4048 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4049 	    NULL);
4050 	if (error)
4051 		return (error);
4052 
4053 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
4054 	if ((error = namei(&nd)) != 0)
4055 		return (error);
4056 	NDFREE(&nd, NDF_ONLY_PNBUF);
4057 
4058 	error = extattr_get_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
4059 	    SCARG(uap, iovp), SCARG(uap, iovcnt), td);
4060 
4061 	vrele(nd.ni_vp);
4062 	return (error);
4063 }
4064 
4065 int
4066 extattr_get_fd(td, uap)
4067 	struct thread *td;
4068 	struct extattr_get_fd_args *uap;
4069 {
4070 	struct file *fp;
4071 	char attrname[EXTATTR_MAXNAMELEN];
4072 	int error;
4073 
4074 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4075 	    NULL);
4076 	if (error)
4077 		return (error);
4078 
4079 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4080 		return (error);
4081 
4082 	error = extattr_get_vp((struct vnode *)fp->f_data,
4083 	    SCARG(uap, attrnamespace), attrname, SCARG(uap, iovp),
4084 	    SCARG(uap, iovcnt), td);
4085 
4086 	return (error);
4087 }
4088 
4089 /*
4090  * extattr_delete_vp(): Delete a named extended attribute on a file or
4091  *                      directory
4092  *
4093  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4094  *            kernelspace string pointer "attrname", proc "p"
4095  * Returns: 0 on success, an error number otherwise
4096  * Locks: none
4097  * References: vp must be a valid reference for the duration of the call
4098  */
4099 static int
4100 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4101     struct thread *td)
4102 {
4103 	struct mount *mp;
4104 	int error;
4105 
4106 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4107 		return (error);
4108 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
4109 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4110 
4111 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4112 	    td->td_proc->p_ucred, td);
4113 
4114 	VOP_UNLOCK(vp, 0, td);
4115 	vn_finished_write(mp);
4116 	return (error);
4117 }
4118 
4119 int
4120 extattr_delete_file(td, uap)
4121 	struct thread *td;
4122 	struct extattr_delete_file_args *uap;
4123 {
4124 	struct nameidata nd;
4125 	char attrname[EXTATTR_MAXNAMELEN];
4126 	int error;
4127 
4128 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4129 	     NULL);
4130 	if (error)
4131 		return(error);
4132 
4133 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
4134 	if ((error = namei(&nd)) != 0)
4135 		return(error);
4136 	NDFREE(&nd, NDF_ONLY_PNBUF);
4137 
4138 	error = extattr_delete_vp(nd.ni_vp, SCARG(uap, attrnamespace),
4139 	    attrname, td);
4140 
4141 	vrele(nd.ni_vp);
4142 	return(error);
4143 }
4144 
4145 int
4146 extattr_delete_fd(td, uap)
4147 	struct thread *td;
4148 	struct extattr_delete_fd_args *uap;
4149 {
4150 	struct file *fp;
4151 	char attrname[EXTATTR_MAXNAMELEN];
4152 	int error;
4153 
4154 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4155 	    NULL);
4156 	if (error)
4157 		return (error);
4158 
4159 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4160 		return (error);
4161 
4162 	error = extattr_delete_vp((struct vnode *)fp->f_data,
4163 	    SCARG(uap, attrnamespace), attrname, td);
4164 
4165 	return (error);
4166 }
4167