xref: /freebsd/sys/kern/vfs_extattr.c (revision 1a2cdef4962b47be5057809ce730a733b7f3c27c)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  * $FreeBSD$
40  */
41 
42 /* For 4.3 integer FS ID compatibility */
43 #include "opt_compat.h"
44 #include "opt_ffs.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/sysent.h>
51 #include <sys/malloc.h>
52 #include <sys/mount.h>
53 #include <sys/mutex.h>
54 #include <sys/sysproto.h>
55 #include <sys/namei.h>
56 #include <sys/filedesc.h>
57 #include <sys/kernel.h>
58 #include <sys/fcntl.h>
59 #include <sys/file.h>
60 #include <sys/linker.h>
61 #include <sys/stat.h>
62 #include <sys/sx.h>
63 #include <sys/unistd.h>
64 #include <sys/vnode.h>
65 #include <sys/proc.h>
66 #include <sys/dirent.h>
67 #include <sys/extattr.h>
68 #include <sys/jail.h>
69 
70 #include <machine/limits.h>
71 #include <miscfs/union/union.h>
72 #include <sys/sysctl.h>
73 #include <vm/vm.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_zone.h>
76 #include <vm/vm_page.h>
77 
/*
 * Forward declarations of helpers that are private to this file.
 */
78 static int change_dir __P((struct nameidata *ndp, struct proc *p));
79 static void checkdirs __P((struct vnode *olddp, struct vnode *newdp));
80 static int chroot_refuse_vdir_fds __P((struct filedesc *fdp));
81 static int getutimes __P((const struct timeval *, struct timespec *));
82 static int setfown __P((struct proc *, struct vnode *, uid_t, gid_t));
83 static int setfmode __P((struct proc *, struct vnode *, int));
84 static int setfflags __P((struct proc *, struct vnode *, int));
85 static int setutimes __P((struct proc *, struct vnode *,
86     const struct timespec *, int));
/* Consulted by vfs_mount(): while this is 0, mounting requires suser(). */
87 static int	usermount = 0;	/* if 1, non-root can mount fs. */
88 
/*
 * Function pointer hook.  It is neither assigned nor called in the part
 * of the file reviewed here -- presumably set by the union filesystem;
 * TODO confirm.
 */
89 int (*union_dircheckp) __P((struct proc *, struct vnode **, struct file *));
90 
/* Expose usermount as a read/write integer sysctl under the _vfs node. */
91 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
92 
93 /*
94  * Virtual File System System Calls
95  */
96 
97 /*
98  * Mount a file system.
99  */
100 #ifndef _SYS_SYSPROTO_H_
101 struct mount_args {
102 	char	*type;
103 	char	*path;
104 	int	flags;
105 	caddr_t	data;
106 };
107 #endif
108 /* ARGSUSED */
109 int
110 mount(p, uap)
111 	struct proc *p;
112 	struct mount_args /* {
113 		syscallarg(char *) type;
114 		syscallarg(char *) path;
115 		syscallarg(int) flags;
116 		syscallarg(caddr_t) data;
117 	} */ *uap;
118 {
119 	char *fstype;
120 	char *fspath;
121 	int error;
122 
123 	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK | M_ZERO);
124 	fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK | M_ZERO);
125 
126 	/*
127 	 * vfs_mount() actually takes a kernel string for `type' and
128 	 * `path' now, so extract them.
129 	 */
130 	error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL);
131 	if (error)
132 		goto finish;
133 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
134 	if (error)
135 		goto finish;
136 	error = vfs_mount(p, fstype, fspath, SCARG(uap, flags),
137 	    SCARG(uap, data));
138 finish:
139 	free(fstype, M_TEMP);
140 	free(fspath, M_TEMP);
141 	return (error);
142 }
143 
144 /*
145  * vfs_mount(): actually attempt a filesystem mount.
146  *
147  * This routine is designed to be a "generic" entry point for routines
148  * that wish to mount a filesystem. All parameters except `fsdata' are
149  * pointers into kernel space. `fsdata' is currently still a pointer
150  * into userspace.
 *
 * Parameters:
 *	p	 calling process; its credentials drive the privilege checks.
 *	fstype	 kernel string naming the filesystem type; matched against
 *		 the vfc_name of each vfsconf entry.
 *	fspath	 kernel string naming the directory to mount on.
 *	fsflags	 MNT_* flags; MNT_UPDATE selects the update path.
 *	fsdata	 filesystem-specific argument, handed to VFS_MOUNT() as is.
 *
 * Returns 0 on success, otherwise an errno value.  Errors generated
 * directly here: ENAMETOOLONG (type or path too long), EINVAL (update
 * requested on a vnode that is not a filesystem root), EOPNOTSUPP
 * (MNT_RELOAD of a mount that is not read-only), EBUSY (vfs_busy()
 * failed, or the vnode already has VMOUNT set or a filesystem mounted
 * on it), ENOTDIR (target is not a directory) and ENODEV (no matching
 * filesystem type, even after trying to load a module).  Errors from
 * suser(), namei(), VOP_GETATTR(), vinvalbuf(), linker_load_file(),
 * VFS_MOUNT() and VFS_START() are passed through.
 *
 * NOTE(review): on the new-mount success path the result of
 * vfs_allocate_syncvnode() is overwritten by the VFS_START() result,
 * and on the update path the result of vfs_allocate_syncvnode() (when
 * it is called) replaces the VFS_MOUNT() error.  Confirm both are
 * intended.
151  */
152 int
153 vfs_mount(p, fstype, fspath, fsflags, fsdata)
154 	struct proc *p;
155 	char *fstype;
156 	char *fspath;
157 	int fsflags;
158 	void *fsdata;
159 {
160 	struct vnode *vp;
161 	struct mount *mp;
162 	struct vfsconf *vfsp;
163 	int error, flag = 0, flag2 = 0;
164 	struct vattr va;
165 	struct nameidata nd;
166 
167 	/*
168 	 * Be ultra-paranoid about making sure the type and fspath
169 	 * variables will fit in our mp buffers, including the
170 	 * terminating NUL.
171 	 */
172 	if ((strlen(fstype) >= MFSNAMELEN - 1) ||
173 	    (strlen(fspath) >= MNAMELEN - 1))
174 		return (ENAMETOOLONG);
175 
	/* Unless the usermount knob is set, only the superuser may mount. */
176 	if (usermount == 0 && (error = suser(p)))
177 		return (error);
178 	/*
179 	 * Do not allow NFS export by non-root users.
180 	 */
181 	if (fsflags & MNT_EXPORTED) {
182 		error = suser(p);
183 		if (error)
184 			return (error);
185 	}
186 	/*
187 	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
188 	 */
189 	if (suser_xxx(p->p_ucred, 0, 0))
190 		fsflags |= MNT_NOSUID | MNT_NODEV;
191 	/*
192 	 * Get vnode to be covered
193 	 */
194 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, p);
195 	if ((error = namei(&nd)) != 0)
196 		return (error);
197 	NDFREE(&nd, NDF_ONLY_PNBUF);
198 	vp = nd.ni_vp;
	/*
	 * Update of an existing mount: validate the request, busy the
	 * mount, set VMOUNT on the vnode and join the common code at
	 * `update'.  The current flags are saved in flag/flag2 so they
	 * can be restored if VFS_MOUNT() fails.
	 */
199 	if (fsflags & MNT_UPDATE) {
200 		if ((vp->v_flag & VROOT) == 0) {
201 			vput(vp);
202 			return (EINVAL);
203 		}
204 		mp = vp->v_mount;
205 		flag = mp->mnt_flag;
206 		flag2 = mp->mnt_kern_flag;
207 		/*
208 		 * We only allow the filesystem to be reloaded if it
209 		 * is currently mounted read-only.
210 		 */
211 		if ((fsflags & MNT_RELOAD) &&
212 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
213 			vput(vp);
214 			return (EOPNOTSUPP);	/* Needs translation */
215 		}
216 		/*
217 		 * Only root, or the user that did the original mount is
218 		 * permitted to update it.
219 		 */
220 		if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid &&
221 		    (error = suser(p))) {
222 			vput(vp);
223 			return (error);
224 		}
225 		if (vfs_busy(mp, LK_NOWAIT, 0, p)) {
226 			vput(vp);
227 			return (EBUSY);
228 		}
229 		mtx_lock(&vp->v_interlock);
230 		if ((vp->v_flag & VMOUNT) != 0 ||
231 		    vp->v_mountedhere != NULL) {
232 			mtx_unlock(&vp->v_interlock);
233 			vfs_unbusy(mp, p);
234 			vput(vp);
235 			return (EBUSY);
236 		}
237 		vp->v_flag |= VMOUNT;
238 		mtx_unlock(&vp->v_interlock);
239 		mp->mnt_flag |= fsflags &
240 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
241 		VOP_UNLOCK(vp, 0, p);
242 		goto update;
243 	}
244 	/*
245 	 * If the user is not root, ensure that they own the directory
246 	 * onto which we are attempting to mount.
247 	 */
248 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) ||
249 	    (va.va_uid != p->p_ucred->cr_uid &&
250 	     (error = suser(p)))) {
251 		vput(vp);
252 		return (error);
253 	}
254 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) {
255 		vput(vp);
256 		return (error);
257 	}
258 	if (vp->v_type != VDIR) {
259 		vput(vp);
260 		return (ENOTDIR);
261 	}
	/* Find the configured filesystem type whose name matches fstype. */
262 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
263 		if (!strcmp(vfsp->vfc_name, fstype))
264 			break;
265 	if (vfsp == NULL) {
266 		linker_file_t lf;
267 
268 		/* Only load modules for root (very important!) */
269 		if ((error = suser(p)) != 0) {
270 			vput(vp);
271 			return error;
272 		}
		/*
		 * NOTE(review): lf is read below even when
		 * linker_load_file() returned an error; this assumes the
		 * loader always stores to lf -- confirm.
		 */
273 		error = linker_load_file(fstype, &lf);
274 		if (error || lf == NULL) {
275 			vput(vp);
276 			if (lf == NULL)
277 				error = ENODEV;
278 			return error;
279 		}
280 		lf->userrefs++;
281 		/* lookup again, see if the VFS was loaded */
282 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
283 			if (!strcmp(vfsp->vfc_name, fstype))
284 				break;
285 		if (vfsp == NULL) {
286 			lf->userrefs--;
287 			linker_file_unload(lf);
288 			vput(vp);
289 			return (ENODEV);
290 		}
291 	}
	/*
	 * Fail with EBUSY if VMOUNT is already set on the vnode or a
	 * filesystem is already mounted here; otherwise set VMOUNT.
	 */
292 	mtx_lock(&vp->v_interlock);
293 	if ((vp->v_flag & VMOUNT) != 0 ||
294 	    vp->v_mountedhere != NULL) {
295 		mtx_unlock(&vp->v_interlock);
296 		vput(vp);
297 		return (EBUSY);
298 	}
299 	vp->v_flag |= VMOUNT;
300 	mtx_unlock(&vp->v_interlock);
301 
302 	/*
303 	 * Allocate and initialize the filesystem.
304 	 */
305 	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
306 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
307 	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
308 	mp->mnt_op = vfsp->vfc_vfsops;
309 	mp->mnt_vfc = vfsp;
310 	vfsp->vfc_refcount++;
311 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
312 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
313 	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
314 	mp->mnt_stat.f_fstypename[MFSNAMELEN - 1] = '\0';
315 	mp->mnt_vnodecovered = vp;
316 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
317 	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
318 	mp->mnt_stat.f_mntonname[MNAMELEN - 1] = '\0';
319 	mp->mnt_iosize_max = DFLTPHYS;
320 	VOP_UNLOCK(vp, 0, p);
321 update:
322 	/*
323 	 * Set the mount level flags.
324 	 */
325 	if (fsflags & MNT_RDONLY)
326 		mp->mnt_flag |= MNT_RDONLY;
327 	else if (mp->mnt_flag & MNT_RDONLY)
328 		mp->mnt_kern_flag |= MNTK_WANTRDWR;
329 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
330 	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
331 	    MNT_NOSYMFOLLOW | MNT_IGNORE |
332 	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
333 	mp->mnt_flag |= fsflags & (MNT_NOSUID | MNT_NOEXEC |
334 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
335 	    MNT_NOSYMFOLLOW | MNT_IGNORE |
336 	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
337 	/*
338 	 * Mount the filesystem.
339 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
340 	 * get.  No freeing of cn_pnbuf.
341 	 */
342 	error = VFS_MOUNT(mp, fspath, fsdata, &nd, p);
	/*
	 * Update path: clear the transient request flags, restore the
	 * saved flags if VFS_MOUNT() failed, reconcile the syncer vnode
	 * with the resulting read-only state, then unbusy the mount,
	 * clear VMOUNT and drop the vnode reference.
	 */
343 	if (mp->mnt_flag & MNT_UPDATE) {
344 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
345 			mp->mnt_flag &= ~MNT_RDONLY;
346 		mp->mnt_flag &=~
347 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
348 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
349 		if (error) {
350 			mp->mnt_flag = flag;
351 			mp->mnt_kern_flag = flag2;
352 		}
353 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
354 			if (mp->mnt_syncer == NULL)
355 				error = vfs_allocate_syncvnode(mp);
356 		} else {
357 			if (mp->mnt_syncer != NULL)
358 				vrele(mp->mnt_syncer);
359 			mp->mnt_syncer = NULL;
360 		}
361 		vfs_unbusy(mp, p);
362 		mtx_lock(&vp->v_interlock);
363 		vp->v_flag &= ~VMOUNT;
364 		mtx_unlock(&vp->v_interlock);
365 		vrele(vp);
366 		return (error);
367 	}
368 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
369 	/*
370 	 * Put the new filesystem on the mount list after root.
371 	 */
372 	cache_purge(vp);
373 	if (!error) {
374 		struct vnode *newdp;
375 
376 		mtx_lock(&vp->v_interlock);
377 		vp->v_flag &= ~VMOUNT;
378 		vp->v_mountedhere = mp;
379 		mtx_unlock(&vp->v_interlock);
380 		mtx_lock(&mountlist_mtx);
381 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
382 		mtx_unlock(&mountlist_mtx);
383 		if (VFS_ROOT(mp, &newdp))
384 			panic("mount: lost mount");
		/* Move cdir/rdir references on the covered vnode to the new root. */
385 		checkdirs(vp, newdp);
386 		vput(newdp);
387 		VOP_UNLOCK(vp, 0, p);
388 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
389 			error = vfs_allocate_syncvnode(mp);
390 		vfs_unbusy(mp, p);
		/* NOTE(review): this assignment discards the error set just above. */
391 		if ((error = VFS_START(mp, 0, p)) != 0)
392 			vrele(vp);
393 	} else {
		/* VFS_MOUNT() failed: undo the setup done for the new mount. */
394 		mtx_lock(&vp->v_interlock);
395 		vp->v_flag &= ~VMOUNT;
396 		mtx_unlock(&vp->v_interlock);
397 		mp->mnt_vfc->vfc_refcount--;
398 		vfs_unbusy(mp, p);
399 		free((caddr_t)mp, M_MOUNT);
400 		vput(vp);
401 	}
402 	return (error);
403 }
404 
405 /*
406  * Scan all active processes to see if any of them have a current
407  * or root directory of `olddp'. If so, replace them with the new
408  * mount point.
409  */
410 static void
411 checkdirs(olddp, newdp)
412 	struct vnode *olddp, *newdp;
413 {
414 	struct filedesc *fdp;
415 	struct proc *p;
416 
417 	if (olddp->v_usecount == 1)
418 		return;
419 	sx_slock(&allproc_lock);
420 	LIST_FOREACH(p, &allproc, p_list) {
421 		fdp = p->p_fd;
422 		if (fdp == NULL)
423 			continue;
424 		if (fdp->fd_cdir == olddp) {
425 			vrele(fdp->fd_cdir);
426 			VREF(newdp);
427 			fdp->fd_cdir = newdp;
428 		}
429 		if (fdp->fd_rdir == olddp) {
430 			vrele(fdp->fd_rdir);
431 			VREF(newdp);
432 			fdp->fd_rdir = newdp;
433 		}
434 	}
435 	sx_sunlock(&allproc_lock);
436 	if (rootvnode == olddp) {
437 		vrele(rootvnode);
438 		VREF(newdp);
439 		rootvnode = newdp;
440 	}
441 }
442 
443 /*
444  * Unmount a file system.
445  *
446  * Note: unmount takes a path to the vnode mounted on as argument,
447  * not special file (as before).
448  */
449 #ifndef _SYS_SYSPROTO_H_
450 struct unmount_args {
451 	char	*path;
452 	int	flags;
453 };
454 #endif
455 /* ARGSUSED */
456 int
457 unmount(p, uap)
458 	struct proc *p;
459 	register struct unmount_args /* {
460 		syscallarg(char *) path;
461 		syscallarg(int) flags;
462 	} */ *uap;
463 {
464 	register struct vnode *vp;
465 	struct mount *mp;
466 	int error;
467 	struct nameidata nd;
468 
469 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
470 	    SCARG(uap, path), p);
471 	if ((error = namei(&nd)) != 0)
472 		return (error);
473 	vp = nd.ni_vp;
474 	NDFREE(&nd, NDF_ONLY_PNBUF);
475 	mp = vp->v_mount;
476 
477 	/*
478 	 * Only root, or the user that did the original mount is
479 	 * permitted to unmount this filesystem.
480 	 */
481 	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
482 	    (error = suser(p))) {
483 		vput(vp);
484 		return (error);
485 	}
486 
487 	/*
488 	 * Don't allow unmounting the root file system.
489 	 */
490 	if (mp->mnt_flag & MNT_ROOTFS) {
491 		vput(vp);
492 		return (EINVAL);
493 	}
494 
495 	/*
496 	 * Must be the root of the filesystem
497 	 */
498 	if ((vp->v_flag & VROOT) == 0) {
499 		vput(vp);
500 		return (EINVAL);
501 	}
502 	vput(vp);
503 	return (dounmount(mp, SCARG(uap, flags), p));
504 }
505 
506 /*
507  * Do the actual file system unmount.
 *
 * Parameters:
 *	mp	mount to take down.
 *	flags	unmount flags; MNT_FORCE makes VFS_UNMOUNT() run even if
 *		the preceding VFS_SYNC() failed.  Passed on to VFS_UNMOUNT().
 *	p	calling process.
 *
 * Sequence: set MNTK_UNMOUNT and drain mnt_lock, flush with vfs_msync(),
 * clear MNT_ASYNC (remembering its old value), purge the name cache,
 * release the syncer vnode, move cdir/rdir references from the
 * filesystem root to the covered vnode, then sync (skipped for
 * read-only mounts) and call VFS_UNMOUNT().
 *
 * On failure the cdir/rdir and rootvnode changes are undone, a syncer
 * vnode is re-allocated if needed, MNTK_UNMOUNT is cleared, MNT_ASYNC
 * is restored, the lock is re-enabled and the error is returned.  On
 * success the mount is removed from mountlist, the covered vnode is
 * released and the mount structure is freed; returns 0.
508  */
509 int
510 dounmount(mp, flags, p)
511 	struct mount *mp;
512 	int flags;
513 	struct proc *p;
514 {
515 	struct vnode *coveredvp, *fsrootvp;
516 	int error;
517 	int async_flag;
518 
	/* Flag the unmount in progress and drain mnt_lock; mountlist_mtx is the interlock. */
519 	mtx_lock(&mountlist_mtx);
520 	mp->mnt_kern_flag |= MNTK_UNMOUNT;
521 	lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_mtx, p);
522 	vn_start_write(NULL, &mp, V_WAIT);
523 
524 	if (mp->mnt_flag & MNT_EXPUBLIC)
525 		vfs_setpublicfs(NULL, NULL, NULL);
526 
527 	vfs_msync(mp, MNT_WAIT);
	/* Remember MNT_ASYNC so it can be restored if the unmount fails. */
528 	async_flag = mp->mnt_flag & MNT_ASYNC;
529 	mp->mnt_flag &=~ MNT_ASYNC;
530 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
531 	if (mp->mnt_syncer != NULL)
532 		vrele(mp->mnt_syncer);
533 	/* Move process cdir/rdir refs on fs root to underlying vnode. */
534 	if (VFS_ROOT(mp, &fsrootvp) == 0) {
535 		if (mp->mnt_vnodecovered != NULL)
536 			checkdirs(fsrootvp, mp->mnt_vnodecovered);
537 		if (fsrootvp == rootvnode) {
538 			vrele(rootvnode);
539 			rootvnode = NULL;
540 		}
541 		vput(fsrootvp);
542 	}
	/*
	 * VFS_UNMOUNT() runs when the mount is read-only (no sync is
	 * attempted), when VFS_SYNC() succeeded, or when MNT_FORCE is
	 * set; otherwise error keeps the VFS_SYNC() failure.
	 */
543 	if (((mp->mnt_flag & MNT_RDONLY) ||
544 	     (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
545 	    (flags & MNT_FORCE)) {
546 		error = VFS_UNMOUNT(mp, flags, p);
547 	}
548 	vn_finished_write(mp);
549 	mtx_lock(&mountlist_mtx);
550 	if (error) {
551 		/* Undo cdir/rdir and rootvnode changes made above. */
552 		if (VFS_ROOT(mp, &fsrootvp) == 0) {
553 			if (mp->mnt_vnodecovered != NULL)
554 				checkdirs(mp->mnt_vnodecovered, fsrootvp);
555 			if (rootvnode == NULL) {
556 				rootvnode = fsrootvp;
557 				vref(rootvnode);
558 			}
559 			vput(fsrootvp);
560 		}
561 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
562 			(void) vfs_allocate_syncvnode(mp);
563 		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
564 		mp->mnt_flag |= async_flag;
565 		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
566 		    &mountlist_mtx, p);
		/* Wake any thread sleeping on mp with MNTK_MWAIT set. */
567 		if (mp->mnt_kern_flag & MNTK_MWAIT)
568 			wakeup((caddr_t)mp);
569 		return (error);
570 	}
	/* Success: unlink the mount and detach it from the covered vnode. */
571 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
572 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
573 		coveredvp->v_mountedhere = (struct mount *)0;
574 		vrele(coveredvp);
575 	}
576 	mp->mnt_vfc->vfc_refcount--;
577 	if (!LIST_EMPTY(&mp->mnt_vnodelist))
578 		panic("unmount: dangling vnode");
579 	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, p);
580 	lockdestroy(&mp->mnt_lock);
581 	if (mp->mnt_kern_flag & MNTK_MWAIT)
582 		wakeup((caddr_t)mp);
583 	free((caddr_t)mp, M_MOUNT);
584 	return (0);
585 }
586 
587 /*
588  * Sync each mounted filesystem.
589  */
590 #ifndef _SYS_SYSPROTO_H_
591 struct sync_args {
592         int     dummy;
593 };
594 #endif
595 
596 #ifdef DEBUG
597 static int syncprt = 0;
598 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
599 #endif
600 
601 /* ARGSUSED */
602 int
603 sync(p, uap)
604 	struct proc *p;
605 	struct sync_args *uap;
606 {
607 	struct mount *mp, *nmp;
608 	int asyncflag;
609 
610 	mtx_lock(&mountlist_mtx);
611 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
612 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, p)) {
613 			nmp = TAILQ_NEXT(mp, mnt_list);
614 			continue;
615 		}
616 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
617 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
618 			asyncflag = mp->mnt_flag & MNT_ASYNC;
619 			mp->mnt_flag &= ~MNT_ASYNC;
620 			vfs_msync(mp, MNT_NOWAIT);
621 			VFS_SYNC(mp, MNT_NOWAIT,
622 			    ((p != NULL) ? p->p_ucred : NOCRED), p);
623 			mp->mnt_flag |= asyncflag;
624 			vn_finished_write(mp);
625 		}
626 		mtx_lock(&mountlist_mtx);
627 		nmp = TAILQ_NEXT(mp, mnt_list);
628 		vfs_unbusy(mp, p);
629 	}
630 	mtx_unlock(&mountlist_mtx);
631 #if 0
632 /*
633  * XXX don't call vfs_bufstats() yet because that routine
634  * was not imported in the Lite2 merge.
635  */
636 #ifdef DIAGNOSTIC
637 	if (syncprt)
638 		vfs_bufstats();
639 #endif /* DIAGNOSTIC */
640 #endif
641 	return (0);
642 }
643 
644 /* XXX PRISON: could be per prison flag */
645 static int prison_quotas;
646 #if 0
647 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
648 #endif
649 
650 /*
651  * Change filesystem quotas.
652  */
653 #ifndef _SYS_SYSPROTO_H_
654 struct quotactl_args {
655 	char *path;
656 	int cmd;
657 	int uid;
658 	caddr_t arg;
659 };
660 #endif
661 /* ARGSUSED */
662 int
663 quotactl(p, uap)
664 	struct proc *p;
665 	register struct quotactl_args /* {
666 		syscallarg(char *) path;
667 		syscallarg(int) cmd;
668 		syscallarg(int) uid;
669 		syscallarg(caddr_t) arg;
670 	} */ *uap;
671 {
672 	struct mount *mp;
673 	int error;
674 	struct nameidata nd;
675 
676 	if (jailed(p->p_ucred) && !prison_quotas)
677 		return (EPERM);
678 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
679 	if ((error = namei(&nd)) != 0)
680 		return (error);
681 	NDFREE(&nd, NDF_ONLY_PNBUF);
682 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
683 	vrele(nd.ni_vp);
684 	if (error)
685 		return (error);
686 	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
687 	    SCARG(uap, arg), p);
688 	vn_finished_write(mp);
689 	return (error);
690 }
691 
692 /*
693  * Get filesystem statistics.
694  */
695 #ifndef _SYS_SYSPROTO_H_
696 struct statfs_args {
697 	char *path;
698 	struct statfs *buf;
699 };
700 #endif
701 /* ARGSUSED */
702 int
703 statfs(p, uap)
704 	struct proc *p;
705 	register struct statfs_args /* {
706 		syscallarg(char *) path;
707 		syscallarg(struct statfs *) buf;
708 	} */ *uap;
709 {
710 	register struct mount *mp;
711 	register struct statfs *sp;
712 	int error;
713 	struct nameidata nd;
714 	struct statfs sb;
715 
716 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
717 	if ((error = namei(&nd)) != 0)
718 		return (error);
719 	mp = nd.ni_vp->v_mount;
720 	sp = &mp->mnt_stat;
721 	NDFREE(&nd, NDF_ONLY_PNBUF);
722 	vrele(nd.ni_vp);
723 	error = VFS_STATFS(mp, sp, p);
724 	if (error)
725 		return (error);
726 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
727 	if (suser_xxx(p->p_ucred, 0, 0)) {
728 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
729 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
730 		sp = &sb;
731 	}
732 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
733 }
734 
735 /*
736  * Get filesystem statistics.
737  */
738 #ifndef _SYS_SYSPROTO_H_
739 struct fstatfs_args {
740 	int fd;
741 	struct statfs *buf;
742 };
743 #endif
744 /* ARGSUSED */
745 int
746 fstatfs(p, uap)
747 	struct proc *p;
748 	register struct fstatfs_args /* {
749 		syscallarg(int) fd;
750 		syscallarg(struct statfs *) buf;
751 	} */ *uap;
752 {
753 	struct file *fp;
754 	struct mount *mp;
755 	register struct statfs *sp;
756 	int error;
757 	struct statfs sb;
758 
759 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
760 		return (error);
761 	mp = ((struct vnode *)fp->f_data)->v_mount;
762 	sp = &mp->mnt_stat;
763 	error = VFS_STATFS(mp, sp, p);
764 	if (error)
765 		return (error);
766 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
767 	if (suser_xxx(p->p_ucred, 0, 0)) {
768 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
769 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
770 		sp = &sb;
771 	}
772 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
773 }
774 
775 /*
776  * Get statistics on all filesystems.
777  */
778 #ifndef _SYS_SYSPROTO_H_
779 struct getfsstat_args {
780 	struct statfs *buf;
781 	long bufsize;
782 	int flags;
783 };
784 #endif
785 int
786 getfsstat(p, uap)
787 	struct proc *p;
788 	register struct getfsstat_args /* {
789 		syscallarg(struct statfs *) buf;
790 		syscallarg(long) bufsize;
791 		syscallarg(int) flags;
792 	} */ *uap;
793 {
794 	register struct mount *mp, *nmp;
795 	register struct statfs *sp;
796 	caddr_t sfsp;
797 	long count, maxcount, error;
798 
799 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
800 	sfsp = (caddr_t)SCARG(uap, buf);
801 	count = 0;
802 	mtx_lock(&mountlist_mtx);
803 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
804 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, p)) {
805 			nmp = TAILQ_NEXT(mp, mnt_list);
806 			continue;
807 		}
808 		if (sfsp && count < maxcount) {
809 			sp = &mp->mnt_stat;
810 			/*
811 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
812 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
813 			 * overrides MNT_WAIT.
814 			 */
815 			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
816 			    (SCARG(uap, flags) & MNT_WAIT)) &&
817 			    (error = VFS_STATFS(mp, sp, p))) {
818 				mtx_lock(&mountlist_mtx);
819 				nmp = TAILQ_NEXT(mp, mnt_list);
820 				vfs_unbusy(mp, p);
821 				continue;
822 			}
823 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
824 			error = copyout((caddr_t)sp, sfsp, sizeof(*sp));
825 			if (error) {
826 				vfs_unbusy(mp, p);
827 				return (error);
828 			}
829 			sfsp += sizeof(*sp);
830 		}
831 		count++;
832 		mtx_lock(&mountlist_mtx);
833 		nmp = TAILQ_NEXT(mp, mnt_list);
834 		vfs_unbusy(mp, p);
835 	}
836 	mtx_unlock(&mountlist_mtx);
837 	if (sfsp && count > maxcount)
838 		p->p_retval[0] = maxcount;
839 	else
840 		p->p_retval[0] = count;
841 	return (0);
842 }
843 
844 /*
845  * Change current working directory to a given file descriptor.
846  */
847 #ifndef _SYS_SYSPROTO_H_
848 struct fchdir_args {
849 	int	fd;
850 };
851 #endif
852 /* ARGSUSED */
853 int
854 fchdir(p, uap)
855 	struct proc *p;
856 	struct fchdir_args /* {
857 		syscallarg(int) fd;
858 	} */ *uap;
859 {
860 	register struct filedesc *fdp = p->p_fd;
861 	struct vnode *vp, *tdp;
862 	struct mount *mp;
863 	struct file *fp;
864 	int error;
865 
866 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
867 		return (error);
868 	vp = (struct vnode *)fp->f_data;
869 	VREF(vp);
870 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
871 	if (vp->v_type != VDIR)
872 		error = ENOTDIR;
873 	else
874 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
875 	while (!error && (mp = vp->v_mountedhere) != NULL) {
876 		if (vfs_busy(mp, 0, 0, p))
877 			continue;
878 		error = VFS_ROOT(mp, &tdp);
879 		vfs_unbusy(mp, p);
880 		if (error)
881 			break;
882 		vput(vp);
883 		vp = tdp;
884 	}
885 	if (error) {
886 		vput(vp);
887 		return (error);
888 	}
889 	VOP_UNLOCK(vp, 0, p);
890 	vrele(fdp->fd_cdir);
891 	fdp->fd_cdir = vp;
892 	return (0);
893 }
894 
895 /*
896  * Change current working directory (``.'').
897  */
898 #ifndef _SYS_SYSPROTO_H_
899 struct chdir_args {
900 	char	*path;
901 };
902 #endif
903 /* ARGSUSED */
904 int
905 chdir(p, uap)
906 	struct proc *p;
907 	struct chdir_args /* {
908 		syscallarg(char *) path;
909 	} */ *uap;
910 {
911 	register struct filedesc *fdp = p->p_fd;
912 	int error;
913 	struct nameidata nd;
914 
915 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
916 	    SCARG(uap, path), p);
917 	if ((error = change_dir(&nd, p)) != 0)
918 		return (error);
919 	NDFREE(&nd, NDF_ONLY_PNBUF);
920 	vrele(fdp->fd_cdir);
921 	fdp->fd_cdir = nd.ni_vp;
922 	return (0);
923 }
924 
925 /*
926  * Helper function for raised chroot(2) security function:  Refuse if
927  * any filedescriptors are open directories.
928  */
929 static int
930 chroot_refuse_vdir_fds(fdp)
931 	struct filedesc *fdp;
932 {
933 	struct vnode *vp;
934 	struct file *fp;
935 	int error;
936 	int fd;
937 
938 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
939 		error = getvnode(fdp, fd, &fp);
940 		if (error)
941 			continue;
942 		vp = (struct vnode *)fp->f_data;
943 		if (vp->v_type != VDIR)
944 			continue;
945 		return(EPERM);
946 	}
947 	return (0);
948 }
949 
950 /*
951  * This sysctl determines if we will allow a process to chroot(2) if it
952  * has a directory open:
953  *	0: disallowed for all processes.
954  *	1: allowed for processes that were not already chroot(2)'ed.
955  *	2: allowed for all processes.
956  */
957 
958 static int chroot_allow_open_directories = 1;
959 
960 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
961      &chroot_allow_open_directories, 0, "");
962 
963 /*
964  * Change notion of root (``/'') directory.
965  */
966 #ifndef _SYS_SYSPROTO_H_
967 struct chroot_args {
968 	char	*path;
969 };
970 #endif
971 /* ARGSUSED */
972 int
973 chroot(p, uap)
974 	struct proc *p;
975 	struct chroot_args /* {
976 		syscallarg(char *) path;
977 	} */ *uap;
978 {
979 	register struct filedesc *fdp = p->p_fd;
980 	int error;
981 	struct nameidata nd;
982 
983 	error = suser_xxx(0, p, PRISON_ROOT);
984 	if (error)
985 		return (error);
986 	if (chroot_allow_open_directories == 0 ||
987 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode))
988 		error = chroot_refuse_vdir_fds(fdp);
989 	if (error)
990 		return (error);
991 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
992 	    SCARG(uap, path), p);
993 	if ((error = change_dir(&nd, p)) != 0)
994 		return (error);
995 	NDFREE(&nd, NDF_ONLY_PNBUF);
996 	vrele(fdp->fd_rdir);
997 	fdp->fd_rdir = nd.ni_vp;
998 	if (!fdp->fd_jdir) {
999 		fdp->fd_jdir = nd.ni_vp;
1000                 VREF(fdp->fd_jdir);
1001 	}
1002 	return (0);
1003 }
1004 
1005 /*
1006  * Common routine for chroot and chdir.
1007  */
1008 static int
1009 change_dir(ndp, p)
1010 	register struct nameidata *ndp;
1011 	struct proc *p;
1012 {
1013 	struct vnode *vp;
1014 	int error;
1015 
1016 	error = namei(ndp);
1017 	if (error)
1018 		return (error);
1019 	vp = ndp->ni_vp;
1020 	if (vp->v_type != VDIR)
1021 		error = ENOTDIR;
1022 	else
1023 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
1024 	if (error)
1025 		vput(vp);
1026 	else
1027 		VOP_UNLOCK(vp, 0, p);
1028 	return (error);
1029 }
1030 
1031 /*
1032  * Check permissions, allocate an open file structure,
1033  * and call the device open routine if any.
 *
 * open(2) system call entry point.
 *
 * Flags whose access-mode bits equal O_ACCMODE are rejected with
 * EINVAL.  A file structure and descriptor slot are allocated with
 * falloc(), the file is opened through vn_open() while an extra
 * reference is held on the file structure, and O_EXLOCK/O_SHLOCK and
 * O_TRUNC are then applied.  On success the descriptor index is
 * returned to the caller in p->p_retval[0] and the function returns 0.
 *
 * Special cases visible below:
 *  - vn_open() failing with ENODEV/ENXIO while p_dupfd >= 0 is handed
 *    to dupfdopen() (the "fdopen" case);
 *  - ERESTART from a failed open is reported as EINTR;
 *  - if the descriptor was closed while vn_open() was blocked (only
 *    our reference is left), the vnode is closed again and the call
 *    still returns the index with success.
1034  */
1035 #ifndef _SYS_SYSPROTO_H_
1036 struct open_args {
1037 	char	*path;
1038 	int	flags;
1039 	int	mode;
1040 };
1041 #endif
1042 int
1043 open(p, uap)
1044 	struct proc *p;
1045 	register struct open_args /* {
1046 		syscallarg(char *) path;
1047 		syscallarg(int) flags;
1048 		syscallarg(int) mode;
1049 	} */ *uap;
1050 {
1051 	struct filedesc *fdp = p->p_fd;
1052 	struct file *fp;
1053 	struct vnode *vp;
1054 	struct vattr vat;
1055 	struct mount *mp;
1056 	int cmode, flags, oflags;
1057 	struct file *nfp;
1058 	int type, indx, error;
1059 	struct flock lf;
1060 	struct nameidata nd;
1061 
1062 	oflags = SCARG(uap, flags);
1063 	if ((oflags & O_ACCMODE) == O_ACCMODE)
1064 		return (EINVAL);
1065 	flags = FFLAGS(oflags);
1066 	error = falloc(p, &nfp, &indx);
1067 	if (error)
1068 		return (error);
1069 	fp = nfp;
	/* Creation mode: apply the umask, keep permission bits, drop S_ISTXT. */
1070 	cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1071 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1072 	p->p_dupfd = -indx - 1;			/* XXX check for fdopen */
1073 	/*
1074 	 * Bump the ref count to prevent another process from closing
1075 	 * the descriptor while we are blocked in vn_open()
1076 	 */
1077 	fhold(fp);
1078 	error = vn_open(&nd, &flags, cmode);
1079 	if (error) {
1080 		/*
1081 		 * release our own reference
1082 		 */
1083 		fdrop(fp, p);
1084 
1085 		/*
1086 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1087 		 * responsible for dropping the old contents of ofiles[indx]
1088 		 * if it succeeds.
1089 		 */
1090 		if ((error == ENODEV || error == ENXIO) &&
1091 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
1092 		    (error =
1093 			dupfdopen(p, fdp, indx, p->p_dupfd, flags, error)) == 0) {
1094 			p->p_retval[0] = indx;
1095 			return (0);
1096 		}
1097 		/*
1098 		 * Clean up the descriptor, but only if another thread hadn't
1099 		 * replaced or closed it.
1100 		 */
1101 		if (fdp->fd_ofiles[indx] == fp) {
1102 			fdp->fd_ofiles[indx] = NULL;
1103 			fdrop(fp, p);
1104 		}
1105 
1106 		if (error == ERESTART)
1107 			error = EINTR;
1108 		return (error);
1109 	}
1110 	p->p_dupfd = 0;
1111 	NDFREE(&nd, NDF_ONLY_PNBUF);
1112 	vp = nd.ni_vp;
1113 
1114 	/*
1115 	 * There should be 2 references on the file, one from the descriptor
1116 	 * table, and one for us.
1117 	 *
1118 	 * Handle the case where someone closed the file (via its file
1119 	 * descriptor) while we were blocked.  The end result should look
1120 	 * like opening the file succeeded but it was immediately closed.
1121 	 */
1122 	if (fp->f_count == 1) {
1123 		KASSERT(fdp->fd_ofiles[indx] != fp,
1124 		    ("Open file descriptor lost all refs"));
1125 		VOP_UNLOCK(vp, 0, p);
1126 		vn_close(vp, flags & FMASK, fp->f_cred, p);
1127 		fdrop(fp, p);
1128 		p->p_retval[0] = indx;
1129 		return 0;
1130 	}
1131 
	/* Bind the opened vnode to the file structure. */
1132 	fp->f_data = (caddr_t)vp;
1133 	fp->f_flag = flags & FMASK;
1134 	fp->f_ops = &vnops;
1135 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1136 	VOP_UNLOCK(vp, 0, p);
	/*
	 * O_EXLOCK/O_SHLOCK: take a whole-file advisory lock (write lock
	 * for O_EXLOCK, read lock otherwise), waiting unless FNONBLOCK
	 * is set.
	 */
1137 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1138 		lf.l_whence = SEEK_SET;
1139 		lf.l_start = 0;
1140 		lf.l_len = 0;
1141 		if (flags & O_EXLOCK)
1142 			lf.l_type = F_WRLCK;
1143 		else
1144 			lf.l_type = F_RDLCK;
1145 		type = F_FLOCK;
1146 		if ((flags & FNONBLOCK) == 0)
1147 			type |= F_WAIT;
1148 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0)
1149 			goto bad;
1150 		fp->f_flag |= FHASLOCK;
1151 	}
	/* O_TRUNC: set the file size to zero via VOP_SETATTR(). */
1152 	if (flags & O_TRUNC) {
1153 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1154 			goto bad;
1155 		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1156 		VATTR_NULL(&vat);
1157 		vat.va_size = 0;
1158 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1159 		error = VOP_SETATTR(vp, &vat, p->p_ucred, p);
1160 		VOP_UNLOCK(vp, 0, p);
1161 		vn_finished_write(mp);
1162 		if (error)
1163 			goto bad;
1164 	}
1165 	/* assert that vn_open created a backing object if one is needed */
1166 	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
1167 		("open: vmio vnode has no backing object after vn_open"));
1168 	/*
1169 	 * Release our private reference, leaving the one associated with
1170 	 * the descriptor table intact.
1171 	 */
1172 	fdrop(fp, p);
1173 	p->p_retval[0] = indx;
1174 	return (0);
1175 bad:
	/*
	 * Failure after the file was set up: remove it from the
	 * descriptor table if it is still there (dropping the table's
	 * reference), then drop our own reference.
	 */
1176 	if (fdp->fd_ofiles[indx] == fp) {
1177 		fdp->fd_ofiles[indx] = NULL;
1178 		fdrop(fp, p);
1179 	}
1180 	fdrop(fp, p);
1181 	return (error);
1182 }
1183 
1184 #ifdef COMPAT_43
1185 /*
1186  * Create a file.
1187  */
1188 #ifndef _SYS_SYSPROTO_H_
1189 struct ocreat_args {
1190 	char	*path;
1191 	int	mode;
1192 };
1193 #endif
1194 int
1195 ocreat(p, uap)
1196 	struct proc *p;
1197 	register struct ocreat_args /* {
1198 		syscallarg(char *) path;
1199 		syscallarg(int) mode;
1200 	} */ *uap;
1201 {
1202 	struct open_args /* {
1203 		syscallarg(char *) path;
1204 		syscallarg(int) flags;
1205 		syscallarg(int) mode;
1206 	} */ nuap;
1207 
1208 	SCARG(&nuap, path) = SCARG(uap, path);
1209 	SCARG(&nuap, mode) = SCARG(uap, mode);
1210 	SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC;
1211 	return (open(p, &nuap));
1212 }
1213 #endif /* COMPAT_43 */
1214 
1215 /*
1216  * Create a special file.
1217  */
1218 #ifndef _SYS_SYSPROTO_H_
1219 struct mknod_args {
1220 	char	*path;
1221 	int	mode;
1222 	int	dev;
1223 };
1224 #endif
1225 /* ARGSUSED */
1226 int
1227 mknod(p, uap)
1228 	struct proc *p;
1229 	register struct mknod_args /* {
1230 		syscallarg(char *) path;
1231 		syscallarg(int) mode;
1232 		syscallarg(int) dev;
1233 	} */ *uap;
1234 {
1235 	struct vnode *vp;
1236 	struct mount *mp;
1237 	struct vattr vattr;
1238 	int error;
1239 	int whiteout = 0;
1240 	struct nameidata nd;
1241 
1242 	switch (SCARG(uap, mode) & S_IFMT) {
1243 	case S_IFCHR:
1244 	case S_IFBLK:
1245 		error = suser(p);
1246 		break;
1247 	default:
1248 		error = suser_xxx(0, p, PRISON_ROOT);
1249 		break;
1250 	}
1251 	if (error)
1252 		return (error);
1253 restart:
1254 	bwillwrite();
1255 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1256 	if ((error = namei(&nd)) != 0)
1257 		return (error);
1258 	vp = nd.ni_vp;
1259 	if (vp != NULL) {
1260 		vrele(vp);
1261 		error = EEXIST;
1262 	} else {
1263 		VATTR_NULL(&vattr);
1264 		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
1265 		vattr.va_rdev = SCARG(uap, dev);
1266 		whiteout = 0;
1267 
1268 		switch (SCARG(uap, mode) & S_IFMT) {
1269 		case S_IFMT:	/* used by badsect to flag bad sectors */
1270 			vattr.va_type = VBAD;
1271 			break;
1272 		case S_IFCHR:
1273 			vattr.va_type = VCHR;
1274 			break;
1275 		case S_IFBLK:
1276 			vattr.va_type = VBLK;
1277 			break;
1278 		case S_IFWHT:
1279 			whiteout = 1;
1280 			break;
1281 		default:
1282 			error = EINVAL;
1283 			break;
1284 		}
1285 	}
1286 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1287 		NDFREE(&nd, NDF_ONLY_PNBUF);
1288 		vput(nd.ni_dvp);
1289 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1290 			return (error);
1291 		goto restart;
1292 	}
1293 	if (!error) {
1294 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1295 		if (whiteout)
1296 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1297 		else {
1298 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1299 						&nd.ni_cnd, &vattr);
1300 			if (error == 0)
1301 				vput(nd.ni_vp);
1302 		}
1303 	}
1304 	NDFREE(&nd, NDF_ONLY_PNBUF);
1305 	vput(nd.ni_dvp);
1306 	vn_finished_write(mp);
1307 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1308 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1309 	return (error);
1310 }
1311 
1312 /*
1313  * Create a named pipe.
1314  */
1315 #ifndef _SYS_SYSPROTO_H_
1316 struct mkfifo_args {
1317 	char	*path;
1318 	int	mode;
1319 };
1320 #endif
1321 /* ARGSUSED */
1322 int
1323 mkfifo(p, uap)
1324 	struct proc *p;
1325 	register struct mkfifo_args /* {
1326 		syscallarg(char *) path;
1327 		syscallarg(int) mode;
1328 	} */ *uap;
1329 {
1330 	struct mount *mp;
1331 	struct vattr vattr;
1332 	int error;
1333 	struct nameidata nd;
1334 
1335 restart:
1336 	bwillwrite();
1337 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1338 	if ((error = namei(&nd)) != 0)
1339 		return (error);
1340 	if (nd.ni_vp != NULL) {
1341 		NDFREE(&nd, NDF_ONLY_PNBUF);
1342 		vrele(nd.ni_vp);
1343 		vput(nd.ni_dvp);
1344 		return (EEXIST);
1345 	}
1346 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1347 		NDFREE(&nd, NDF_ONLY_PNBUF);
1348 		vput(nd.ni_dvp);
1349 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1350 			return (error);
1351 		goto restart;
1352 	}
1353 	VATTR_NULL(&vattr);
1354 	vattr.va_type = VFIFO;
1355 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
1356 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1357 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1358 	if (error == 0)
1359 		vput(nd.ni_vp);
1360 	NDFREE(&nd, NDF_ONLY_PNBUF);
1361 	vput(nd.ni_dvp);
1362 	vn_finished_write(mp);
1363 	return (error);
1364 }
1365 
1366 /*
1367  * Make a hard file link.
1368  */
1369 #ifndef _SYS_SYSPROTO_H_
1370 struct link_args {
1371 	char	*path;
1372 	char	*link;
1373 };
1374 #endif
1375 /* ARGSUSED */
1376 int
1377 link(p, uap)
1378 	struct proc *p;
1379 	register struct link_args /* {
1380 		syscallarg(char *) path;
1381 		syscallarg(char *) link;
1382 	} */ *uap;
1383 {
1384 	struct vnode *vp;
1385 	struct mount *mp;
1386 	struct nameidata nd;
1387 	int error;
1388 
1389 	bwillwrite();
1390 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), p);
1391 	if ((error = namei(&nd)) != 0)
1392 		return (error);
1393 	NDFREE(&nd, NDF_ONLY_PNBUF);
1394 	vp = nd.ni_vp;
1395 	if (vp->v_type == VDIR) {
1396 		vrele(vp);
1397 		return (EPERM);		/* POSIX */
1398 	}
1399 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1400 		vrele(vp);
1401 		return (error);
1402 	}
1403 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
1404 	if ((error = namei(&nd)) == 0) {
1405 		if (nd.ni_vp != NULL) {
1406 			vrele(nd.ni_vp);
1407 			error = EEXIST;
1408 		} else {
1409 			VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1410 			VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1411 			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1412 		}
1413 		NDFREE(&nd, NDF_ONLY_PNBUF);
1414 		vput(nd.ni_dvp);
1415 	}
1416 	vrele(vp);
1417 	vn_finished_write(mp);
1418 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1419 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1420 	return (error);
1421 }
1422 
1423 /*
1424  * Make a symbolic link.
1425  */
1426 #ifndef _SYS_SYSPROTO_H_
1427 struct symlink_args {
1428 	char	*path;
1429 	char	*link;
1430 };
1431 #endif
1432 /* ARGSUSED */
1433 int
1434 symlink(p, uap)
1435 	struct proc *p;
1436 	register struct symlink_args /* {
1437 		syscallarg(char *) path;
1438 		syscallarg(char *) link;
1439 	} */ *uap;
1440 {
1441 	struct mount *mp;
1442 	struct vattr vattr;
1443 	char *path;
1444 	int error;
1445 	struct nameidata nd;
1446 
1447 	path = zalloc(namei_zone);
1448 	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
1449 		goto out;
1450 restart:
1451 	bwillwrite();
1452 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
1453 	if ((error = namei(&nd)) != 0)
1454 		goto out;
1455 	if (nd.ni_vp) {
1456 		NDFREE(&nd, NDF_ONLY_PNBUF);
1457 		vrele(nd.ni_vp);
1458 		vput(nd.ni_dvp);
1459 		error = EEXIST;
1460 		goto out;
1461 	}
1462 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1463 		NDFREE(&nd, NDF_ONLY_PNBUF);
1464 		vput(nd.ni_dvp);
1465 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1466 			return (error);
1467 		goto restart;
1468 	}
1469 	VATTR_NULL(&vattr);
1470 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1471 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1472 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1473 	NDFREE(&nd, NDF_ONLY_PNBUF);
1474 	if (error == 0)
1475 		vput(nd.ni_vp);
1476 	vput(nd.ni_dvp);
1477 	vn_finished_write(mp);
1478 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1479 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1480 out:
1481 	zfree(namei_zone, path);
1482 	return (error);
1483 }
1484 
1485 /*
1486  * Delete a whiteout from the filesystem.
1487  */
1488 /* ARGSUSED */
1489 int
1490 undelete(p, uap)
1491 	struct proc *p;
1492 	register struct undelete_args /* {
1493 		syscallarg(char *) path;
1494 	} */ *uap;
1495 {
1496 	int error;
1497 	struct mount *mp;
1498 	struct nameidata nd;
1499 
1500 restart:
1501 	bwillwrite();
1502 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1503 	    SCARG(uap, path), p);
1504 	error = namei(&nd);
1505 	if (error)
1506 		return (error);
1507 
1508 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1509 		NDFREE(&nd, NDF_ONLY_PNBUF);
1510 		if (nd.ni_vp)
1511 			vrele(nd.ni_vp);
1512 		vput(nd.ni_dvp);
1513 		return (EEXIST);
1514 	}
1515 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1516 		NDFREE(&nd, NDF_ONLY_PNBUF);
1517 		vput(nd.ni_dvp);
1518 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1519 			return (error);
1520 		goto restart;
1521 	}
1522 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1523 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1524 	NDFREE(&nd, NDF_ONLY_PNBUF);
1525 	vput(nd.ni_dvp);
1526 	vn_finished_write(mp);
1527 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1528 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1529 	return (error);
1530 }
1531 
1532 /*
1533  * Delete a name from the filesystem.
1534  */
1535 #ifndef _SYS_SYSPROTO_H_
1536 struct unlink_args {
1537 	char	*path;
1538 };
1539 #endif
1540 /* ARGSUSED */
1541 int
1542 unlink(p, uap)
1543 	struct proc *p;
1544 	struct unlink_args /* {
1545 		syscallarg(char *) path;
1546 	} */ *uap;
1547 {
1548 	struct mount *mp;
1549 	struct vnode *vp;
1550 	int error;
1551 	struct nameidata nd;
1552 
1553 restart:
1554 	bwillwrite();
1555 	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1556 	if ((error = namei(&nd)) != 0)
1557 		return (error);
1558 	vp = nd.ni_vp;
1559 	if (vp->v_type == VDIR)
1560 		error = EPERM;		/* POSIX */
1561 	else {
1562 		/*
1563 		 * The root of a mounted filesystem cannot be deleted.
1564 		 *
1565 		 * XXX: can this only be a VDIR case?
1566 		 */
1567 		if (vp->v_flag & VROOT)
1568 			error = EBUSY;
1569 	}
1570 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1571 		NDFREE(&nd, NDF_ONLY_PNBUF);
1572 		vrele(vp);
1573 		vput(nd.ni_dvp);
1574 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1575 			return (error);
1576 		goto restart;
1577 	}
1578 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1579 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1580 	if (!error) {
1581 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1582 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1583 	}
1584 	NDFREE(&nd, NDF_ONLY_PNBUF);
1585 	vput(nd.ni_dvp);
1586 	vput(vp);
1587 	vn_finished_write(mp);
1588 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1589 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1590 	return (error);
1591 }
1592 
1593 /*
1594  * Reposition read/write file offset.
1595  */
1596 #ifndef _SYS_SYSPROTO_H_
1597 struct lseek_args {
1598 	int	fd;
1599 	int	pad;
1600 	off_t	offset;
1601 	int	whence;
1602 };
1603 #endif
1604 int
1605 lseek(p, uap)
1606 	struct proc *p;
1607 	register struct lseek_args /* {
1608 		syscallarg(int) fd;
1609 		syscallarg(int) pad;
1610 		syscallarg(off_t) offset;
1611 		syscallarg(int) whence;
1612 	} */ *uap;
1613 {
1614 	struct ucred *cred = p->p_ucred;
1615 	register struct filedesc *fdp = p->p_fd;
1616 	register struct file *fp;
1617 	struct vattr vattr;
1618 	int error;
1619 
1620 	if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
1621 	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
1622 		return (EBADF);
1623 	if (fp->f_type != DTYPE_VNODE)
1624 		return (ESPIPE);
1625 	switch (SCARG(uap, whence)) {
1626 	case L_INCR:
1627 		fp->f_offset += SCARG(uap, offset);
1628 		break;
1629 	case L_XTND:
1630 		error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p);
1631 		if (error)
1632 			return (error);
1633 		fp->f_offset = SCARG(uap, offset) + vattr.va_size;
1634 		break;
1635 	case L_SET:
1636 		fp->f_offset = SCARG(uap, offset);
1637 		break;
1638 	default:
1639 		return (EINVAL);
1640 	}
1641 	*(off_t *)(p->p_retval) = fp->f_offset;
1642 	return (0);
1643 }
1644 
1645 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1646 /*
1647  * Reposition read/write file offset.
1648  */
1649 #ifndef _SYS_SYSPROTO_H_
1650 struct olseek_args {
1651 	int	fd;
1652 	long	offset;
1653 	int	whence;
1654 };
1655 #endif
1656 int
1657 olseek(p, uap)
1658 	struct proc *p;
1659 	register struct olseek_args /* {
1660 		syscallarg(int) fd;
1661 		syscallarg(long) offset;
1662 		syscallarg(int) whence;
1663 	} */ *uap;
1664 {
1665 	struct lseek_args /* {
1666 		syscallarg(int) fd;
1667 		syscallarg(int) pad;
1668 		syscallarg(off_t) offset;
1669 		syscallarg(int) whence;
1670 	} */ nuap;
1671 	int error;
1672 
1673 	SCARG(&nuap, fd) = SCARG(uap, fd);
1674 	SCARG(&nuap, offset) = SCARG(uap, offset);
1675 	SCARG(&nuap, whence) = SCARG(uap, whence);
1676 	error = lseek(p, &nuap);
1677 	return (error);
1678 }
1679 #endif /* COMPAT_43 */
1680 
1681 /*
1682  * Check access permissions.
1683  */
1684 #ifndef _SYS_SYSPROTO_H_
1685 struct access_args {
1686 	char	*path;
1687 	int	flags;
1688 };
1689 #endif
1690 int
1691 access(p, uap)
1692 	struct proc *p;
1693 	register struct access_args /* {
1694 		syscallarg(char *) path;
1695 		syscallarg(int) flags;
1696 	} */ *uap;
1697 {
1698 	struct ucred *cred, *tmpcred;
1699 	register struct vnode *vp;
1700 	int error, flags;
1701 	struct nameidata nd;
1702 
1703 	cred = p->p_ucred;
1704 	/*
1705 	 * Create and modify a temporary credential instead of one that
1706 	 * is potentially shared.  This could also mess up socket
1707 	 * buffer accounting which can run in an interrupt context.
1708 	 *
1709 	 * XXX - Depending on how "threads" are finally implemented, it
1710 	 * may be better to explicitly pass the credential to namei()
1711 	 * rather than to modify the potentially shared process structure.
1712 	 */
1713 	tmpcred = crdup(cred);
1714 	tmpcred->cr_uid = p->p_cred->p_ruid;
1715 	tmpcred->cr_groups[0] = p->p_cred->p_rgid;
1716 	p->p_ucred = tmpcred;
1717 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1718 	    SCARG(uap, path), p);
1719 	if ((error = namei(&nd)) != 0)
1720 		goto out1;
1721 	vp = nd.ni_vp;
1722 
1723 	/* Flags == 0 means only check for existence. */
1724 	if (SCARG(uap, flags)) {
1725 		flags = 0;
1726 		if (SCARG(uap, flags) & R_OK)
1727 			flags |= VREAD;
1728 		if (SCARG(uap, flags) & W_OK)
1729 			flags |= VWRITE;
1730 		if (SCARG(uap, flags) & X_OK)
1731 			flags |= VEXEC;
1732 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1733 			error = VOP_ACCESS(vp, flags, tmpcred, p);
1734 	}
1735 	NDFREE(&nd, NDF_ONLY_PNBUF);
1736 	vput(vp);
1737 out1:
1738 	p->p_ucred = cred;
1739 	crfree(tmpcred);
1740 	return (error);
1741 }
1742 
1743 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1744 /*
1745  * Get file status; this version follows links.
1746  */
1747 #ifndef _SYS_SYSPROTO_H_
1748 struct ostat_args {
1749 	char	*path;
1750 	struct ostat *ub;
1751 };
1752 #endif
1753 /* ARGSUSED */
1754 int
1755 ostat(p, uap)
1756 	struct proc *p;
1757 	register struct ostat_args /* {
1758 		syscallarg(char *) path;
1759 		syscallarg(struct ostat *) ub;
1760 	} */ *uap;
1761 {
1762 	struct stat sb;
1763 	struct ostat osb;
1764 	int error;
1765 	struct nameidata nd;
1766 
1767 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1768 	    SCARG(uap, path), p);
1769 	if ((error = namei(&nd)) != 0)
1770 		return (error);
1771 	NDFREE(&nd, NDF_ONLY_PNBUF);
1772 	error = vn_stat(nd.ni_vp, &sb, p);
1773 	vput(nd.ni_vp);
1774 	if (error)
1775 		return (error);
1776 	cvtstat(&sb, &osb);
1777 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1778 	return (error);
1779 }
1780 
1781 /*
1782  * Get file status; this version does not follow links.
1783  */
1784 #ifndef _SYS_SYSPROTO_H_
1785 struct olstat_args {
1786 	char	*path;
1787 	struct ostat *ub;
1788 };
1789 #endif
1790 /* ARGSUSED */
1791 int
1792 olstat(p, uap)
1793 	struct proc *p;
1794 	register struct olstat_args /* {
1795 		syscallarg(char *) path;
1796 		syscallarg(struct ostat *) ub;
1797 	} */ *uap;
1798 {
1799 	struct vnode *vp;
1800 	struct stat sb;
1801 	struct ostat osb;
1802 	int error;
1803 	struct nameidata nd;
1804 
1805 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1806 	    SCARG(uap, path), p);
1807 	if ((error = namei(&nd)) != 0)
1808 		return (error);
1809 	vp = nd.ni_vp;
1810 	error = vn_stat(vp, &sb, p);
1811 	NDFREE(&nd, NDF_ONLY_PNBUF);
1812 	vput(vp);
1813 	if (error)
1814 		return (error);
1815 	cvtstat(&sb, &osb);
1816 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1817 	return (error);
1818 }
1819 
1820 /*
1821  * Convert from an old to a new stat structure.
1822  */
1823 void
1824 cvtstat(st, ost)
1825 	struct stat *st;
1826 	struct ostat *ost;
1827 {
1828 
1829 	ost->st_dev = st->st_dev;
1830 	ost->st_ino = st->st_ino;
1831 	ost->st_mode = st->st_mode;
1832 	ost->st_nlink = st->st_nlink;
1833 	ost->st_uid = st->st_uid;
1834 	ost->st_gid = st->st_gid;
1835 	ost->st_rdev = st->st_rdev;
1836 	if (st->st_size < (quad_t)1 << 32)
1837 		ost->st_size = st->st_size;
1838 	else
1839 		ost->st_size = -2;
1840 	ost->st_atime = st->st_atime;
1841 	ost->st_mtime = st->st_mtime;
1842 	ost->st_ctime = st->st_ctime;
1843 	ost->st_blksize = st->st_blksize;
1844 	ost->st_blocks = st->st_blocks;
1845 	ost->st_flags = st->st_flags;
1846 	ost->st_gen = st->st_gen;
1847 }
1848 #endif /* COMPAT_43 || COMPAT_SUNOS */
1849 
1850 /*
1851  * Get file status; this version follows links.
1852  */
1853 #ifndef _SYS_SYSPROTO_H_
1854 struct stat_args {
1855 	char	*path;
1856 	struct stat *ub;
1857 };
1858 #endif
1859 /* ARGSUSED */
1860 int
1861 stat(p, uap)
1862 	struct proc *p;
1863 	register struct stat_args /* {
1864 		syscallarg(char *) path;
1865 		syscallarg(struct stat *) ub;
1866 	} */ *uap;
1867 {
1868 	struct stat sb;
1869 	int error;
1870 	struct nameidata nd;
1871 
1872 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1873 	    SCARG(uap, path), p);
1874 	if ((error = namei(&nd)) != 0)
1875 		return (error);
1876 	error = vn_stat(nd.ni_vp, &sb, p);
1877 	NDFREE(&nd, NDF_ONLY_PNBUF);
1878 	vput(nd.ni_vp);
1879 	if (error)
1880 		return (error);
1881 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
1882 	return (error);
1883 }
1884 
1885 /*
1886  * Get file status; this version does not follow links.
1887  */
1888 #ifndef _SYS_SYSPROTO_H_
1889 struct lstat_args {
1890 	char	*path;
1891 	struct stat *ub;
1892 };
1893 #endif
1894 /* ARGSUSED */
1895 int
1896 lstat(p, uap)
1897 	struct proc *p;
1898 	register struct lstat_args /* {
1899 		syscallarg(char *) path;
1900 		syscallarg(struct stat *) ub;
1901 	} */ *uap;
1902 {
1903 	int error;
1904 	struct vnode *vp;
1905 	struct stat sb;
1906 	struct nameidata nd;
1907 
1908 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1909 	    SCARG(uap, path), p);
1910 	if ((error = namei(&nd)) != 0)
1911 		return (error);
1912 	vp = nd.ni_vp;
1913 	error = vn_stat(vp, &sb, p);
1914 	NDFREE(&nd, NDF_ONLY_PNBUF);
1915 	vput(vp);
1916 	if (error)
1917 		return (error);
1918 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
1919 	return (error);
1920 }
1921 
1922 /*
1923  * Implementation of the NetBSD stat() function.
1924  * XXX This should probably be collapsed with the FreeBSD version,
1925  * as the differences are only due to vn_stat() clearing spares at
1926  * the end of the structures.  vn_stat could be split to avoid this,
1927  * and thus collapse the following to close to zero code.
1928  */
1929 void
1930 cvtnstat(sb, nsb)
1931 	struct stat *sb;
1932 	struct nstat *nsb;
1933 {
1934 	nsb->st_dev = sb->st_dev;
1935 	nsb->st_ino = sb->st_ino;
1936 	nsb->st_mode = sb->st_mode;
1937 	nsb->st_nlink = sb->st_nlink;
1938 	nsb->st_uid = sb->st_uid;
1939 	nsb->st_gid = sb->st_gid;
1940 	nsb->st_rdev = sb->st_rdev;
1941 	nsb->st_atimespec = sb->st_atimespec;
1942 	nsb->st_mtimespec = sb->st_mtimespec;
1943 	nsb->st_ctimespec = sb->st_ctimespec;
1944 	nsb->st_size = sb->st_size;
1945 	nsb->st_blocks = sb->st_blocks;
1946 	nsb->st_blksize = sb->st_blksize;
1947 	nsb->st_flags = sb->st_flags;
1948 	nsb->st_gen = sb->st_gen;
1949 	nsb->st_qspare[0] = sb->st_qspare[0];
1950 	nsb->st_qspare[1] = sb->st_qspare[1];
1951 }
1952 
1953 #ifndef _SYS_SYSPROTO_H_
1954 struct nstat_args {
1955 	char	*path;
1956 	struct nstat *ub;
1957 };
1958 #endif
1959 /* ARGSUSED */
1960 int
1961 nstat(p, uap)
1962 	struct proc *p;
1963 	register struct nstat_args /* {
1964 		syscallarg(char *) path;
1965 		syscallarg(struct nstat *) ub;
1966 	} */ *uap;
1967 {
1968 	struct stat sb;
1969 	struct nstat nsb;
1970 	int error;
1971 	struct nameidata nd;
1972 
1973 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1974 	    SCARG(uap, path), p);
1975 	if ((error = namei(&nd)) != 0)
1976 		return (error);
1977 	NDFREE(&nd, NDF_ONLY_PNBUF);
1978 	error = vn_stat(nd.ni_vp, &sb, p);
1979 	vput(nd.ni_vp);
1980 	if (error)
1981 		return (error);
1982 	cvtnstat(&sb, &nsb);
1983 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
1984 	return (error);
1985 }
1986 
1987 /*
1988  * NetBSD lstat.  Get file status; this version does not follow links.
1989  */
#ifndef _SYS_SYSPROTO_H_
/*
 * Arguments of nlstat(): the path to examine and the user buffer that
 * receives the NetBSD-format (struct nstat) result.
 */
struct nlstat_args {
	char	*path;
	struct nstat *ub;
};
#endif
1996 /* ARGSUSED */
1997 int
1998 nlstat(p, uap)
1999 	struct proc *p;
2000 	register struct nlstat_args /* {
2001 		syscallarg(char *) path;
2002 		syscallarg(struct nstat *) ub;
2003 	} */ *uap;
2004 {
2005 	int error;
2006 	struct vnode *vp;
2007 	struct stat sb;
2008 	struct nstat nsb;
2009 	struct nameidata nd;
2010 
2011 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2012 	    SCARG(uap, path), p);
2013 	if ((error = namei(&nd)) != 0)
2014 		return (error);
2015 	vp = nd.ni_vp;
2016 	NDFREE(&nd, NDF_ONLY_PNBUF);
2017 	error = vn_stat(vp, &sb, p);
2018 	vput(vp);
2019 	if (error)
2020 		return (error);
2021 	cvtnstat(&sb, &nsb);
2022 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2023 	return (error);
2024 }
2025 
2026 /*
2027  * Get configurable pathname variables.
2028  */
2029 #ifndef _SYS_SYSPROTO_H_
2030 struct pathconf_args {
2031 	char	*path;
2032 	int	name;
2033 };
2034 #endif
2035 /* ARGSUSED */
2036 int
2037 pathconf(p, uap)
2038 	struct proc *p;
2039 	register struct pathconf_args /* {
2040 		syscallarg(char *) path;
2041 		syscallarg(int) name;
2042 	} */ *uap;
2043 {
2044 	int error;
2045 	struct nameidata nd;
2046 
2047 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2048 	    SCARG(uap, path), p);
2049 	if ((error = namei(&nd)) != 0)
2050 		return (error);
2051 	NDFREE(&nd, NDF_ONLY_PNBUF);
2052 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), p->p_retval);
2053 	vput(nd.ni_vp);
2054 	return (error);
2055 }
2056 
2057 /*
2058  * Return target name of a symbolic link.
2059  */
2060 #ifndef _SYS_SYSPROTO_H_
2061 struct readlink_args {
2062 	char	*path;
2063 	char	*buf;
2064 	int	count;
2065 };
2066 #endif
2067 /* ARGSUSED */
2068 int
2069 readlink(p, uap)
2070 	struct proc *p;
2071 	register struct readlink_args /* {
2072 		syscallarg(char *) path;
2073 		syscallarg(char *) buf;
2074 		syscallarg(int) count;
2075 	} */ *uap;
2076 {
2077 	register struct vnode *vp;
2078 	struct iovec aiov;
2079 	struct uio auio;
2080 	int error;
2081 	struct nameidata nd;
2082 
2083 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2084 	    SCARG(uap, path), p);
2085 	if ((error = namei(&nd)) != 0)
2086 		return (error);
2087 	NDFREE(&nd, NDF_ONLY_PNBUF);
2088 	vp = nd.ni_vp;
2089 	if (vp->v_type != VLNK)
2090 		error = EINVAL;
2091 	else {
2092 		aiov.iov_base = SCARG(uap, buf);
2093 		aiov.iov_len = SCARG(uap, count);
2094 		auio.uio_iov = &aiov;
2095 		auio.uio_iovcnt = 1;
2096 		auio.uio_offset = 0;
2097 		auio.uio_rw = UIO_READ;
2098 		auio.uio_segflg = UIO_USERSPACE;
2099 		auio.uio_procp = p;
2100 		auio.uio_resid = SCARG(uap, count);
2101 		error = VOP_READLINK(vp, &auio, p->p_ucred);
2102 	}
2103 	vput(vp);
2104 	p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
2105 	return (error);
2106 }
2107 
2108 /*
2109  * Common implementation code for chflags() and fchflags().
2110  */
2111 static int
2112 setfflags(p, vp, flags)
2113 	struct proc *p;
2114 	struct vnode *vp;
2115 	int flags;
2116 {
2117 	int error;
2118 	struct mount *mp;
2119 	struct vattr vattr;
2120 
2121 	/*
2122 	 * Prevent non-root users from setting flags on devices.  When
2123 	 * a device is reused, users can retain ownership of the device
2124 	 * if they are allowed to set flags and programs assume that
2125 	 * chown can't fail when done as root.
2126 	 */
2127 	if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
2128 	    ((error = suser_xxx(p->p_ucred, p, PRISON_ROOT)) != 0))
2129 		return (error);
2130 
2131 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2132 		return (error);
2133 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2134 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2135 	VATTR_NULL(&vattr);
2136 	vattr.va_flags = flags;
2137 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2138 	VOP_UNLOCK(vp, 0, p);
2139 	vn_finished_write(mp);
2140 	return (error);
2141 }
2142 
2143 /*
2144  * Change flags of a file given a path name.
2145  */
2146 #ifndef _SYS_SYSPROTO_H_
2147 struct chflags_args {
2148 	char	*path;
2149 	int	flags;
2150 };
2151 #endif
2152 /* ARGSUSED */
2153 int
2154 chflags(p, uap)
2155 	struct proc *p;
2156 	register struct chflags_args /* {
2157 		syscallarg(char *) path;
2158 		syscallarg(int) flags;
2159 	} */ *uap;
2160 {
2161 	int error;
2162 	struct nameidata nd;
2163 
2164 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2165 	if ((error = namei(&nd)) != 0)
2166 		return (error);
2167 	NDFREE(&nd, NDF_ONLY_PNBUF);
2168 	error = setfflags(p, nd.ni_vp, SCARG(uap, flags));
2169 	vrele(nd.ni_vp);
2170 	return error;
2171 }
2172 
2173 /*
2174  * Change flags of a file given a file descriptor.
2175  */
2176 #ifndef _SYS_SYSPROTO_H_
2177 struct fchflags_args {
2178 	int	fd;
2179 	int	flags;
2180 };
2181 #endif
2182 /* ARGSUSED */
2183 int
2184 fchflags(p, uap)
2185 	struct proc *p;
2186 	register struct fchflags_args /* {
2187 		syscallarg(int) fd;
2188 		syscallarg(int) flags;
2189 	} */ *uap;
2190 {
2191 	struct file *fp;
2192 	int error;
2193 
2194 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2195 		return (error);
2196 	return setfflags(p, (struct vnode *) fp->f_data, SCARG(uap, flags));
2197 }
2198 
2199 /*
2200  * Common implementation code for chmod(), lchmod() and fchmod().
2201  */
2202 static int
2203 setfmode(p, vp, mode)
2204 	struct proc *p;
2205 	struct vnode *vp;
2206 	int mode;
2207 {
2208 	int error;
2209 	struct mount *mp;
2210 	struct vattr vattr;
2211 
2212 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2213 		return (error);
2214 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2215 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2216 	VATTR_NULL(&vattr);
2217 	vattr.va_mode = mode & ALLPERMS;
2218 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2219 	VOP_UNLOCK(vp, 0, p);
2220 	vn_finished_write(mp);
2221 	return error;
2222 }
2223 
2224 /*
2225  * Change mode of a file given path name.
2226  */
2227 #ifndef _SYS_SYSPROTO_H_
2228 struct chmod_args {
2229 	char	*path;
2230 	int	mode;
2231 };
2232 #endif
2233 /* ARGSUSED */
2234 int
2235 chmod(p, uap)
2236 	struct proc *p;
2237 	register struct chmod_args /* {
2238 		syscallarg(char *) path;
2239 		syscallarg(int) mode;
2240 	} */ *uap;
2241 {
2242 	int error;
2243 	struct nameidata nd;
2244 
2245 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2246 	if ((error = namei(&nd)) != 0)
2247 		return (error);
2248 	NDFREE(&nd, NDF_ONLY_PNBUF);
2249 	error = setfmode(p, nd.ni_vp, SCARG(uap, mode));
2250 	vrele(nd.ni_vp);
2251 	return error;
2252 }
2253 
2254 /*
2255  * Change mode of a file given path name (don't follow links.)
2256  */
2257 #ifndef _SYS_SYSPROTO_H_
2258 struct lchmod_args {
2259 	char	*path;
2260 	int	mode;
2261 };
2262 #endif
2263 /* ARGSUSED */
2264 int
2265 lchmod(p, uap)
2266 	struct proc *p;
2267 	register struct lchmod_args /* {
2268 		syscallarg(char *) path;
2269 		syscallarg(int) mode;
2270 	} */ *uap;
2271 {
2272 	int error;
2273 	struct nameidata nd;
2274 
2275 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2276 	if ((error = namei(&nd)) != 0)
2277 		return (error);
2278 	NDFREE(&nd, NDF_ONLY_PNBUF);
2279 	error = setfmode(p, nd.ni_vp, SCARG(uap, mode));
2280 	vrele(nd.ni_vp);
2281 	return error;
2282 }
2283 
2284 /*
2285  * Change mode of a file given a file descriptor.
2286  */
2287 #ifndef _SYS_SYSPROTO_H_
2288 struct fchmod_args {
2289 	int	fd;
2290 	int	mode;
2291 };
2292 #endif
2293 /* ARGSUSED */
2294 int
2295 fchmod(p, uap)
2296 	struct proc *p;
2297 	register struct fchmod_args /* {
2298 		syscallarg(int) fd;
2299 		syscallarg(int) mode;
2300 	} */ *uap;
2301 {
2302 	struct file *fp;
2303 	int error;
2304 
2305 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2306 		return (error);
2307 	return setfmode(p, (struct vnode *)fp->f_data, SCARG(uap, mode));
2308 }
2309 
2310 /*
2311  * Common implementation for chown(), lchown(), and fchown()
2312  */
2313 static int
2314 setfown(p, vp, uid, gid)
2315 	struct proc *p;
2316 	struct vnode *vp;
2317 	uid_t uid;
2318 	gid_t gid;
2319 {
2320 	int error;
2321 	struct mount *mp;
2322 	struct vattr vattr;
2323 
2324 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2325 		return (error);
2326 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2327 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2328 	VATTR_NULL(&vattr);
2329 	vattr.va_uid = uid;
2330 	vattr.va_gid = gid;
2331 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2332 	VOP_UNLOCK(vp, 0, p);
2333 	vn_finished_write(mp);
2334 	return error;
2335 }
2336 
2337 /*
2338  * Set ownership given a path name.
2339  */
2340 #ifndef _SYS_SYSPROTO_H_
2341 struct chown_args {
2342 	char	*path;
2343 	int	uid;
2344 	int	gid;
2345 };
2346 #endif
2347 /* ARGSUSED */
2348 int
2349 chown(p, uap)
2350 	struct proc *p;
2351 	register struct chown_args /* {
2352 		syscallarg(char *) path;
2353 		syscallarg(int) uid;
2354 		syscallarg(int) gid;
2355 	} */ *uap;
2356 {
2357 	int error;
2358 	struct nameidata nd;
2359 
2360 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2361 	if ((error = namei(&nd)) != 0)
2362 		return (error);
2363 	NDFREE(&nd, NDF_ONLY_PNBUF);
2364 	error = setfown(p, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2365 	vrele(nd.ni_vp);
2366 	return (error);
2367 }
2368 
2369 /*
2370  * Set ownership given a path name, do not cross symlinks.
2371  */
2372 #ifndef _SYS_SYSPROTO_H_
2373 struct lchown_args {
2374 	char	*path;
2375 	int	uid;
2376 	int	gid;
2377 };
2378 #endif
2379 /* ARGSUSED */
2380 int
2381 lchown(p, uap)
2382 	struct proc *p;
2383 	register struct lchown_args /* {
2384 		syscallarg(char *) path;
2385 		syscallarg(int) uid;
2386 		syscallarg(int) gid;
2387 	} */ *uap;
2388 {
2389 	int error;
2390 	struct nameidata nd;
2391 
2392 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2393 	if ((error = namei(&nd)) != 0)
2394 		return (error);
2395 	NDFREE(&nd, NDF_ONLY_PNBUF);
2396 	error = setfown(p, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2397 	vrele(nd.ni_vp);
2398 	return (error);
2399 }
2400 
2401 /*
2402  * Set ownership given a file descriptor.
2403  */
2404 #ifndef _SYS_SYSPROTO_H_
2405 struct fchown_args {
2406 	int	fd;
2407 	int	uid;
2408 	int	gid;
2409 };
2410 #endif
2411 /* ARGSUSED */
2412 int
2413 fchown(p, uap)
2414 	struct proc *p;
2415 	register struct fchown_args /* {
2416 		syscallarg(int) fd;
2417 		syscallarg(int) uid;
2418 		syscallarg(int) gid;
2419 	} */ *uap;
2420 {
2421 	struct file *fp;
2422 	int error;
2423 
2424 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2425 		return (error);
2426 	return setfown(p, (struct vnode *)fp->f_data,
2427 		SCARG(uap, uid), SCARG(uap, gid));
2428 }
2429 
2430 /*
2431  * Common implementation code for utimes(), lutimes(), and futimes().
2432  */
2433 static int
2434 getutimes(usrtvp, tsp)
2435 	const struct timeval *usrtvp;
2436 	struct timespec *tsp;
2437 {
2438 	struct timeval tv[2];
2439 	int error;
2440 
2441 	if (usrtvp == NULL) {
2442 		microtime(&tv[0]);
2443 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2444 		tsp[1] = tsp[0];
2445 	} else {
2446 		if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
2447 			return (error);
2448 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2449 		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
2450 	}
2451 	return 0;
2452 }
2453 
2454 /*
2455  * Common implementation code for utimes(), lutimes(), and futimes().
2456  */
2457 static int
2458 setutimes(p, vp, ts, nullflag)
2459 	struct proc *p;
2460 	struct vnode *vp;
2461 	const struct timespec *ts;
2462 	int nullflag;
2463 {
2464 	int error;
2465 	struct mount *mp;
2466 	struct vattr vattr;
2467 
2468 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2469 		return (error);
2470 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2471 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2472 	VATTR_NULL(&vattr);
2473 	vattr.va_atime = ts[0];
2474 	vattr.va_mtime = ts[1];
2475 	if (nullflag)
2476 		vattr.va_vaflags |= VA_UTIMES_NULL;
2477 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2478 	VOP_UNLOCK(vp, 0, p);
2479 	vn_finished_write(mp);
2480 	return error;
2481 }
2482 
2483 /*
2484  * Set the access and modification times of a file.
2485  */
2486 #ifndef _SYS_SYSPROTO_H_
2487 struct utimes_args {
2488 	char	*path;
2489 	struct	timeval *tptr;
2490 };
2491 #endif
2492 /* ARGSUSED */
2493 int
2494 utimes(p, uap)
2495 	struct proc *p;
2496 	register struct utimes_args /* {
2497 		syscallarg(char *) path;
2498 		syscallarg(struct timeval *) tptr;
2499 	} */ *uap;
2500 {
2501 	struct timespec ts[2];
2502 	struct timeval *usrtvp;
2503 	int error;
2504 	struct nameidata nd;
2505 
2506 	usrtvp = SCARG(uap, tptr);
2507 	if ((error = getutimes(usrtvp, ts)) != 0)
2508 		return (error);
2509 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2510 	if ((error = namei(&nd)) != 0)
2511 		return (error);
2512 	NDFREE(&nd, NDF_ONLY_PNBUF);
2513 	error = setutimes(p, nd.ni_vp, ts, usrtvp == NULL);
2514 	vrele(nd.ni_vp);
2515 	return (error);
2516 }
2517 
2518 /*
2519  * Set the access and modification times of a file.
2520  */
2521 #ifndef _SYS_SYSPROTO_H_
2522 struct lutimes_args {
2523 	char	*path;
2524 	struct	timeval *tptr;
2525 };
2526 #endif
2527 /* ARGSUSED */
2528 int
2529 lutimes(p, uap)
2530 	struct proc *p;
2531 	register struct lutimes_args /* {
2532 		syscallarg(char *) path;
2533 		syscallarg(struct timeval *) tptr;
2534 	} */ *uap;
2535 {
2536 	struct timespec ts[2];
2537 	struct timeval *usrtvp;
2538 	int error;
2539 	struct nameidata nd;
2540 
2541 	usrtvp = SCARG(uap, tptr);
2542 	if ((error = getutimes(usrtvp, ts)) != 0)
2543 		return (error);
2544 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2545 	if ((error = namei(&nd)) != 0)
2546 		return (error);
2547 	NDFREE(&nd, NDF_ONLY_PNBUF);
2548 	error = setutimes(p, nd.ni_vp, ts, usrtvp == NULL);
2549 	vrele(nd.ni_vp);
2550 	return (error);
2551 }
2552 
2553 /*
2554  * Set the access and modification times of a file.
2555  */
2556 #ifndef _SYS_SYSPROTO_H_
2557 struct futimes_args {
2558 	int	fd;
2559 	struct	timeval *tptr;
2560 };
2561 #endif
2562 /* ARGSUSED */
2563 int
2564 futimes(p, uap)
2565 	struct proc *p;
2566 	register struct futimes_args /* {
2567 		syscallarg(int ) fd;
2568 		syscallarg(struct timeval *) tptr;
2569 	} */ *uap;
2570 {
2571 	struct timespec ts[2];
2572 	struct file *fp;
2573 	struct timeval *usrtvp;
2574 	int error;
2575 
2576 	usrtvp = SCARG(uap, tptr);
2577 	if ((error = getutimes(usrtvp, ts)) != 0)
2578 		return (error);
2579 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2580 		return (error);
2581 	return setutimes(p, (struct vnode *)fp->f_data, ts, usrtvp == NULL);
2582 }
2583 
2584 /*
2585  * Truncate a file given its path name.
2586  */
2587 #ifndef _SYS_SYSPROTO_H_
2588 struct truncate_args {
2589 	char	*path;
2590 	int	pad;
2591 	off_t	length;
2592 };
2593 #endif
2594 /* ARGSUSED */
2595 int
2596 truncate(p, uap)
2597 	struct proc *p;
2598 	register struct truncate_args /* {
2599 		syscallarg(char *) path;
2600 		syscallarg(int) pad;
2601 		syscallarg(off_t) length;
2602 	} */ *uap;
2603 {
2604 	struct mount *mp;
2605 	struct vnode *vp;
2606 	struct vattr vattr;
2607 	int error;
2608 	struct nameidata nd;
2609 
2610 	if (uap->length < 0)
2611 		return(EINVAL);
2612 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2613 	if ((error = namei(&nd)) != 0)
2614 		return (error);
2615 	vp = nd.ni_vp;
2616 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2617 		vrele(vp);
2618 		return (error);
2619 	}
2620 	NDFREE(&nd, NDF_ONLY_PNBUF);
2621 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2622 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2623 	if (vp->v_type == VDIR)
2624 		error = EISDIR;
2625 	else if ((error = vn_writechk(vp)) == 0 &&
2626 	    (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) {
2627 		VATTR_NULL(&vattr);
2628 		vattr.va_size = SCARG(uap, length);
2629 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2630 	}
2631 	vput(vp);
2632 	vn_finished_write(mp);
2633 	return (error);
2634 }
2635 
2636 /*
2637  * Truncate a file given a file descriptor.
2638  */
2639 #ifndef _SYS_SYSPROTO_H_
2640 struct ftruncate_args {
2641 	int	fd;
2642 	int	pad;
2643 	off_t	length;
2644 };
2645 #endif
2646 /* ARGSUSED */
2647 int
2648 ftruncate(p, uap)
2649 	struct proc *p;
2650 	register struct ftruncate_args /* {
2651 		syscallarg(int) fd;
2652 		syscallarg(int) pad;
2653 		syscallarg(off_t) length;
2654 	} */ *uap;
2655 {
2656 	struct mount *mp;
2657 	struct vattr vattr;
2658 	struct vnode *vp;
2659 	struct file *fp;
2660 	int error;
2661 
2662 	if (uap->length < 0)
2663 		return(EINVAL);
2664 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2665 		return (error);
2666 	if ((fp->f_flag & FWRITE) == 0)
2667 		return (EINVAL);
2668 	vp = (struct vnode *)fp->f_data;
2669 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2670 		return (error);
2671 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2672 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2673 	if (vp->v_type == VDIR)
2674 		error = EISDIR;
2675 	else if ((error = vn_writechk(vp)) == 0) {
2676 		VATTR_NULL(&vattr);
2677 		vattr.va_size = SCARG(uap, length);
2678 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2679 	}
2680 	VOP_UNLOCK(vp, 0, p);
2681 	vn_finished_write(mp);
2682 	return (error);
2683 }
2684 
2685 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2686 /*
2687  * Truncate a file given its path name.
2688  */
2689 #ifndef _SYS_SYSPROTO_H_
2690 struct otruncate_args {
2691 	char	*path;
2692 	long	length;
2693 };
2694 #endif
2695 /* ARGSUSED */
2696 int
2697 otruncate(p, uap)
2698 	struct proc *p;
2699 	register struct otruncate_args /* {
2700 		syscallarg(char *) path;
2701 		syscallarg(long) length;
2702 	} */ *uap;
2703 {
2704 	struct truncate_args /* {
2705 		syscallarg(char *) path;
2706 		syscallarg(int) pad;
2707 		syscallarg(off_t) length;
2708 	} */ nuap;
2709 
2710 	SCARG(&nuap, path) = SCARG(uap, path);
2711 	SCARG(&nuap, length) = SCARG(uap, length);
2712 	return (truncate(p, &nuap));
2713 }
2714 
2715 /*
2716  * Truncate a file given a file descriptor.
2717  */
2718 #ifndef _SYS_SYSPROTO_H_
2719 struct oftruncate_args {
2720 	int	fd;
2721 	long	length;
2722 };
2723 #endif
2724 /* ARGSUSED */
2725 int
2726 oftruncate(p, uap)
2727 	struct proc *p;
2728 	register struct oftruncate_args /* {
2729 		syscallarg(int) fd;
2730 		syscallarg(long) length;
2731 	} */ *uap;
2732 {
2733 	struct ftruncate_args /* {
2734 		syscallarg(int) fd;
2735 		syscallarg(int) pad;
2736 		syscallarg(off_t) length;
2737 	} */ nuap;
2738 
2739 	SCARG(&nuap, fd) = SCARG(uap, fd);
2740 	SCARG(&nuap, length) = SCARG(uap, length);
2741 	return (ftruncate(p, &nuap));
2742 }
2743 #endif /* COMPAT_43 || COMPAT_SUNOS */
2744 
2745 /*
2746  * Sync an open file.
2747  */
2748 #ifndef _SYS_SYSPROTO_H_
2749 struct fsync_args {
2750 	int	fd;
2751 };
2752 #endif
2753 /* ARGSUSED */
2754 int
2755 fsync(p, uap)
2756 	struct proc *p;
2757 	struct fsync_args /* {
2758 		syscallarg(int) fd;
2759 	} */ *uap;
2760 {
2761 	struct vnode *vp;
2762 	struct mount *mp;
2763 	struct file *fp;
2764 	vm_object_t obj;
2765 	int error;
2766 
2767 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2768 		return (error);
2769 	vp = (struct vnode *)fp->f_data;
2770 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2771 		return (error);
2772 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2773 	if (VOP_GETVOBJECT(vp, &obj) == 0)
2774 		vm_object_page_clean(obj, 0, 0, 0);
2775 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
2776 #ifdef SOFTUPDATES
2777 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2778 	    error = softdep_fsync(vp);
2779 #endif
2780 
2781 	VOP_UNLOCK(vp, 0, p);
2782 	vn_finished_write(mp);
2783 	return (error);
2784 }
2785 
2786 /*
2787  * Rename files.  Source and destination must either both be directories,
2788  * or both not be directories.  If target is a directory, it must be empty.
2789  */
2790 #ifndef _SYS_SYSPROTO_H_
2791 struct rename_args {
2792 	char	*from;
2793 	char	*to;
2794 };
2795 #endif
2796 /* ARGSUSED */
2797 int
2798 rename(p, uap)
2799 	struct proc *p;
2800 	register struct rename_args /* {
2801 		syscallarg(char *) from;
2802 		syscallarg(char *) to;
2803 	} */ *uap;
2804 {
2805 	struct mount *mp;
2806 	struct vnode *tvp, *fvp, *tdvp;
2807 	struct nameidata fromnd, tond;
2808 	int error;
2809 
2810 	bwillwrite();
2811 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2812 	    SCARG(uap, from), p);
2813 	if ((error = namei(&fromnd)) != 0)
2814 		return (error);
2815 	fvp = fromnd.ni_vp;
2816 	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
2817 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2818 		vrele(fromnd.ni_dvp);
2819 		vrele(fvp);
2820 		goto out1;
2821 	}
2822 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
2823 	    UIO_USERSPACE, SCARG(uap, to), p);
2824 	if (fromnd.ni_vp->v_type == VDIR)
2825 		tond.ni_cnd.cn_flags |= WILLBEDIR;
2826 	if ((error = namei(&tond)) != 0) {
2827 		/* Translate error code for rename("dir1", "dir2/."). */
2828 		if (error == EISDIR && fvp->v_type == VDIR)
2829 			error = EINVAL;
2830 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2831 		vrele(fromnd.ni_dvp);
2832 		vrele(fvp);
2833 		goto out1;
2834 	}
2835 	tdvp = tond.ni_dvp;
2836 	tvp = tond.ni_vp;
2837 	if (tvp != NULL) {
2838 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2839 			error = ENOTDIR;
2840 			goto out;
2841 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2842 			error = EISDIR;
2843 			goto out;
2844 		}
2845 	}
2846 	if (fvp == tdvp)
2847 		error = EINVAL;
2848 	/*
2849 	 * If source is the same as the destination (that is the
2850 	 * same inode number with the same name in the same directory),
2851 	 * then there is nothing to do.
2852 	 */
2853 	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
2854 	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
2855 	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
2856 	      fromnd.ni_cnd.cn_namelen))
2857 		error = -1;
2858 out:
2859 	if (!error) {
2860 		VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE);
2861 		if (fromnd.ni_dvp != tdvp) {
2862 			VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
2863 		}
2864 		if (tvp) {
2865 			VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE);
2866 		}
2867 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2868 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2869 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2870 		NDFREE(&tond, NDF_ONLY_PNBUF);
2871 	} else {
2872 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2873 		NDFREE(&tond, NDF_ONLY_PNBUF);
2874 		if (tdvp == tvp)
2875 			vrele(tdvp);
2876 		else
2877 			vput(tdvp);
2878 		if (tvp)
2879 			vput(tvp);
2880 		vrele(fromnd.ni_dvp);
2881 		vrele(fvp);
2882 	}
2883 	vrele(tond.ni_startdir);
2884 	vn_finished_write(mp);
2885 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2886 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2887 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2888 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2889 out1:
2890 	if (fromnd.ni_startdir)
2891 		vrele(fromnd.ni_startdir);
2892 	if (error == -1)
2893 		return (0);
2894 	return (error);
2895 }
2896 
2897 /*
2898  * Make a directory file.
2899  */
2900 #ifndef _SYS_SYSPROTO_H_
2901 struct mkdir_args {
2902 	char	*path;
2903 	int	mode;
2904 };
2905 #endif
2906 /* ARGSUSED */
2907 int
2908 mkdir(p, uap)
2909 	struct proc *p;
2910 	register struct mkdir_args /* {
2911 		syscallarg(char *) path;
2912 		syscallarg(int) mode;
2913 	} */ *uap;
2914 {
2915 	struct mount *mp;
2916 	struct vnode *vp;
2917 	struct vattr vattr;
2918 	int error;
2919 	struct nameidata nd;
2920 
2921 restart:
2922 	bwillwrite();
2923 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
2924 	nd.ni_cnd.cn_flags |= WILLBEDIR;
2925 	if ((error = namei(&nd)) != 0)
2926 		return (error);
2927 	vp = nd.ni_vp;
2928 	if (vp != NULL) {
2929 		NDFREE(&nd, NDF_ONLY_PNBUF);
2930 		vrele(vp);
2931 		vput(nd.ni_dvp);
2932 		return (EEXIST);
2933 	}
2934 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2935 		NDFREE(&nd, NDF_ONLY_PNBUF);
2936 		vput(nd.ni_dvp);
2937 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2938 			return (error);
2939 		goto restart;
2940 	}
2941 	VATTR_NULL(&vattr);
2942 	vattr.va_type = VDIR;
2943 	vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask;
2944 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
2945 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2946 	NDFREE(&nd, NDF_ONLY_PNBUF);
2947 	vput(nd.ni_dvp);
2948 	if (!error)
2949 		vput(nd.ni_vp);
2950 	vn_finished_write(mp);
2951 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2952 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2953 	return (error);
2954 }
2955 
2956 /*
2957  * Remove a directory file.
2958  */
2959 #ifndef _SYS_SYSPROTO_H_
2960 struct rmdir_args {
2961 	char	*path;
2962 };
2963 #endif
2964 /* ARGSUSED */
2965 int
2966 rmdir(p, uap)
2967 	struct proc *p;
2968 	struct rmdir_args /* {
2969 		syscallarg(char *) path;
2970 	} */ *uap;
2971 {
2972 	struct mount *mp;
2973 	struct vnode *vp;
2974 	int error;
2975 	struct nameidata nd;
2976 
2977 restart:
2978 	bwillwrite();
2979 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
2980 	    SCARG(uap, path), p);
2981 	if ((error = namei(&nd)) != 0)
2982 		return (error);
2983 	vp = nd.ni_vp;
2984 	if (vp->v_type != VDIR) {
2985 		error = ENOTDIR;
2986 		goto out;
2987 	}
2988 	/*
2989 	 * No rmdir "." please.
2990 	 */
2991 	if (nd.ni_dvp == vp) {
2992 		error = EINVAL;
2993 		goto out;
2994 	}
2995 	/*
2996 	 * The root of a mounted filesystem cannot be deleted.
2997 	 */
2998 	if (vp->v_flag & VROOT) {
2999 		error = EBUSY;
3000 		goto out;
3001 	}
3002 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3003 		NDFREE(&nd, NDF_ONLY_PNBUF);
3004 		if (nd.ni_dvp == vp)
3005 			vrele(nd.ni_dvp);
3006 		else
3007 			vput(nd.ni_dvp);
3008 		vput(vp);
3009 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3010 			return (error);
3011 		goto restart;
3012 	}
3013 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
3014 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
3015 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3016 	vn_finished_write(mp);
3017 out:
3018 	NDFREE(&nd, NDF_ONLY_PNBUF);
3019 	if (nd.ni_dvp == vp)
3020 		vrele(nd.ni_dvp);
3021 	else
3022 		vput(nd.ni_dvp);
3023 	vput(vp);
3024 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3025 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3026 	return (error);
3027 }
3028 
3029 #ifdef COMPAT_43
3030 /*
3031  * Read a block of directory entries in a file system independent format.
3032  */
3033 #ifndef _SYS_SYSPROTO_H_
3034 struct ogetdirentries_args {
3035 	int	fd;
3036 	char	*buf;
3037 	u_int	count;
3038 	long	*basep;
3039 };
3040 #endif
3041 int
3042 ogetdirentries(p, uap)
3043 	struct proc *p;
3044 	register struct ogetdirentries_args /* {
3045 		syscallarg(int) fd;
3046 		syscallarg(char *) buf;
3047 		syscallarg(u_int) count;
3048 		syscallarg(long *) basep;
3049 	} */ *uap;
3050 {
3051 	struct vnode *vp;
3052 	struct file *fp;
3053 	struct uio auio, kuio;
3054 	struct iovec aiov, kiov;
3055 	struct dirent *dp, *edp;
3056 	caddr_t dirbuf;
3057 	int error, eofflag, readcnt;
3058 	long loff;
3059 
3060 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3061 		return (error);
3062 	if ((fp->f_flag & FREAD) == 0)
3063 		return (EBADF);
3064 	vp = (struct vnode *)fp->f_data;
3065 unionread:
3066 	if (vp->v_type != VDIR)
3067 		return (EINVAL);
3068 	aiov.iov_base = SCARG(uap, buf);
3069 	aiov.iov_len = SCARG(uap, count);
3070 	auio.uio_iov = &aiov;
3071 	auio.uio_iovcnt = 1;
3072 	auio.uio_rw = UIO_READ;
3073 	auio.uio_segflg = UIO_USERSPACE;
3074 	auio.uio_procp = p;
3075 	auio.uio_resid = SCARG(uap, count);
3076 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3077 	loff = auio.uio_offset = fp->f_offset;
3078 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3079 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3080 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3081 			    NULL, NULL);
3082 			fp->f_offset = auio.uio_offset;
3083 		} else
3084 #	endif
3085 	{
3086 		kuio = auio;
3087 		kuio.uio_iov = &kiov;
3088 		kuio.uio_segflg = UIO_SYSSPACE;
3089 		kiov.iov_len = SCARG(uap, count);
3090 		MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
3091 		kiov.iov_base = dirbuf;
3092 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3093 			    NULL, NULL);
3094 		fp->f_offset = kuio.uio_offset;
3095 		if (error == 0) {
3096 			readcnt = SCARG(uap, count) - kuio.uio_resid;
3097 			edp = (struct dirent *)&dirbuf[readcnt];
3098 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3099 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3100 					/*
3101 					 * The expected low byte of
3102 					 * dp->d_namlen is our dp->d_type.
3103 					 * The high MBZ byte of dp->d_namlen
3104 					 * is our dp->d_namlen.
3105 					 */
3106 					dp->d_type = dp->d_namlen;
3107 					dp->d_namlen = 0;
3108 #				else
3109 					/*
3110 					 * The dp->d_type is the high byte
3111 					 * of the expected dp->d_namlen,
3112 					 * so must be zero'ed.
3113 					 */
3114 					dp->d_type = 0;
3115 #				endif
3116 				if (dp->d_reclen > 0) {
3117 					dp = (struct dirent *)
3118 					    ((char *)dp + dp->d_reclen);
3119 				} else {
3120 					error = EIO;
3121 					break;
3122 				}
3123 			}
3124 			if (dp >= edp)
3125 				error = uiomove(dirbuf, readcnt, &auio);
3126 		}
3127 		FREE(dirbuf, M_TEMP);
3128 	}
3129 	VOP_UNLOCK(vp, 0, p);
3130 	if (error)
3131 		return (error);
3132 	if (SCARG(uap, count) == auio.uio_resid) {
3133 		if (union_dircheckp) {
3134 			error = union_dircheckp(p, &vp, fp);
3135 			if (error == -1)
3136 				goto unionread;
3137 			if (error)
3138 				return (error);
3139 		}
3140 		if ((vp->v_flag & VROOT) &&
3141 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3142 			struct vnode *tvp = vp;
3143 			vp = vp->v_mount->mnt_vnodecovered;
3144 			VREF(vp);
3145 			fp->f_data = (caddr_t) vp;
3146 			fp->f_offset = 0;
3147 			vrele(tvp);
3148 			goto unionread;
3149 		}
3150 	}
3151 	error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3152 	    sizeof(long));
3153 	p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
3154 	return (error);
3155 }
3156 #endif /* COMPAT_43 */
3157 
3158 /*
3159  * Read a block of directory entries in a file system independent format.
3160  */
3161 #ifndef _SYS_SYSPROTO_H_
3162 struct getdirentries_args {
3163 	int	fd;
3164 	char	*buf;
3165 	u_int	count;
3166 	long	*basep;
3167 };
3168 #endif
3169 int
3170 getdirentries(p, uap)
3171 	struct proc *p;
3172 	register struct getdirentries_args /* {
3173 		syscallarg(int) fd;
3174 		syscallarg(char *) buf;
3175 		syscallarg(u_int) count;
3176 		syscallarg(long *) basep;
3177 	} */ *uap;
3178 {
3179 	struct vnode *vp;
3180 	struct file *fp;
3181 	struct uio auio;
3182 	struct iovec aiov;
3183 	long loff;
3184 	int error, eofflag;
3185 
3186 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3187 		return (error);
3188 	if ((fp->f_flag & FREAD) == 0)
3189 		return (EBADF);
3190 	vp = (struct vnode *)fp->f_data;
3191 unionread:
3192 	if (vp->v_type != VDIR)
3193 		return (EINVAL);
3194 	aiov.iov_base = SCARG(uap, buf);
3195 	aiov.iov_len = SCARG(uap, count);
3196 	auio.uio_iov = &aiov;
3197 	auio.uio_iovcnt = 1;
3198 	auio.uio_rw = UIO_READ;
3199 	auio.uio_segflg = UIO_USERSPACE;
3200 	auio.uio_procp = p;
3201 	auio.uio_resid = SCARG(uap, count);
3202 	/* vn_lock(vp, LK_SHARED | LK_RETRY, p); */
3203 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3204 	loff = auio.uio_offset = fp->f_offset;
3205 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
3206 	fp->f_offset = auio.uio_offset;
3207 	VOP_UNLOCK(vp, 0, p);
3208 	if (error)
3209 		return (error);
3210 	if (SCARG(uap, count) == auio.uio_resid) {
3211 		if (union_dircheckp) {
3212 			error = union_dircheckp(p, &vp, fp);
3213 			if (error == -1)
3214 				goto unionread;
3215 			if (error)
3216 				return (error);
3217 		}
3218 		if ((vp->v_flag & VROOT) &&
3219 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3220 			struct vnode *tvp = vp;
3221 			vp = vp->v_mount->mnt_vnodecovered;
3222 			VREF(vp);
3223 			fp->f_data = (caddr_t) vp;
3224 			fp->f_offset = 0;
3225 			vrele(tvp);
3226 			goto unionread;
3227 		}
3228 	}
3229 	if (SCARG(uap, basep) != NULL) {
3230 		error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3231 		    sizeof(long));
3232 	}
3233 	p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
3234 	return (error);
3235 }
3236 #ifndef _SYS_SYSPROTO_H_
3237 struct getdents_args {
3238 	int fd;
3239 	char *buf;
3240 	size_t count;
3241 };
3242 #endif
3243 int
3244 getdents(p, uap)
3245 	struct proc *p;
3246 	register struct getdents_args /* {
3247 		syscallarg(int) fd;
3248 		syscallarg(char *) buf;
3249 		syscallarg(u_int) count;
3250 	} */ *uap;
3251 {
3252 	struct getdirentries_args ap;
3253 	ap.fd = uap->fd;
3254 	ap.buf = uap->buf;
3255 	ap.count = uap->count;
3256 	ap.basep = NULL;
3257 	return getdirentries(p, &ap);
3258 }
3259 
3260 /*
3261  * Set the mode mask for creation of filesystem nodes.
3262  *
3263  * MP SAFE
3264  */
3265 #ifndef _SYS_SYSPROTO_H_
3266 struct umask_args {
3267 	int	newmask;
3268 };
3269 #endif
3270 int
3271 umask(p, uap)
3272 	struct proc *p;
3273 	struct umask_args /* {
3274 		syscallarg(int) newmask;
3275 	} */ *uap;
3276 {
3277 	register struct filedesc *fdp;
3278 
3279 	fdp = p->p_fd;
3280 	p->p_retval[0] = fdp->fd_cmask;
3281 	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
3282 	return (0);
3283 }
3284 
3285 /*
3286  * Void all references to file by ripping underlying filesystem
3287  * away from vnode.
3288  */
3289 #ifndef _SYS_SYSPROTO_H_
3290 struct revoke_args {
3291 	char	*path;
3292 };
3293 #endif
3294 /* ARGSUSED */
3295 int
3296 revoke(p, uap)
3297 	struct proc *p;
3298 	register struct revoke_args /* {
3299 		syscallarg(char *) path;
3300 	} */ *uap;
3301 {
3302 	struct mount *mp;
3303 	struct vnode *vp;
3304 	struct vattr vattr;
3305 	int error;
3306 	struct nameidata nd;
3307 
3308 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3309 	if ((error = namei(&nd)) != 0)
3310 		return (error);
3311 	vp = nd.ni_vp;
3312 	NDFREE(&nd, NDF_ONLY_PNBUF);
3313 	if (vp->v_type != VCHR) {
3314 		error = EINVAL;
3315 		goto out;
3316 	}
3317 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
3318 		goto out;
3319 	if (p->p_ucred->cr_uid != vattr.va_uid &&
3320 	    (error = suser_xxx(0, p, PRISON_ROOT)))
3321 		goto out;
3322 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3323 		goto out;
3324 	if (vcount(vp) > 1)
3325 		VOP_REVOKE(vp, REVOKEALL);
3326 	vn_finished_write(mp);
3327 out:
3328 	vrele(vp);
3329 	return (error);
3330 }
3331 
3332 /*
3333  * Convert a user file descriptor to a kernel file entry.
3334  */
3335 int
3336 getvnode(fdp, fd, fpp)
3337 	struct filedesc *fdp;
3338 	int fd;
3339 	struct file **fpp;
3340 {
3341 	struct file *fp;
3342 
3343 	if ((u_int)fd >= fdp->fd_nfiles ||
3344 	    (fp = fdp->fd_ofiles[fd]) == NULL)
3345 		return (EBADF);
3346 	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO)
3347 		return (EINVAL);
3348 	*fpp = fp;
3349 	return (0);
3350 }
3351 /*
3352  * Get (NFS) file handle
3353  */
3354 #ifndef _SYS_SYSPROTO_H_
3355 struct getfh_args {
3356 	char	*fname;
3357 	fhandle_t *fhp;
3358 };
3359 #endif
3360 int
3361 getfh(p, uap)
3362 	struct proc *p;
3363 	register struct getfh_args *uap;
3364 {
3365 	struct nameidata nd;
3366 	fhandle_t fh;
3367 	register struct vnode *vp;
3368 	int error;
3369 
3370 	/*
3371 	 * Must be super user
3372 	 */
3373 	error = suser(p);
3374 	if (error)
3375 		return (error);
3376 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
3377 	error = namei(&nd);
3378 	if (error)
3379 		return (error);
3380 	NDFREE(&nd, NDF_ONLY_PNBUF);
3381 	vp = nd.ni_vp;
3382 	bzero(&fh, sizeof(fh));
3383 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3384 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3385 	vput(vp);
3386 	if (error)
3387 		return (error);
3388 	error = copyout(&fh, uap->fhp, sizeof (fh));
3389 	return (error);
3390 }
3391 
3392 /*
3393  * syscall for the rpc.lockd to use to translate a NFS file handle into
3394  * an open descriptor.
3395  *
3396  * warning: do not remove the suser() call or this becomes one giant
3397  * security hole.
3398  */
3399 #ifndef _SYS_SYSPROTO_H_
3400 struct fhopen_args {
3401 	const struct fhandle *u_fhp;
3402 	int flags;
3403 };
3404 #endif
3405 int
3406 fhopen(p, uap)
3407 	struct proc *p;
3408 	struct fhopen_args /* {
3409 		syscallarg(const struct fhandle *) u_fhp;
3410 		syscallarg(int) flags;
3411 	} */ *uap;
3412 {
3413 	struct mount *mp;
3414 	struct vnode *vp;
3415 	struct fhandle fhp;
3416 	struct vattr vat;
3417 	struct vattr *vap = &vat;
3418 	struct flock lf;
3419 	struct file *fp;
3420 	register struct filedesc *fdp = p->p_fd;
3421 	int fmode, mode, error, type;
3422 	struct file *nfp;
3423 	int indx;
3424 
3425 	/*
3426 	 * Must be super user
3427 	 */
3428 	error = suser(p);
3429 	if (error)
3430 		return (error);
3431 
3432 	fmode = FFLAGS(SCARG(uap, flags));
3433 	/* why not allow a non-read/write open for our lockd? */
3434 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3435 		return (EINVAL);
3436 	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
3437 	if (error)
3438 		return(error);
3439 	/* find the mount point */
3440 	mp = vfs_getvfs(&fhp.fh_fsid);
3441 	if (mp == NULL)
3442 		return (ESTALE);
3443 	/* now give me my vnode, it gets returned to me locked */
3444 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3445 	if (error)
3446 		return (error);
3447  	/*
3448 	 * from now on we have to make sure not
3449 	 * to forget about the vnode
3450 	 * any error that causes an abort must vput(vp)
3451 	 * just set error = err and 'goto bad;'.
3452 	 */
3453 
3454 	/*
3455 	 * from vn_open
3456 	 */
3457 	if (vp->v_type == VLNK) {
3458 		error = EMLINK;
3459 		goto bad;
3460 	}
3461 	if (vp->v_type == VSOCK) {
3462 		error = EOPNOTSUPP;
3463 		goto bad;
3464 	}
3465 	mode = 0;
3466 	if (fmode & (FWRITE | O_TRUNC)) {
3467 		if (vp->v_type == VDIR) {
3468 			error = EISDIR;
3469 			goto bad;
3470 		}
3471 		error = vn_writechk(vp);
3472 		if (error)
3473 			goto bad;
3474 		mode |= VWRITE;
3475 	}
3476 	if (fmode & FREAD)
3477 		mode |= VREAD;
3478 	if (mode) {
3479 		error = VOP_ACCESS(vp, mode, p->p_ucred, p);
3480 		if (error)
3481 			goto bad;
3482 	}
3483 	if (fmode & O_TRUNC) {
3484 		VOP_UNLOCK(vp, 0, p);				/* XXX */
3485 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3486 			vrele(vp);
3487 			return (error);
3488 		}
3489 		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
3490 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
3491 		VATTR_NULL(vap);
3492 		vap->va_size = 0;
3493 		error = VOP_SETATTR(vp, vap, p->p_ucred, p);
3494 		vn_finished_write(mp);
3495 		if (error)
3496 			goto bad;
3497 	}
3498 	error = VOP_OPEN(vp, fmode, p->p_ucred, p);
3499 	if (error)
3500 		goto bad;
3501 	/*
3502 	 * Make sure that a VM object is created for VMIO support.
3503 	 */
3504 	if (vn_canvmio(vp) == TRUE) {
3505 		if ((error = vfs_object_create(vp, p, p->p_ucred)) != 0)
3506 			goto bad;
3507 	}
3508 	if (fmode & FWRITE)
3509 		vp->v_writecount++;
3510 
3511 	/*
3512 	 * end of vn_open code
3513 	 */
3514 
3515 	if ((error = falloc(p, &nfp, &indx)) != 0)
3516 		goto bad;
3517 	fp = nfp;
3518 
3519 	/*
3520 	 * Hold an extra reference to avoid having fp ripped out
3521 	 * from under us while we block in the lock op
3522 	 */
3523 	fhold(fp);
3524 	nfp->f_data = (caddr_t)vp;
3525 	nfp->f_flag = fmode & FMASK;
3526 	nfp->f_ops = &vnops;
3527 	nfp->f_type = DTYPE_VNODE;
3528 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
3529 		lf.l_whence = SEEK_SET;
3530 		lf.l_start = 0;
3531 		lf.l_len = 0;
3532 		if (fmode & O_EXLOCK)
3533 			lf.l_type = F_WRLCK;
3534 		else
3535 			lf.l_type = F_RDLCK;
3536 		type = F_FLOCK;
3537 		if ((fmode & FNONBLOCK) == 0)
3538 			type |= F_WAIT;
3539 		VOP_UNLOCK(vp, 0, p);
3540 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
3541 			/*
3542 			 * The lock request failed.  Normally close the
3543 			 * descriptor but handle the case where someone might
3544 			 * have dup()d or close()d it when we weren't looking.
3545 			 */
3546 			if (fdp->fd_ofiles[indx] == fp) {
3547 				fdp->fd_ofiles[indx] = NULL;
3548 				fdrop(fp, p);
3549 			}
3550 			/*
3551 			 * release our private reference
3552 			 */
3553 			fdrop(fp, p);
3554 			return(error);
3555 		}
3556 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3557 		fp->f_flag |= FHASLOCK;
3558 	}
3559 	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
3560 		vfs_object_create(vp, p, p->p_ucred);
3561 
3562 	VOP_UNLOCK(vp, 0, p);
3563 	fdrop(fp, p);
3564 	p->p_retval[0] = indx;
3565 	return (0);
3566 
3567 bad:
3568 	vput(vp);
3569 	return (error);
3570 }
3571 
3572 /*
3573  * Stat an (NFS) file handle.
3574  */
3575 #ifndef _SYS_SYSPROTO_H_
3576 struct fhstat_args {
3577 	struct fhandle *u_fhp;
3578 	struct stat *sb;
3579 };
3580 #endif
3581 int
3582 fhstat(p, uap)
3583 	struct proc *p;
3584 	register struct fhstat_args /* {
3585 		syscallarg(struct fhandle *) u_fhp;
3586 		syscallarg(struct stat *) sb;
3587 	} */ *uap;
3588 {
3589 	struct stat sb;
3590 	fhandle_t fh;
3591 	struct mount *mp;
3592 	struct vnode *vp;
3593 	int error;
3594 
3595 	/*
3596 	 * Must be super user
3597 	 */
3598 	error = suser(p);
3599 	if (error)
3600 		return (error);
3601 
3602 	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
3603 	if (error)
3604 		return (error);
3605 
3606 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3607 		return (ESTALE);
3608 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3609 		return (error);
3610 	error = vn_stat(vp, &sb, p);
3611 	vput(vp);
3612 	if (error)
3613 		return (error);
3614 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
3615 	return (error);
3616 }
3617 
3618 /*
3619  * Implement fstatfs() for (NFS) file handles.
3620  */
3621 #ifndef _SYS_SYSPROTO_H_
3622 struct fhstatfs_args {
3623 	struct fhandle *u_fhp;
3624 	struct statfs *buf;
3625 };
3626 #endif
3627 int
3628 fhstatfs(p, uap)
3629 	struct proc *p;
3630 	struct fhstatfs_args /* {
3631 		syscallarg(struct fhandle) *u_fhp;
3632 		syscallarg(struct statfs) *buf;
3633 	} */ *uap;
3634 {
3635 	struct statfs *sp;
3636 	struct mount *mp;
3637 	struct vnode *vp;
3638 	struct statfs sb;
3639 	fhandle_t fh;
3640 	int error;
3641 
3642 	/*
3643 	 * Must be super user
3644 	 */
3645 	if ((error = suser(p)))
3646 		return (error);
3647 
3648 	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
3649 		return (error);
3650 
3651 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3652 		return (ESTALE);
3653 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3654 		return (error);
3655 	mp = vp->v_mount;
3656 	sp = &mp->mnt_stat;
3657 	vput(vp);
3658 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
3659 		return (error);
3660 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3661 	if (suser_xxx(p->p_ucred, 0, 0)) {
3662 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
3663 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3664 		sp = &sb;
3665 	}
3666 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
3667 }
3668 
3669 /*
3670  * Syscall to push extended attribute configuration information into the
3671  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3672  * a command (int cmd), and attribute name and misc data.  For now, the
3673  * attribute name is left in userspace for consumption by the VFS_op.
3674  * It will probably be changed to be copied into sysspace by the
3675  * syscall in the future, once issues with various consumers of the
3676  * attribute code have raised their hands.
3677  *
3678  * Currently this is used only by UFS Extended Attributes.
3679  */
3680 int
3681 extattrctl(p, uap)
3682 	struct proc *p;
3683 	struct extattrctl_args *uap;
3684 {
3685 	struct vnode *filename_vp;
3686 	struct nameidata nd;
3687 	struct mount *mp;
3688 	char attrname[EXTATTR_MAXNAMELEN];
3689 	int error;
3690 
3691 	/*
3692 	 * SCARG(uap, attrname) not always defined.  We check again later
3693 	 * when we invoke the VFS call so as to pass in NULL there if needed.
3694 	 */
3695 	if (SCARG(uap, attrname) != NULL) {
3696 		error = copyinstr(SCARG(uap, attrname), attrname,
3697 		    EXTATTR_MAXNAMELEN, NULL);
3698 		if (error)
3699 			return (error);
3700 	}
3701 
3702 	/*
3703 	 * SCARG(uap, filename) not always defined.  If it is, grab
3704 	 * a vnode lock, which VFS_EXTATTRCTL() will later release.
3705 	 */
3706 	filename_vp = NULL;
3707 	if (SCARG(uap, filename) != NULL) {
3708 		NDINIT(&nd, LOOKUP | LOCKLEAF, FOLLOW, UIO_USERSPACE,
3709 		    SCARG(uap, filename), p);
3710 		if ((error = namei(&nd)) != 0)
3711 			return (error);
3712 		filename_vp = nd.ni_vp;
3713 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3714 	}
3715 
3716 	/* SCARG(uap, path) always defined. */
3717 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3718 	if ((error = namei(&nd)) != 0)
3719 		return (error);
3720 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
3721 	NDFREE(&nd, 0);
3722 	if (error) {
3723 		if (filename_vp)
3724 			vrele(filename_vp);
3725 		return (error);
3726 	}
3727 
3728 	if (SCARG(uap, attrname) != NULL) {
3729 		error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
3730 		    SCARG(uap, attrnamespace), attrname, p);
3731 	} else {
3732 		error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
3733 		    SCARG(uap, attrnamespace), NULL, p);
3734 	}
3735 
3736 	vn_finished_write(mp);
3737 	/*
3738 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3739 	 * filename_vp, so vrele it if it is defined.
3740 	 */
3741 	if (filename_vp != NULL)
3742 		vrele(filename_vp);
3743 
3744 	return (error);
3745 }
3746 
3747 /*
3748  * extattr_set_vp(): Set a named extended attribute on a file or directory
3749  *
3750  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3751  *            kernelspace string pointer "attrname",
3752  *            userspace iovec array pointer "iovp", unsigned int iovcnt
3753  *            proc "p"
3754  * Returns: 0 on success, an error number otherwise
3755  * Locks: none
3756  * References: vp must be a valid reference for the duration of the call
3757  */
3758 static int
3759 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3760     struct iovec *iovp, unsigned iovcnt, struct proc *p)
3761 {
3762 	struct mount *mp;
3763 	struct uio auio;
3764 	struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
3765 	u_int iovlen, cnt;
3766 	int error, i;
3767 
3768 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3769 		return (error);
3770 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
3771 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3772 
3773 	iovlen = iovcnt * sizeof(struct iovec);
3774 	if (iovcnt > UIO_SMALLIOV) {
3775 		if (iovcnt > UIO_MAXIOV) {
3776 			error = EINVAL;
3777 			goto done;
3778 		}
3779 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3780 		needfree = iov;
3781 	} else
3782 		iov = aiov;
3783 	auio.uio_iov = iov;
3784 	auio.uio_iovcnt = iovcnt;
3785 	auio.uio_rw = UIO_WRITE;
3786 	auio.uio_segflg = UIO_USERSPACE;
3787 	auio.uio_procp = p;
3788 	auio.uio_offset = 0;
3789 	if ((error = copyin((caddr_t)iovp, (caddr_t)iov, iovlen)))
3790 		goto done;
3791 	auio.uio_resid = 0;
3792 	for (i = 0; i < iovcnt; i++) {
3793 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
3794 			error = EINVAL;
3795 			goto done;
3796 		}
3797 		auio.uio_resid += iov->iov_len;
3798 		iov++;
3799 	}
3800 	cnt = auio.uio_resid;
3801 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3802 	    p->p_cred->pc_ucred, p);
3803 	cnt -= auio.uio_resid;
3804 	p->p_retval[0] = cnt;
3805 done:
3806 	if (needfree)
3807 		FREE(needfree, M_IOV);
3808 	VOP_UNLOCK(vp, 0, p);
3809 	vn_finished_write(mp);
3810 	return (error);
3811 }
3812 
3813 int
3814 extattr_set_file(p, uap)
3815 	struct proc *p;
3816 	struct extattr_set_file_args *uap;
3817 {
3818 	struct nameidata nd;
3819 	char attrname[EXTATTR_MAXNAMELEN];
3820 	int error;
3821 
3822 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
3823 	    NULL);
3824 	if (error)
3825 		return (error);
3826 
3827 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3828 	if ((error = namei(&nd)) != 0)
3829 		return (error);
3830 	NDFREE(&nd, NDF_ONLY_PNBUF);
3831 
3832 	error = extattr_set_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
3833 	    SCARG(uap, iovp), SCARG(uap, iovcnt), p);
3834 
3835 	vrele(nd.ni_vp);
3836 	return (error);
3837 }
3838 
3839 int
3840 extattr_set_fd(p, uap)
3841 	struct proc *p;
3842 	struct extattr_set_fd_args *uap;
3843 {
3844 	struct file *fp;
3845 	char attrname[EXTATTR_MAXNAMELEN];
3846 	int error;
3847 
3848 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
3849 	    NULL);
3850 	if (error)
3851 		return (error);
3852 
3853 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3854 		return (error);
3855 
3856 	error = extattr_set_vp((struct vnode *)fp->f_data,
3857 	    SCARG(uap, attrnamespace), attrname, SCARG(uap, iovp),
3858 	    SCARG(uap, iovcnt), p);
3859 
3860 	return (error);
3861 }
3862 
3863 /*
3864  * extattr_get_vp(): Get a named extended attribute on a file or directory
3865  *
3866  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3867  *            kernelspace string pointer "attrname",
3868  *            userspace iovec array pointer "iovp", unsigned int iovcnt,
3869  *            proc "p"
3870  * Returns: 0 on success, an error number otherwise
3871  * Locks: none
3872  * References: vp must be a valid reference for the duration of the call
3873  */
3874 static int
3875 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3876     struct iovec *iovp, unsigned iovcnt, struct proc *p)
3877 {
3878 	struct uio auio;
3879 	struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
3880 	u_int iovlen, cnt;
3881 	int error, i;
3882 
3883 	VOP_LEASE(vp, p, p->p_ucred, LEASE_READ);
3884 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3885 
3886 	iovlen = iovcnt * sizeof (struct iovec);
3887 	if (iovcnt > UIO_SMALLIOV) {
3888 		if (iovcnt > UIO_MAXIOV) {
3889 			error = EINVAL;
3890 			goto done;
3891 		}
3892 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3893 		needfree = iov;
3894 	} else
3895 		iov = aiov;
3896 	auio.uio_iov = iov;
3897 	auio.uio_iovcnt = iovcnt;
3898 	auio.uio_rw = UIO_READ;
3899 	auio.uio_segflg = UIO_USERSPACE;
3900 	auio.uio_procp = p;
3901 	auio.uio_offset = 0;
3902 	if ((error = copyin((caddr_t)iovp, (caddr_t)iov, iovlen)))
3903 		goto done;
3904 	auio.uio_resid = 0;
3905 	for (i = 0; i < iovcnt; i++) {
3906 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
3907 			error = EINVAL;
3908 			goto done;
3909 		}
3910 		auio.uio_resid += iov->iov_len;
3911 		iov++;
3912 	}
3913 	cnt = auio.uio_resid;
3914 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio,
3915 	    p->p_cred->pc_ucred, p);
3916 	cnt -= auio.uio_resid;
3917 	p->p_retval[0] = cnt;
3918 done:
3919 	if (needfree)
3920 		FREE(needfree, M_IOV);
3921 	VOP_UNLOCK(vp, 0, p);
3922 	return (error);
3923 }
3924 
3925 int
3926 extattr_get_file(p, uap)
3927 	struct proc *p;
3928 	struct extattr_get_file_args *uap;
3929 {
3930 	struct nameidata nd;
3931 	char attrname[EXTATTR_MAXNAMELEN];
3932 	int error;
3933 
3934 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
3935 	    NULL);
3936 	if (error)
3937 		return (error);
3938 
3939 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3940 	if ((error = namei(&nd)) != 0)
3941 		return (error);
3942 	NDFREE(&nd, NDF_ONLY_PNBUF);
3943 
3944 	error = extattr_get_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
3945 	    SCARG(uap, iovp), SCARG(uap, iovcnt), p);
3946 
3947 	vrele(nd.ni_vp);
3948 	return (error);
3949 }
3950 
3951 int
3952 extattr_get_fd(p, uap)
3953 	struct proc *p;
3954 	struct extattr_get_fd_args *uap;
3955 {
3956 	struct file *fp;
3957 	char attrname[EXTATTR_MAXNAMELEN];
3958 	int error;
3959 
3960 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
3961 	    NULL);
3962 	if (error)
3963 		return (error);
3964 
3965 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3966 		return (error);
3967 
3968 	error = extattr_get_vp((struct vnode *)fp->f_data,
3969 	    SCARG(uap, attrnamespace), attrname, SCARG(uap, iovp),
3970 	    SCARG(uap, iovcnt), p);
3971 
3972 	return (error);
3973 }
3974 
3975 /*
3976  * extattr_delete_vp(): Delete a named extended attribute on a file or
3977  *                      directory
3978  *
3979  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3980  *            kernelspace string pointer "attrname", proc "p"
3981  * Returns: 0 on success, an error number otherwise
3982  * Locks: none
3983  * References: vp must be a valid reference for the duration of the call
3984  */
3985 static int
3986 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3987     struct proc *p)
3988 {
3989 	struct mount *mp;
3990 	int error;
3991 
3992 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3993 		return (error);
3994 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
3995 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3996 
3997 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
3998 	    p->p_cred->pc_ucred, p);
3999 
4000 	VOP_UNLOCK(vp, 0, p);
4001 	vn_finished_write(mp);
4002 	return (error);
4003 }
4004 
4005 int
4006 extattr_delete_file(p, uap)
4007 	struct proc *p;
4008 	struct extattr_delete_file_args *uap;
4009 {
4010 	struct nameidata nd;
4011 	char attrname[EXTATTR_MAXNAMELEN];
4012 	int error;
4013 
4014 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4015 	     NULL);
4016 	if (error)
4017 		return(error);
4018 
4019 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
4020 	if ((error = namei(&nd)) != 0)
4021 		return(error);
4022 	NDFREE(&nd, NDF_ONLY_PNBUF);
4023 
4024 	error = extattr_delete_vp(nd.ni_vp, SCARG(uap, attrnamespace),
4025 	    attrname, p);
4026 
4027 	vrele(nd.ni_vp);
4028 	return(error);
4029 }
4030 
4031 int
4032 extattr_delete_fd(p, uap)
4033 	struct proc *p;
4034 	struct extattr_delete_fd_args *uap;
4035 {
4036 	struct file *fp;
4037 	char attrname[EXTATTR_MAXNAMELEN];
4038 	int error;
4039 
4040 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4041 	    NULL);
4042 	if (error)
4043 		return (error);
4044 
4045 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
4046 		return (error);
4047 
4048 	error = extattr_delete_vp((struct vnode *)fp->f_data,
4049 	    SCARG(uap, attrnamespace), attrname, p);
4050 
4051 	return (error);
4052 }
4053