xref: /freebsd/sys/kern/vfs_extattr.c (revision 1d66272a85cde1c8a69c58f4b5dd649babd6eca6)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  * $FreeBSD$
40  */
41 
42 /* For 4.3 integer FS ID compatibility */
43 #include "opt_compat.h"
44 #include "opt_ffs.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/sysent.h>
51 #include <sys/malloc.h>
52 #include <sys/mount.h>
53 #include <sys/mutex.h>
54 #include <sys/sysproto.h>
55 #include <sys/namei.h>
56 #include <sys/filedesc.h>
57 #include <sys/kernel.h>
58 #include <sys/fcntl.h>
59 #include <sys/file.h>
60 #include <sys/linker.h>
61 #include <sys/stat.h>
62 #include <sys/unistd.h>
63 #include <sys/vnode.h>
64 #include <sys/proc.h>
65 #include <sys/dirent.h>
66 #include <sys/extattr.h>
67 
68 #include <machine/limits.h>
69 #include <miscfs/union/union.h>
70 #include <sys/sysctl.h>
71 #include <vm/vm.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_zone.h>
74 #include <vm/vm_page.h>
75 
76 static int change_dir __P((struct nameidata *ndp, struct proc *p));
77 static void checkdirs __P((struct vnode *olddp));
78 static int chroot_refuse_vdir_fds __P((struct filedesc *fdp));
79 static int getutimes __P((const struct timeval *, struct timespec *));
80 static int setfown __P((struct proc *, struct vnode *, uid_t, gid_t));
81 static int setfmode __P((struct proc *, struct vnode *, int));
82 static int setfflags __P((struct proc *, struct vnode *, int));
83 static int setutimes __P((struct proc *, struct vnode *,
84     const struct timespec *, int));
85 static int	usermount = 0;	/* if 1, non-root can mount fs. */
86 
87 int (*union_dircheckp) __P((struct proc *, struct vnode **, struct file *));
88 
89 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
90 
91 /*
92  * Virtual File System System Calls
93  */
94 
95 /*
96  * Mount a file system.
97  */
98 #ifndef _SYS_SYSPROTO_H_
99 struct mount_args {
100 	char	*type;
101 	char	*path;
102 	int	flags;
103 	caddr_t	data;
104 };
105 #endif
106 /* ARGSUSED */
107 int
108 mount(p, uap)
109 	struct proc *p;
110 	register struct mount_args /* {
111 		syscallarg(char *) type;
112 		syscallarg(char *) path;
113 		syscallarg(int) flags;
114 		syscallarg(caddr_t) data;
115 	} */ *uap;
116 {
117 	struct vnode *vp;
118 	struct mount *mp;
119 	struct vfsconf *vfsp;
120 	int error, flag = 0, flag2 = 0;
121 	struct vattr va;
122 	struct nameidata nd;
123 	char fstypename[MFSNAMELEN];
124 
125 	if (usermount == 0 && (error = suser(p)))
126 		return (error);
127 	/*
128 	 * Do not allow NFS export by non-root users.
129 	 */
130 	if (SCARG(uap, flags) & MNT_EXPORTED) {
131 		error = suser(p);
132 		if (error)
133 			return (error);
134 	}
135 	/*
136 	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
137 	 */
138 	if (suser_xxx(p->p_ucred, 0, 0))
139 		SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
140 	/*
141 	 * Get vnode to be covered
142 	 */
143 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
144 	    SCARG(uap, path), p);
145 	if ((error = namei(&nd)) != 0)
146 		return (error);
147 	NDFREE(&nd, NDF_ONLY_PNBUF);
148 	vp = nd.ni_vp;
149 	if (SCARG(uap, flags) & MNT_UPDATE) {
150 		if ((vp->v_flag & VROOT) == 0) {
151 			vput(vp);
152 			return (EINVAL);
153 		}
154 		mp = vp->v_mount;
155 		flag = mp->mnt_flag;
156 		flag2 = mp->mnt_kern_flag;
157 		/*
158 		 * We only allow the filesystem to be reloaded if it
159 		 * is currently mounted read-only.
160 		 */
161 		if ((SCARG(uap, flags) & MNT_RELOAD) &&
162 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
163 			vput(vp);
164 			return (EOPNOTSUPP);	/* Needs translation */
165 		}
166 		/*
167 		 * Only root, or the user that did the original mount is
168 		 * permitted to update it.
169 		 */
170 		if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid &&
171 		    (error = suser(p))) {
172 			vput(vp);
173 			return (error);
174 		}
175 		if (vfs_busy(mp, LK_NOWAIT, 0, p)) {
176 			vput(vp);
177 			return (EBUSY);
178 		}
179 		mtx_enter(&vp->v_interlock, MTX_DEF);
180 		if ((vp->v_flag & VMOUNT) != 0 ||
181 		    vp->v_mountedhere != NULL) {
182 			mtx_exit(&vp->v_interlock, MTX_DEF);
183 			vfs_unbusy(mp, p);
184 			vput(vp);
185 			return (EBUSY);
186 		}
187 		vp->v_flag |= VMOUNT;
188 		mtx_exit(&vp->v_interlock, MTX_DEF);
189 		mp->mnt_flag |= SCARG(uap, flags) &
190 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
191 		VOP_UNLOCK(vp, 0, p);
192 		goto update;
193 	}
194 	/*
195 	 * If the user is not root, ensure that they own the directory
196 	 * onto which we are attempting to mount.
197 	 */
198 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) ||
199 	    (va.va_uid != p->p_ucred->cr_uid &&
200 	     (error = suser(p)))) {
201 		vput(vp);
202 		return (error);
203 	}
204 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) {
205 		vput(vp);
206 		return (error);
207 	}
208 	if (vp->v_type != VDIR) {
209 		vput(vp);
210 		return (ENOTDIR);
211 	}
212 	if ((error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL)) != 0) {
213 		vput(vp);
214 		return (error);
215 	}
216 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
217 		if (!strcmp(vfsp->vfc_name, fstypename))
218 			break;
219 	if (vfsp == NULL) {
220 		linker_file_t lf;
221 
222 		/* Only load modules for root (very important!) */
223 		if ((error = suser(p)) != 0) {
224 			vput(vp);
225 			return error;
226 		}
227 		error = linker_load_file(fstypename, &lf);
228 		if (error || lf == NULL) {
229 			vput(vp);
230 			if (lf == NULL)
231 				error = ENODEV;
232 			return error;
233 		}
234 		lf->userrefs++;
235 		/* lookup again, see if the VFS was loaded */
236 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
237 			if (!strcmp(vfsp->vfc_name, fstypename))
238 				break;
239 		if (vfsp == NULL) {
240 			lf->userrefs--;
241 			linker_file_unload(lf);
242 			vput(vp);
243 			return (ENODEV);
244 		}
245 	}
246 	mtx_enter(&vp->v_interlock, MTX_DEF);
247 	if ((vp->v_flag & VMOUNT) != 0 ||
248 	    vp->v_mountedhere != NULL) {
249 		mtx_exit(&vp->v_interlock, MTX_DEF);
250 		vput(vp);
251 		return (EBUSY);
252 	}
253 	vp->v_flag |= VMOUNT;
254 	mtx_exit(&vp->v_interlock, MTX_DEF);
255 
256 	/*
257 	 * Allocate and initialize the filesystem.
258 	 */
259 	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
260 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
261 	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
262 	mp->mnt_op = vfsp->vfc_vfsops;
263 	mp->mnt_vfc = vfsp;
264 	vfsp->vfc_refcount++;
265 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
266 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
267 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
268 	mp->mnt_vnodecovered = vp;
269 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
270 	mp->mnt_iosize_max = DFLTPHYS;
271 	VOP_UNLOCK(vp, 0, p);
272 update:
273 	/*
274 	 * Set the mount level flags.
275 	 */
276 	if (SCARG(uap, flags) & MNT_RDONLY)
277 		mp->mnt_flag |= MNT_RDONLY;
278 	else if (mp->mnt_flag & MNT_RDONLY)
279 		mp->mnt_kern_flag |= MNTK_WANTRDWR;
280 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
281 	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
282 	    MNT_NOSYMFOLLOW | MNT_IGNORE |
283 	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
284 	mp->mnt_flag |= SCARG(uap, flags) & (MNT_NOSUID | MNT_NOEXEC |
285 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
286 	    MNT_NOSYMFOLLOW | MNT_IGNORE |
287 	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
288 	/*
289 	 * Mount the filesystem.
290 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
291 	 * get.  No freeing of cn_pnbuf.
292 	 */
293 	error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p);
294 	if (mp->mnt_flag & MNT_UPDATE) {
295 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
296 			mp->mnt_flag &= ~MNT_RDONLY;
297 		mp->mnt_flag &=~
298 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
299 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
300 		if (error) {
301 			mp->mnt_flag = flag;
302 			mp->mnt_kern_flag = flag2;
303 		}
304 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
305 			if (mp->mnt_syncer == NULL)
306 				error = vfs_allocate_syncvnode(mp);
307 		} else {
308 			if (mp->mnt_syncer != NULL)
309 				vrele(mp->mnt_syncer);
310 			mp->mnt_syncer = NULL;
311 		}
312 		vfs_unbusy(mp, p);
313 		mtx_enter(&vp->v_interlock, MTX_DEF);
314 		vp->v_flag &= ~VMOUNT;
315 		mtx_exit(&vp->v_interlock, MTX_DEF);
316 		vrele(vp);
317 		return (error);
318 	}
319 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
320 	/*
321 	 * Put the new filesystem on the mount list after root.
322 	 */
323 	cache_purge(vp);
324 	if (!error) {
325 		mtx_enter(&vp->v_interlock, MTX_DEF);
326 		vp->v_flag &= ~VMOUNT;
327 		vp->v_mountedhere = mp;
328 		mtx_exit(&vp->v_interlock, MTX_DEF);
329 		mtx_enter(&mountlist_mtx, MTX_DEF);
330 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
331 		mtx_exit(&mountlist_mtx, MTX_DEF);
332 		checkdirs(vp);
333 		VOP_UNLOCK(vp, 0, p);
334 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
335 			error = vfs_allocate_syncvnode(mp);
336 		vfs_unbusy(mp, p);
337 		if ((error = VFS_START(mp, 0, p)) != 0)
338 			vrele(vp);
339 	} else {
340 		mtx_enter(&vp->v_interlock, MTX_DEF);
341 		vp->v_flag &= ~VMOUNT;
342 		mtx_exit(&vp->v_interlock, MTX_DEF);
343 		mp->mnt_vfc->vfc_refcount--;
344 		vfs_unbusy(mp, p);
345 		free((caddr_t)mp, M_MOUNT);
346 		vput(vp);
347 	}
348 	return (error);
349 }
350 
351 /*
352  * Scan all active processes to see if any of them have a current
353  * or root directory onto which the new filesystem has just been
354  * mounted. If so, replace them with the new mount point.
355  */
356 static void
357 checkdirs(olddp)
358 	struct vnode *olddp;
359 {
360 	struct filedesc *fdp;
361 	struct vnode *newdp;
362 	struct proc *p;
363 
364 	if (olddp->v_usecount == 1)
365 		return;
366 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
367 		panic("mount: lost mount");
368 	ALLPROC_LOCK(AP_SHARED);
369 	LIST_FOREACH(p, &allproc, p_list) {
370 		fdp = p->p_fd;
371 		if (fdp->fd_cdir == olddp) {
372 			vrele(fdp->fd_cdir);
373 			VREF(newdp);
374 			fdp->fd_cdir = newdp;
375 		}
376 		if (fdp->fd_rdir == olddp) {
377 			vrele(fdp->fd_rdir);
378 			VREF(newdp);
379 			fdp->fd_rdir = newdp;
380 		}
381 	}
382 	ALLPROC_LOCK(AP_RELEASE);
383 	if (rootvnode == olddp) {
384 		vrele(rootvnode);
385 		VREF(newdp);
386 		rootvnode = newdp;
387 	}
388 	vput(newdp);
389 }
390 
391 /*
392  * Unmount a file system.
393  *
394  * Note: unmount takes a path to the vnode mounted on as argument,
395  * not special file (as before).
396  */
397 #ifndef _SYS_SYSPROTO_H_
398 struct unmount_args {
399 	char	*path;
400 	int	flags;
401 };
402 #endif
403 /* ARGSUSED */
404 int
405 unmount(p, uap)
406 	struct proc *p;
407 	register struct unmount_args /* {
408 		syscallarg(char *) path;
409 		syscallarg(int) flags;
410 	} */ *uap;
411 {
412 	register struct vnode *vp;
413 	struct mount *mp;
414 	int error;
415 	struct nameidata nd;
416 
417 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
418 	    SCARG(uap, path), p);
419 	if ((error = namei(&nd)) != 0)
420 		return (error);
421 	vp = nd.ni_vp;
422 	NDFREE(&nd, NDF_ONLY_PNBUF);
423 	mp = vp->v_mount;
424 
425 	/*
426 	 * Only root, or the user that did the original mount is
427 	 * permitted to unmount this filesystem.
428 	 */
429 	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
430 	    (error = suser(p))) {
431 		vput(vp);
432 		return (error);
433 	}
434 
435 	/*
436 	 * Don't allow unmounting the root file system.
437 	 */
438 	if (mp->mnt_flag & MNT_ROOTFS) {
439 		vput(vp);
440 		return (EINVAL);
441 	}
442 
443 	/*
444 	 * Must be the root of the filesystem
445 	 */
446 	if ((vp->v_flag & VROOT) == 0) {
447 		vput(vp);
448 		return (EINVAL);
449 	}
450 	vput(vp);
451 	return (dounmount(mp, SCARG(uap, flags), p));
452 }
453 
454 /*
455  * Do the actual file system unmount.
456  */
457 int
458 dounmount(mp, flags, p)
459 	struct mount *mp;
460 	int flags;
461 	struct proc *p;
462 {
463 	struct vnode *coveredvp;
464 	int error;
465 	int async_flag;
466 
467 	mtx_enter(&mountlist_mtx, MTX_DEF);
468 	mp->mnt_kern_flag |= MNTK_UNMOUNT;
469 	lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_mtx, p);
470 	vn_start_write(NULL, &mp, V_WAIT);
471 
472 	if (mp->mnt_flag & MNT_EXPUBLIC)
473 		vfs_setpublicfs(NULL, NULL, NULL);
474 
475 	vfs_msync(mp, MNT_WAIT);
476 	async_flag = mp->mnt_flag & MNT_ASYNC;
477 	mp->mnt_flag &=~ MNT_ASYNC;
478 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
479 	if (mp->mnt_syncer != NULL)
480 		vrele(mp->mnt_syncer);
481 	if (((mp->mnt_flag & MNT_RDONLY) ||
482 	     (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
483 	    (flags & MNT_FORCE)) {
484 		error = VFS_UNMOUNT(mp, flags, p);
485 	}
486 	vn_finished_write(mp);
487 	mtx_enter(&mountlist_mtx, MTX_DEF);
488 	if (error) {
489 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
490 			(void) vfs_allocate_syncvnode(mp);
491 		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
492 		mp->mnt_flag |= async_flag;
493 		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
494 		    &mountlist_mtx, p);
495 		if (mp->mnt_kern_flag & MNTK_MWAIT)
496 			wakeup((caddr_t)mp);
497 		return (error);
498 	}
499 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
500 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
501 		coveredvp->v_mountedhere = (struct mount *)0;
502 		vrele(coveredvp);
503 	}
504 	mp->mnt_vfc->vfc_refcount--;
505 	if (!LIST_EMPTY(&mp->mnt_vnodelist))
506 		panic("unmount: dangling vnode");
507 	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, p);
508 	lockdestroy(&mp->mnt_lock);
509 	if (mp->mnt_kern_flag & MNTK_MWAIT)
510 		wakeup((caddr_t)mp);
511 	free((caddr_t)mp, M_MOUNT);
512 	return (0);
513 }
514 
515 /*
516  * Sync each mounted filesystem.
517  */
518 #ifndef _SYS_SYSPROTO_H_
519 struct sync_args {
520         int     dummy;
521 };
522 #endif
523 
524 #ifdef DEBUG
525 static int syncprt = 0;
526 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
527 #endif
528 
529 /* ARGSUSED */
530 int
531 sync(p, uap)
532 	struct proc *p;
533 	struct sync_args *uap;
534 {
535 	struct mount *mp, *nmp;
536 	int asyncflag;
537 
538 	mtx_enter(&mountlist_mtx, MTX_DEF);
539 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
540 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, p)) {
541 			nmp = TAILQ_NEXT(mp, mnt_list);
542 			continue;
543 		}
544 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
545 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
546 			asyncflag = mp->mnt_flag & MNT_ASYNC;
547 			mp->mnt_flag &= ~MNT_ASYNC;
548 			vfs_msync(mp, MNT_NOWAIT);
549 			VFS_SYNC(mp, MNT_NOWAIT,
550 			    ((p != NULL) ? p->p_ucred : NOCRED), p);
551 			mp->mnt_flag |= asyncflag;
552 			vn_finished_write(mp);
553 		}
554 		mtx_enter(&mountlist_mtx, MTX_DEF);
555 		nmp = TAILQ_NEXT(mp, mnt_list);
556 		vfs_unbusy(mp, p);
557 	}
558 	mtx_exit(&mountlist_mtx, MTX_DEF);
559 #if 0
560 /*
561  * XXX don't call vfs_bufstats() yet because that routine
562  * was not imported in the Lite2 merge.
563  */
564 #ifdef DIAGNOSTIC
565 	if (syncprt)
566 		vfs_bufstats();
567 #endif /* DIAGNOSTIC */
568 #endif
569 	return (0);
570 }
571 
572 /* XXX PRISON: could be per prison flag */
573 static int prison_quotas;
574 #if 0
575 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
576 #endif
577 
578 /*
579  * Change filesystem quotas.
580  */
581 #ifndef _SYS_SYSPROTO_H_
582 struct quotactl_args {
583 	char *path;
584 	int cmd;
585 	int uid;
586 	caddr_t arg;
587 };
588 #endif
589 /* ARGSUSED */
590 int
591 quotactl(p, uap)
592 	struct proc *p;
593 	register struct quotactl_args /* {
594 		syscallarg(char *) path;
595 		syscallarg(int) cmd;
596 		syscallarg(int) uid;
597 		syscallarg(caddr_t) arg;
598 	} */ *uap;
599 {
600 	struct mount *mp;
601 	int error;
602 	struct nameidata nd;
603 
604 	if (p->p_prison && !prison_quotas)
605 		return (EPERM);
606 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
607 	if ((error = namei(&nd)) != 0)
608 		return (error);
609 	NDFREE(&nd, NDF_ONLY_PNBUF);
610 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
611 	vrele(nd.ni_vp);
612 	if (error)
613 		return (error);
614 	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
615 	    SCARG(uap, arg), p);
616 	vn_finished_write(mp);
617 	return (error);
618 }
619 
620 /*
621  * Get filesystem statistics.
622  */
623 #ifndef _SYS_SYSPROTO_H_
624 struct statfs_args {
625 	char *path;
626 	struct statfs *buf;
627 };
628 #endif
629 /* ARGSUSED */
630 int
631 statfs(p, uap)
632 	struct proc *p;
633 	register struct statfs_args /* {
634 		syscallarg(char *) path;
635 		syscallarg(struct statfs *) buf;
636 	} */ *uap;
637 {
638 	register struct mount *mp;
639 	register struct statfs *sp;
640 	int error;
641 	struct nameidata nd;
642 	struct statfs sb;
643 
644 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
645 	if ((error = namei(&nd)) != 0)
646 		return (error);
647 	mp = nd.ni_vp->v_mount;
648 	sp = &mp->mnt_stat;
649 	NDFREE(&nd, NDF_ONLY_PNBUF);
650 	vrele(nd.ni_vp);
651 	error = VFS_STATFS(mp, sp, p);
652 	if (error)
653 		return (error);
654 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
655 	if (suser_xxx(p->p_ucred, 0, 0)) {
656 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
657 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
658 		sp = &sb;
659 	}
660 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
661 }
662 
663 /*
664  * Get filesystem statistics.
665  */
666 #ifndef _SYS_SYSPROTO_H_
667 struct fstatfs_args {
668 	int fd;
669 	struct statfs *buf;
670 };
671 #endif
672 /* ARGSUSED */
673 int
674 fstatfs(p, uap)
675 	struct proc *p;
676 	register struct fstatfs_args /* {
677 		syscallarg(int) fd;
678 		syscallarg(struct statfs *) buf;
679 	} */ *uap;
680 {
681 	struct file *fp;
682 	struct mount *mp;
683 	register struct statfs *sp;
684 	int error;
685 	struct statfs sb;
686 
687 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
688 		return (error);
689 	mp = ((struct vnode *)fp->f_data)->v_mount;
690 	sp = &mp->mnt_stat;
691 	error = VFS_STATFS(mp, sp, p);
692 	if (error)
693 		return (error);
694 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
695 	if (suser_xxx(p->p_ucred, 0, 0)) {
696 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
697 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
698 		sp = &sb;
699 	}
700 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
701 }
702 
703 /*
704  * Get statistics on all filesystems.
705  */
706 #ifndef _SYS_SYSPROTO_H_
707 struct getfsstat_args {
708 	struct statfs *buf;
709 	long bufsize;
710 	int flags;
711 };
712 #endif
713 int
714 getfsstat(p, uap)
715 	struct proc *p;
716 	register struct getfsstat_args /* {
717 		syscallarg(struct statfs *) buf;
718 		syscallarg(long) bufsize;
719 		syscallarg(int) flags;
720 	} */ *uap;
721 {
722 	register struct mount *mp, *nmp;
723 	register struct statfs *sp;
724 	caddr_t sfsp;
725 	long count, maxcount, error;
726 
727 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
728 	sfsp = (caddr_t)SCARG(uap, buf);
729 	count = 0;
730 	mtx_enter(&mountlist_mtx, MTX_DEF);
731 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
732 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, p)) {
733 			nmp = TAILQ_NEXT(mp, mnt_list);
734 			continue;
735 		}
736 		if (sfsp && count < maxcount) {
737 			sp = &mp->mnt_stat;
738 			/*
739 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
740 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
741 			 * overrides MNT_WAIT.
742 			 */
743 			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
744 			    (SCARG(uap, flags) & MNT_WAIT)) &&
745 			    (error = VFS_STATFS(mp, sp, p))) {
746 				mtx_enter(&mountlist_mtx, MTX_DEF);
747 				nmp = TAILQ_NEXT(mp, mnt_list);
748 				vfs_unbusy(mp, p);
749 				continue;
750 			}
751 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
752 			error = copyout((caddr_t)sp, sfsp, sizeof(*sp));
753 			if (error) {
754 				vfs_unbusy(mp, p);
755 				return (error);
756 			}
757 			sfsp += sizeof(*sp);
758 		}
759 		count++;
760 		mtx_enter(&mountlist_mtx, MTX_DEF);
761 		nmp = TAILQ_NEXT(mp, mnt_list);
762 		vfs_unbusy(mp, p);
763 	}
764 	mtx_exit(&mountlist_mtx, MTX_DEF);
765 	if (sfsp && count > maxcount)
766 		p->p_retval[0] = maxcount;
767 	else
768 		p->p_retval[0] = count;
769 	return (0);
770 }
771 
772 /*
773  * Change current working directory to a given file descriptor.
774  */
775 #ifndef _SYS_SYSPROTO_H_
776 struct fchdir_args {
777 	int	fd;
778 };
779 #endif
780 /* ARGSUSED */
781 int
782 fchdir(p, uap)
783 	struct proc *p;
784 	struct fchdir_args /* {
785 		syscallarg(int) fd;
786 	} */ *uap;
787 {
788 	register struct filedesc *fdp = p->p_fd;
789 	struct vnode *vp, *tdp;
790 	struct mount *mp;
791 	struct file *fp;
792 	int error;
793 
794 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
795 		return (error);
796 	vp = (struct vnode *)fp->f_data;
797 	VREF(vp);
798 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
799 	if (vp->v_type != VDIR)
800 		error = ENOTDIR;
801 	else
802 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
803 	while (!error && (mp = vp->v_mountedhere) != NULL) {
804 		if (vfs_busy(mp, 0, 0, p))
805 			continue;
806 		error = VFS_ROOT(mp, &tdp);
807 		vfs_unbusy(mp, p);
808 		if (error)
809 			break;
810 		vput(vp);
811 		vp = tdp;
812 	}
813 	if (error) {
814 		vput(vp);
815 		return (error);
816 	}
817 	VOP_UNLOCK(vp, 0, p);
818 	vrele(fdp->fd_cdir);
819 	fdp->fd_cdir = vp;
820 	return (0);
821 }
822 
823 /*
824  * Change current working directory (``.'').
825  */
826 #ifndef _SYS_SYSPROTO_H_
827 struct chdir_args {
828 	char	*path;
829 };
830 #endif
831 /* ARGSUSED */
832 int
833 chdir(p, uap)
834 	struct proc *p;
835 	struct chdir_args /* {
836 		syscallarg(char *) path;
837 	} */ *uap;
838 {
839 	register struct filedesc *fdp = p->p_fd;
840 	int error;
841 	struct nameidata nd;
842 
843 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
844 	    SCARG(uap, path), p);
845 	if ((error = change_dir(&nd, p)) != 0)
846 		return (error);
847 	NDFREE(&nd, NDF_ONLY_PNBUF);
848 	vrele(fdp->fd_cdir);
849 	fdp->fd_cdir = nd.ni_vp;
850 	return (0);
851 }
852 
853 /*
854  * Helper function for raised chroot(2) security function:  Refuse if
855  * any filedescriptors are open directories.
856  */
857 static int
858 chroot_refuse_vdir_fds(fdp)
859 	struct filedesc *fdp;
860 {
861 	struct vnode *vp;
862 	struct file *fp;
863 	int error;
864 	int fd;
865 
866 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
867 		error = getvnode(fdp, fd, &fp);
868 		if (error)
869 			continue;
870 		vp = (struct vnode *)fp->f_data;
871 		if (vp->v_type != VDIR)
872 			continue;
873 		return(EPERM);
874 	}
875 	return (0);
876 }
877 
878 /*
879  * This sysctl determines if we will allow a process to chroot(2) if it
880  * has a directory open:
881  *	0: disallowed for all processes.
882  *	1: allowed for processes that were not already chroot(2)'ed.
883  *	2: allowed for all processes.
884  */
885 
886 static int chroot_allow_open_directories = 1;
887 
888 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
889      &chroot_allow_open_directories, 0, "");
890 
891 /*
892  * Change notion of root (``/'') directory.
893  */
894 #ifndef _SYS_SYSPROTO_H_
895 struct chroot_args {
896 	char	*path;
897 };
898 #endif
899 /* ARGSUSED */
900 int
901 chroot(p, uap)
902 	struct proc *p;
903 	struct chroot_args /* {
904 		syscallarg(char *) path;
905 	} */ *uap;
906 {
907 	register struct filedesc *fdp = p->p_fd;
908 	int error;
909 	struct nameidata nd;
910 
911 	error = suser_xxx(0, p, PRISON_ROOT);
912 	if (error)
913 		return (error);
914 	if (chroot_allow_open_directories == 0 ||
915 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode))
916 		error = chroot_refuse_vdir_fds(fdp);
917 	if (error)
918 		return (error);
919 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
920 	    SCARG(uap, path), p);
921 	if ((error = change_dir(&nd, p)) != 0)
922 		return (error);
923 	NDFREE(&nd, NDF_ONLY_PNBUF);
924 	vrele(fdp->fd_rdir);
925 	fdp->fd_rdir = nd.ni_vp;
926 	if (!fdp->fd_jdir) {
927 		fdp->fd_jdir = nd.ni_vp;
928                 VREF(fdp->fd_jdir);
929 	}
930 	return (0);
931 }
932 
933 /*
934  * Common routine for chroot and chdir.
935  */
936 static int
937 change_dir(ndp, p)
938 	register struct nameidata *ndp;
939 	struct proc *p;
940 {
941 	struct vnode *vp;
942 	int error;
943 
944 	error = namei(ndp);
945 	if (error)
946 		return (error);
947 	vp = ndp->ni_vp;
948 	if (vp->v_type != VDIR)
949 		error = ENOTDIR;
950 	else
951 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
952 	if (error)
953 		vput(vp);
954 	else
955 		VOP_UNLOCK(vp, 0, p);
956 	return (error);
957 }
958 
959 /*
960  * Check permissions, allocate an open file structure,
961  * and call the device open routine if any.
962  */
963 #ifndef _SYS_SYSPROTO_H_
964 struct open_args {
965 	char	*path;
966 	int	flags;
967 	int	mode;
968 };
969 #endif
970 int
971 open(p, uap)
972 	struct proc *p;
973 	register struct open_args /* {
974 		syscallarg(char *) path;
975 		syscallarg(int) flags;
976 		syscallarg(int) mode;
977 	} */ *uap;
978 {
979 	struct filedesc *fdp = p->p_fd;
980 	struct file *fp;
981 	struct vnode *vp;
982 	struct vattr vat;
983 	struct mount *mp;
984 	int cmode, flags, oflags;
985 	struct file *nfp;
986 	int type, indx, error;
987 	struct flock lf;
988 	struct nameidata nd;
989 
990 	oflags = SCARG(uap, flags);
991 	if ((oflags & O_ACCMODE) == O_ACCMODE)
992 		return (EINVAL);
993 	flags = FFLAGS(oflags);
994 	error = falloc(p, &nfp, &indx);
995 	if (error)
996 		return (error);
997 	fp = nfp;
998 	cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
999 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1000 	p->p_dupfd = -indx - 1;			/* XXX check for fdopen */
1001 	/*
1002 	 * Bump the ref count to prevent another process from closing
1003 	 * the descriptor while we are blocked in vn_open()
1004 	 */
1005 	fhold(fp);
1006 	error = vn_open(&nd, &flags, cmode);
1007 	if (error) {
1008 		/*
1009 		 * release our own reference
1010 		 */
1011 		fdrop(fp, p);
1012 
1013 		/*
1014 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1015 		 * responsible for dropping the old contents of ofiles[indx]
1016 		 * if it succeeds.
1017 		 */
1018 		if ((error == ENODEV || error == ENXIO) &&
1019 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
1020 		    (error =
1021 			dupfdopen(p, fdp, indx, p->p_dupfd, flags, error)) == 0) {
1022 			p->p_retval[0] = indx;
1023 			return (0);
1024 		}
1025 		/*
1026 		 * Clean up the descriptor, but only if another thread hadn't
1027 		 * replaced or closed it.
1028 		 */
1029 		if (fdp->fd_ofiles[indx] == fp) {
1030 			fdp->fd_ofiles[indx] = NULL;
1031 			fdrop(fp, p);
1032 		}
1033 
1034 		if (error == ERESTART)
1035 			error = EINTR;
1036 		return (error);
1037 	}
1038 	p->p_dupfd = 0;
1039 	NDFREE(&nd, NDF_ONLY_PNBUF);
1040 	vp = nd.ni_vp;
1041 
1042 	/*
1043 	 * There should be 2 references on the file, one from the descriptor
1044 	 * table, and one for us.
1045 	 *
1046 	 * Handle the case where someone closed the file (via its file
1047 	 * descriptor) while we were blocked.  The end result should look
1048 	 * like opening the file succeeded but it was immediately closed.
1049 	 */
1050 	if (fp->f_count == 1) {
1051 		KASSERT(fdp->fd_ofiles[indx] != fp,
1052 		    ("Open file descriptor lost all refs"));
1053 		VOP_UNLOCK(vp, 0, p);
1054 		vn_close(vp, flags & FMASK, fp->f_cred, p);
1055 		fdrop(fp, p);
1056 		p->p_retval[0] = indx;
1057 		return 0;
1058 	}
1059 
1060 	fp->f_data = (caddr_t)vp;
1061 	fp->f_flag = flags & FMASK;
1062 	fp->f_ops = &vnops;
1063 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1064 	VOP_UNLOCK(vp, 0, p);
1065 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1066 		lf.l_whence = SEEK_SET;
1067 		lf.l_start = 0;
1068 		lf.l_len = 0;
1069 		if (flags & O_EXLOCK)
1070 			lf.l_type = F_WRLCK;
1071 		else
1072 			lf.l_type = F_RDLCK;
1073 		type = F_FLOCK;
1074 		if ((flags & FNONBLOCK) == 0)
1075 			type |= F_WAIT;
1076 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0)
1077 			goto bad;
1078 		fp->f_flag |= FHASLOCK;
1079 	}
1080 	if (flags & O_TRUNC) {
1081 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1082 			goto bad;
1083 		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1084 		VATTR_NULL(&vat);
1085 		vat.va_size = 0;
1086 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1087 		error = VOP_SETATTR(vp, &vat, p->p_ucred, p);
1088 		VOP_UNLOCK(vp, 0, p);
1089 		vn_finished_write(mp);
1090 		if (error)
1091 			goto bad;
1092 	}
1093 	/* assert that vn_open created a backing object if one is needed */
1094 	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
1095 		("open: vmio vnode has no backing object after vn_open"));
1096 	/*
1097 	 * Release our private reference, leaving the one associated with
1098 	 * the descriptor table intact.
1099 	 */
1100 	fdrop(fp, p);
1101 	p->p_retval[0] = indx;
1102 	return (0);
1103 bad:
1104 	if (fdp->fd_ofiles[indx] == fp) {
1105 		fdp->fd_ofiles[indx] = NULL;
1106 		fdrop(fp, p);
1107 	}
1108 	fdrop(fp, p);
1109 	return (error);
1110 }
1111 
#ifdef COMPAT_43
/*
 * Create a file.
 *
 * Old-style creat(): implemented as open() with the flags fixed to
 * O_WRONLY | O_CREAT | O_TRUNC and the caller's path and mode.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(p, uap)
	struct proc *p;
	register struct ocreat_args /* {
		syscallarg(char *) path;
		syscallarg(int) mode;
	} */ *uap;
{
	struct open_args /* {
		syscallarg(char *) path;
		syscallarg(int) flags;
		syscallarg(int) mode;
	} */ oargs;

	SCARG(&oargs, flags) = O_WRONLY | O_CREAT | O_TRUNC;
	SCARG(&oargs, path) = SCARG(uap, path);
	SCARG(&oargs, mode) = SCARG(uap, mode);
	return (open(p, &oargs));
}
#endif /* COMPAT_43 */
1142 
1143 /*
1144  * Create a special file.
1145  */
1146 #ifndef _SYS_SYSPROTO_H_
1147 struct mknod_args {
1148 	char	*path;
1149 	int	mode;
1150 	int	dev;
1151 };
1152 #endif
1153 /* ARGSUSED */
1154 int
1155 mknod(p, uap)
1156 	struct proc *p;
1157 	register struct mknod_args /* {
1158 		syscallarg(char *) path;
1159 		syscallarg(int) mode;
1160 		syscallarg(int) dev;
1161 	} */ *uap;
1162 {
1163 	struct vnode *vp;
1164 	struct mount *mp;
1165 	struct vattr vattr;
1166 	int error;
1167 	int whiteout = 0;
1168 	struct nameidata nd;
1169 
1170 	switch (SCARG(uap, mode) & S_IFMT) {
1171 	case S_IFCHR:
1172 	case S_IFBLK:
1173 		error = suser(p);
1174 		break;
1175 	default:
1176 		error = suser_xxx(0, p, PRISON_ROOT);
1177 		break;
1178 	}
1179 	if (error)
1180 		return (error);
1181 restart:
1182 	bwillwrite();
1183 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1184 	if ((error = namei(&nd)) != 0)
1185 		return (error);
1186 	vp = nd.ni_vp;
1187 	if (vp != NULL) {
1188 		vrele(vp);
1189 		error = EEXIST;
1190 	} else {
1191 		VATTR_NULL(&vattr);
1192 		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
1193 		vattr.va_rdev = SCARG(uap, dev);
1194 		whiteout = 0;
1195 
1196 		switch (SCARG(uap, mode) & S_IFMT) {
1197 		case S_IFMT:	/* used by badsect to flag bad sectors */
1198 			vattr.va_type = VBAD;
1199 			break;
1200 		case S_IFCHR:
1201 			vattr.va_type = VCHR;
1202 			break;
1203 		case S_IFBLK:
1204 			vattr.va_type = VBLK;
1205 			break;
1206 		case S_IFWHT:
1207 			whiteout = 1;
1208 			break;
1209 		default:
1210 			error = EINVAL;
1211 			break;
1212 		}
1213 	}
1214 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1215 		NDFREE(&nd, NDF_ONLY_PNBUF);
1216 		vput(nd.ni_dvp);
1217 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1218 			return (error);
1219 		goto restart;
1220 	}
1221 	if (!error) {
1222 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1223 		if (whiteout)
1224 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1225 		else {
1226 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1227 						&nd.ni_cnd, &vattr);
1228 			if (error == 0)
1229 				vput(nd.ni_vp);
1230 		}
1231 	}
1232 	NDFREE(&nd, NDF_ONLY_PNBUF);
1233 	vput(nd.ni_dvp);
1234 	vn_finished_write(mp);
1235 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1236 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1237 	return (error);
1238 }
1239 
1240 /*
1241  * Create a named pipe.
1242  */
1243 #ifndef _SYS_SYSPROTO_H_
1244 struct mkfifo_args {
1245 	char	*path;
1246 	int	mode;
1247 };
1248 #endif
1249 /* ARGSUSED */
1250 int
1251 mkfifo(p, uap)
1252 	struct proc *p;
1253 	register struct mkfifo_args /* {
1254 		syscallarg(char *) path;
1255 		syscallarg(int) mode;
1256 	} */ *uap;
1257 {
1258 	struct mount *mp;
1259 	struct vattr vattr;
1260 	int error;
1261 	struct nameidata nd;
1262 
1263 restart:
1264 	bwillwrite();
1265 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1266 	if ((error = namei(&nd)) != 0)
1267 		return (error);
1268 	if (nd.ni_vp != NULL) {
1269 		NDFREE(&nd, NDF_ONLY_PNBUF);
1270 		vrele(nd.ni_vp);
1271 		vput(nd.ni_dvp);
1272 		return (EEXIST);
1273 	}
1274 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1275 		NDFREE(&nd, NDF_ONLY_PNBUF);
1276 		vput(nd.ni_dvp);
1277 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1278 			return (error);
1279 		goto restart;
1280 	}
1281 	VATTR_NULL(&vattr);
1282 	vattr.va_type = VFIFO;
1283 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
1284 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1285 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1286 	if (error == 0)
1287 		vput(nd.ni_vp);
1288 	NDFREE(&nd, NDF_ONLY_PNBUF);
1289 	vput(nd.ni_dvp);
1290 	vn_finished_write(mp);
1291 	return (error);
1292 }
1293 
1294 /*
1295  * Make a hard file link.
1296  */
1297 #ifndef _SYS_SYSPROTO_H_
1298 struct link_args {
1299 	char	*path;
1300 	char	*link;
1301 };
1302 #endif
1303 /* ARGSUSED */
1304 int
1305 link(p, uap)
1306 	struct proc *p;
1307 	register struct link_args /* {
1308 		syscallarg(char *) path;
1309 		syscallarg(char *) link;
1310 	} */ *uap;
1311 {
1312 	struct vnode *vp;
1313 	struct mount *mp;
1314 	struct nameidata nd;
1315 	int error;
1316 
1317 	bwillwrite();
1318 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), p);
1319 	if ((error = namei(&nd)) != 0)
1320 		return (error);
1321 	NDFREE(&nd, NDF_ONLY_PNBUF);
1322 	vp = nd.ni_vp;
1323 	if (vp->v_type == VDIR) {
1324 		vrele(vp);
1325 		return (EPERM);		/* POSIX */
1326 	}
1327 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1328 		vrele(vp);
1329 		return (error);
1330 	}
1331 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
1332 	if ((error = namei(&nd)) == 0) {
1333 		if (nd.ni_vp != NULL) {
1334 			vrele(nd.ni_vp);
1335 			error = EEXIST;
1336 		} else {
1337 			VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1338 			VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1339 			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1340 		}
1341 		NDFREE(&nd, NDF_ONLY_PNBUF);
1342 		vput(nd.ni_dvp);
1343 	}
1344 	vrele(vp);
1345 	vn_finished_write(mp);
1346 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1347 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1348 	return (error);
1349 }
1350 
1351 /*
1352  * Make a symbolic link.
1353  */
1354 #ifndef _SYS_SYSPROTO_H_
1355 struct symlink_args {
1356 	char	*path;
1357 	char	*link;
1358 };
1359 #endif
1360 /* ARGSUSED */
1361 int
1362 symlink(p, uap)
1363 	struct proc *p;
1364 	register struct symlink_args /* {
1365 		syscallarg(char *) path;
1366 		syscallarg(char *) link;
1367 	} */ *uap;
1368 {
1369 	struct mount *mp;
1370 	struct vattr vattr;
1371 	char *path;
1372 	int error;
1373 	struct nameidata nd;
1374 
1375 	path = zalloc(namei_zone);
1376 	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
1377 		goto out;
1378 restart:
1379 	bwillwrite();
1380 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
1381 	if ((error = namei(&nd)) != 0)
1382 		goto out;
1383 	if (nd.ni_vp) {
1384 		NDFREE(&nd, NDF_ONLY_PNBUF);
1385 		vrele(nd.ni_vp);
1386 		vput(nd.ni_dvp);
1387 		error = EEXIST;
1388 		goto out;
1389 	}
1390 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1391 		NDFREE(&nd, NDF_ONLY_PNBUF);
1392 		vput(nd.ni_dvp);
1393 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1394 			return (error);
1395 		goto restart;
1396 	}
1397 	VATTR_NULL(&vattr);
1398 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1399 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1400 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1401 	NDFREE(&nd, NDF_ONLY_PNBUF);
1402 	if (error == 0)
1403 		vput(nd.ni_vp);
1404 	vput(nd.ni_dvp);
1405 	vn_finished_write(mp);
1406 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1407 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1408 out:
1409 	zfree(namei_zone, path);
1410 	return (error);
1411 }
1412 
1413 /*
1414  * Delete a whiteout from the filesystem.
1415  */
1416 /* ARGSUSED */
1417 int
1418 undelete(p, uap)
1419 	struct proc *p;
1420 	register struct undelete_args /* {
1421 		syscallarg(char *) path;
1422 	} */ *uap;
1423 {
1424 	int error;
1425 	struct mount *mp;
1426 	struct nameidata nd;
1427 
1428 restart:
1429 	bwillwrite();
1430 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1431 	    SCARG(uap, path), p);
1432 	error = namei(&nd);
1433 	if (error)
1434 		return (error);
1435 
1436 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1437 		NDFREE(&nd, NDF_ONLY_PNBUF);
1438 		if (nd.ni_vp)
1439 			vrele(nd.ni_vp);
1440 		vput(nd.ni_dvp);
1441 		return (EEXIST);
1442 	}
1443 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1444 		NDFREE(&nd, NDF_ONLY_PNBUF);
1445 		vput(nd.ni_dvp);
1446 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1447 			return (error);
1448 		goto restart;
1449 	}
1450 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1451 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1452 	NDFREE(&nd, NDF_ONLY_PNBUF);
1453 	vput(nd.ni_dvp);
1454 	vn_finished_write(mp);
1455 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1456 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1457 	return (error);
1458 }
1459 
1460 /*
1461  * Delete a name from the filesystem.
1462  */
1463 #ifndef _SYS_SYSPROTO_H_
1464 struct unlink_args {
1465 	char	*path;
1466 };
1467 #endif
1468 /* ARGSUSED */
1469 int
1470 unlink(p, uap)
1471 	struct proc *p;
1472 	struct unlink_args /* {
1473 		syscallarg(char *) path;
1474 	} */ *uap;
1475 {
1476 	struct mount *mp;
1477 	struct vnode *vp;
1478 	int error;
1479 	struct nameidata nd;
1480 
1481 restart:
1482 	bwillwrite();
1483 	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1484 	if ((error = namei(&nd)) != 0)
1485 		return (error);
1486 	vp = nd.ni_vp;
1487 	if (vp->v_type == VDIR)
1488 		error = EPERM;		/* POSIX */
1489 	else {
1490 		/*
1491 		 * The root of a mounted filesystem cannot be deleted.
1492 		 *
1493 		 * XXX: can this only be a VDIR case?
1494 		 */
1495 		if (vp->v_flag & VROOT)
1496 			error = EBUSY;
1497 	}
1498 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1499 		NDFREE(&nd, NDF_ONLY_PNBUF);
1500 		vrele(vp);
1501 		vput(nd.ni_dvp);
1502 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1503 			return (error);
1504 		goto restart;
1505 	}
1506 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1507 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1508 	if (!error) {
1509 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1510 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1511 	}
1512 	NDFREE(&nd, NDF_ONLY_PNBUF);
1513 	vput(nd.ni_dvp);
1514 	vput(vp);
1515 	vn_finished_write(mp);
1516 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1517 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1518 	return (error);
1519 }
1520 
1521 /*
1522  * Reposition read/write file offset.
1523  */
1524 #ifndef _SYS_SYSPROTO_H_
1525 struct lseek_args {
1526 	int	fd;
1527 	int	pad;
1528 	off_t	offset;
1529 	int	whence;
1530 };
1531 #endif
1532 int
1533 lseek(p, uap)
1534 	struct proc *p;
1535 	register struct lseek_args /* {
1536 		syscallarg(int) fd;
1537 		syscallarg(int) pad;
1538 		syscallarg(off_t) offset;
1539 		syscallarg(int) whence;
1540 	} */ *uap;
1541 {
1542 	struct ucred *cred = p->p_ucred;
1543 	register struct filedesc *fdp = p->p_fd;
1544 	register struct file *fp;
1545 	struct vattr vattr;
1546 	int error;
1547 
1548 	if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
1549 	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
1550 		return (EBADF);
1551 	if (fp->f_type != DTYPE_VNODE)
1552 		return (ESPIPE);
1553 	switch (SCARG(uap, whence)) {
1554 	case L_INCR:
1555 		fp->f_offset += SCARG(uap, offset);
1556 		break;
1557 	case L_XTND:
1558 		error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p);
1559 		if (error)
1560 			return (error);
1561 		fp->f_offset = SCARG(uap, offset) + vattr.va_size;
1562 		break;
1563 	case L_SET:
1564 		fp->f_offset = SCARG(uap, offset);
1565 		break;
1566 	default:
1567 		return (EINVAL);
1568 	}
1569 	*(off_t *)(p->p_retval) = fp->f_offset;
1570 	return (0);
1571 }
1572 
1573 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1574 /*
1575  * Reposition read/write file offset.
1576  */
1577 #ifndef _SYS_SYSPROTO_H_
1578 struct olseek_args {
1579 	int	fd;
1580 	long	offset;
1581 	int	whence;
1582 };
1583 #endif
1584 int
1585 olseek(p, uap)
1586 	struct proc *p;
1587 	register struct olseek_args /* {
1588 		syscallarg(int) fd;
1589 		syscallarg(long) offset;
1590 		syscallarg(int) whence;
1591 	} */ *uap;
1592 {
1593 	struct lseek_args /* {
1594 		syscallarg(int) fd;
1595 		syscallarg(int) pad;
1596 		syscallarg(off_t) offset;
1597 		syscallarg(int) whence;
1598 	} */ nuap;
1599 	int error;
1600 
1601 	SCARG(&nuap, fd) = SCARG(uap, fd);
1602 	SCARG(&nuap, offset) = SCARG(uap, offset);
1603 	SCARG(&nuap, whence) = SCARG(uap, whence);
1604 	error = lseek(p, &nuap);
1605 	return (error);
1606 }
1607 #endif /* COMPAT_43 */
1608 
1609 /*
1610  * Check access permissions.
1611  */
1612 #ifndef _SYS_SYSPROTO_H_
1613 struct access_args {
1614 	char	*path;
1615 	int	flags;
1616 };
1617 #endif
1618 int
1619 access(p, uap)
1620 	struct proc *p;
1621 	register struct access_args /* {
1622 		syscallarg(char *) path;
1623 		syscallarg(int) flags;
1624 	} */ *uap;
1625 {
1626 	struct ucred *cred, *tmpcred;
1627 	register struct vnode *vp;
1628 	int error, flags;
1629 	struct nameidata nd;
1630 
1631 	cred = p->p_ucred;
1632 	/*
1633 	 * Create and modify a temporary credential instead of one that
1634 	 * is potentially shared.  This could also mess up socket
1635 	 * buffer accounting which can run in an interrupt context.
1636 	 *
1637 	 * XXX - Depending on how "threads" are finally implemented, it
1638 	 * may be better to explicitly pass the credential to namei()
1639 	 * rather than to modify the potentially shared process structure.
1640 	 */
1641 	tmpcred = crdup(cred);
1642 	tmpcred->cr_uid = p->p_cred->p_ruid;
1643 	tmpcred->cr_groups[0] = p->p_cred->p_rgid;
1644 	p->p_ucred = tmpcred;
1645 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1646 	    SCARG(uap, path), p);
1647 	if ((error = namei(&nd)) != 0)
1648 		goto out1;
1649 	vp = nd.ni_vp;
1650 
1651 	/* Flags == 0 means only check for existence. */
1652 	if (SCARG(uap, flags)) {
1653 		flags = 0;
1654 		if (SCARG(uap, flags) & R_OK)
1655 			flags |= VREAD;
1656 		if (SCARG(uap, flags) & W_OK)
1657 			flags |= VWRITE;
1658 		if (SCARG(uap, flags) & X_OK)
1659 			flags |= VEXEC;
1660 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1661 			error = VOP_ACCESS(vp, flags, cred, p);
1662 	}
1663 	NDFREE(&nd, NDF_ONLY_PNBUF);
1664 	vput(vp);
1665 out1:
1666 	p->p_ucred = cred;
1667 	crfree(tmpcred);
1668 	return (error);
1669 }
1670 
1671 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1672 /*
1673  * Get file status; this version follows links.
1674  */
1675 #ifndef _SYS_SYSPROTO_H_
1676 struct ostat_args {
1677 	char	*path;
1678 	struct ostat *ub;
1679 };
1680 #endif
1681 /* ARGSUSED */
1682 int
1683 ostat(p, uap)
1684 	struct proc *p;
1685 	register struct ostat_args /* {
1686 		syscallarg(char *) path;
1687 		syscallarg(struct ostat *) ub;
1688 	} */ *uap;
1689 {
1690 	struct stat sb;
1691 	struct ostat osb;
1692 	int error;
1693 	struct nameidata nd;
1694 
1695 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1696 	    SCARG(uap, path), p);
1697 	if ((error = namei(&nd)) != 0)
1698 		return (error);
1699 	NDFREE(&nd, NDF_ONLY_PNBUF);
1700 	error = vn_stat(nd.ni_vp, &sb, p);
1701 	vput(nd.ni_vp);
1702 	if (error)
1703 		return (error);
1704 	cvtstat(&sb, &osb);
1705 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1706 	return (error);
1707 }
1708 
1709 /*
1710  * Get file status; this version does not follow links.
1711  */
1712 #ifndef _SYS_SYSPROTO_H_
1713 struct olstat_args {
1714 	char	*path;
1715 	struct ostat *ub;
1716 };
1717 #endif
1718 /* ARGSUSED */
1719 int
1720 olstat(p, uap)
1721 	struct proc *p;
1722 	register struct olstat_args /* {
1723 		syscallarg(char *) path;
1724 		syscallarg(struct ostat *) ub;
1725 	} */ *uap;
1726 {
1727 	struct vnode *vp;
1728 	struct stat sb;
1729 	struct ostat osb;
1730 	int error;
1731 	struct nameidata nd;
1732 
1733 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1734 	    SCARG(uap, path), p);
1735 	if ((error = namei(&nd)) != 0)
1736 		return (error);
1737 	vp = nd.ni_vp;
1738 	error = vn_stat(vp, &sb, p);
1739 	NDFREE(&nd, NDF_ONLY_PNBUF);
1740 	vput(vp);
1741 	if (error)
1742 		return (error);
1743 	cvtstat(&sb, &osb);
1744 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1745 	return (error);
1746 }
1747 
1748 /*
1749  * Convert from an old to a new stat structure.
1750  */
1751 void
1752 cvtstat(st, ost)
1753 	struct stat *st;
1754 	struct ostat *ost;
1755 {
1756 
1757 	ost->st_dev = st->st_dev;
1758 	ost->st_ino = st->st_ino;
1759 	ost->st_mode = st->st_mode;
1760 	ost->st_nlink = st->st_nlink;
1761 	ost->st_uid = st->st_uid;
1762 	ost->st_gid = st->st_gid;
1763 	ost->st_rdev = st->st_rdev;
1764 	if (st->st_size < (quad_t)1 << 32)
1765 		ost->st_size = st->st_size;
1766 	else
1767 		ost->st_size = -2;
1768 	ost->st_atime = st->st_atime;
1769 	ost->st_mtime = st->st_mtime;
1770 	ost->st_ctime = st->st_ctime;
1771 	ost->st_blksize = st->st_blksize;
1772 	ost->st_blocks = st->st_blocks;
1773 	ost->st_flags = st->st_flags;
1774 	ost->st_gen = st->st_gen;
1775 }
1776 #endif /* COMPAT_43 || COMPAT_SUNOS */
1777 
1778 /*
1779  * Get file status; this version follows links.
1780  */
1781 #ifndef _SYS_SYSPROTO_H_
1782 struct stat_args {
1783 	char	*path;
1784 	struct stat *ub;
1785 };
1786 #endif
1787 /* ARGSUSED */
1788 int
1789 stat(p, uap)
1790 	struct proc *p;
1791 	register struct stat_args /* {
1792 		syscallarg(char *) path;
1793 		syscallarg(struct stat *) ub;
1794 	} */ *uap;
1795 {
1796 	struct stat sb;
1797 	int error;
1798 	struct nameidata nd;
1799 
1800 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1801 	    SCARG(uap, path), p);
1802 	if ((error = namei(&nd)) != 0)
1803 		return (error);
1804 	error = vn_stat(nd.ni_vp, &sb, p);
1805 	NDFREE(&nd, NDF_ONLY_PNBUF);
1806 	vput(nd.ni_vp);
1807 	if (error)
1808 		return (error);
1809 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
1810 	return (error);
1811 }
1812 
1813 /*
1814  * Get file status; this version does not follow links.
1815  */
1816 #ifndef _SYS_SYSPROTO_H_
1817 struct lstat_args {
1818 	char	*path;
1819 	struct stat *ub;
1820 };
1821 #endif
1822 /* ARGSUSED */
1823 int
1824 lstat(p, uap)
1825 	struct proc *p;
1826 	register struct lstat_args /* {
1827 		syscallarg(char *) path;
1828 		syscallarg(struct stat *) ub;
1829 	} */ *uap;
1830 {
1831 	int error;
1832 	struct vnode *vp;
1833 	struct stat sb;
1834 	struct nameidata nd;
1835 
1836 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1837 	    SCARG(uap, path), p);
1838 	if ((error = namei(&nd)) != 0)
1839 		return (error);
1840 	vp = nd.ni_vp;
1841 	error = vn_stat(vp, &sb, p);
1842 	NDFREE(&nd, NDF_ONLY_PNBUF);
1843 	vput(vp);
1844 	if (error)
1845 		return (error);
1846 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
1847 	return (error);
1848 }
1849 
1850 /*
1851  * Implementation of the NetBSD stat() function.
1852  * XXX This should probably be collapsed with the FreeBSD version,
1853  * as the differences are only due to vn_stat() clearing spares at
1854  * the end of the structures.  vn_stat could be split to avoid this,
1855  * and thus collapse the following to close to zero code.
1856  */
1857 void
1858 cvtnstat(sb, nsb)
1859 	struct stat *sb;
1860 	struct nstat *nsb;
1861 {
1862 	nsb->st_dev = sb->st_dev;
1863 	nsb->st_ino = sb->st_ino;
1864 	nsb->st_mode = sb->st_mode;
1865 	nsb->st_nlink = sb->st_nlink;
1866 	nsb->st_uid = sb->st_uid;
1867 	nsb->st_gid = sb->st_gid;
1868 	nsb->st_rdev = sb->st_rdev;
1869 	nsb->st_atimespec = sb->st_atimespec;
1870 	nsb->st_mtimespec = sb->st_mtimespec;
1871 	nsb->st_ctimespec = sb->st_ctimespec;
1872 	nsb->st_size = sb->st_size;
1873 	nsb->st_blocks = sb->st_blocks;
1874 	nsb->st_blksize = sb->st_blksize;
1875 	nsb->st_flags = sb->st_flags;
1876 	nsb->st_gen = sb->st_gen;
1877 	nsb->st_qspare[0] = sb->st_qspare[0];
1878 	nsb->st_qspare[1] = sb->st_qspare[1];
1879 }
1880 
1881 #ifndef _SYS_SYSPROTO_H_
1882 struct nstat_args {
1883 	char	*path;
1884 	struct nstat *ub;
1885 };
1886 #endif
1887 /* ARGSUSED */
1888 int
1889 nstat(p, uap)
1890 	struct proc *p;
1891 	register struct nstat_args /* {
1892 		syscallarg(char *) path;
1893 		syscallarg(struct nstat *) ub;
1894 	} */ *uap;
1895 {
1896 	struct stat sb;
1897 	struct nstat nsb;
1898 	int error;
1899 	struct nameidata nd;
1900 
1901 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1902 	    SCARG(uap, path), p);
1903 	if ((error = namei(&nd)) != 0)
1904 		return (error);
1905 	NDFREE(&nd, NDF_ONLY_PNBUF);
1906 	error = vn_stat(nd.ni_vp, &sb, p);
1907 	vput(nd.ni_vp);
1908 	if (error)
1909 		return (error);
1910 	cvtnstat(&sb, &nsb);
1911 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
1912 	return (error);
1913 }
1914 
1915 /*
1916  * NetBSD lstat.  Get file status; this version does not follow links.
1917  */
1918 #ifndef _SYS_SYSPROTO_H_
1919 struct lstat_args {
1920 	char	*path;
1921 	struct stat *ub;
1922 };
1923 #endif
1924 /* ARGSUSED */
1925 int
1926 nlstat(p, uap)
1927 	struct proc *p;
1928 	register struct nlstat_args /* {
1929 		syscallarg(char *) path;
1930 		syscallarg(struct nstat *) ub;
1931 	} */ *uap;
1932 {
1933 	int error;
1934 	struct vnode *vp;
1935 	struct stat sb;
1936 	struct nstat nsb;
1937 	struct nameidata nd;
1938 
1939 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1940 	    SCARG(uap, path), p);
1941 	if ((error = namei(&nd)) != 0)
1942 		return (error);
1943 	vp = nd.ni_vp;
1944 	NDFREE(&nd, NDF_ONLY_PNBUF);
1945 	error = vn_stat(vp, &sb, p);
1946 	vput(vp);
1947 	if (error)
1948 		return (error);
1949 	cvtnstat(&sb, &nsb);
1950 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
1951 	return (error);
1952 }
1953 
1954 /*
1955  * Get configurable pathname variables.
1956  */
1957 #ifndef _SYS_SYSPROTO_H_
1958 struct pathconf_args {
1959 	char	*path;
1960 	int	name;
1961 };
1962 #endif
1963 /* ARGSUSED */
1964 int
1965 pathconf(p, uap)
1966 	struct proc *p;
1967 	register struct pathconf_args /* {
1968 		syscallarg(char *) path;
1969 		syscallarg(int) name;
1970 	} */ *uap;
1971 {
1972 	int error;
1973 	struct nameidata nd;
1974 
1975 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1976 	    SCARG(uap, path), p);
1977 	if ((error = namei(&nd)) != 0)
1978 		return (error);
1979 	NDFREE(&nd, NDF_ONLY_PNBUF);
1980 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), p->p_retval);
1981 	vput(nd.ni_vp);
1982 	return (error);
1983 }
1984 
1985 /*
1986  * Return target name of a symbolic link.
1987  */
1988 #ifndef _SYS_SYSPROTO_H_
1989 struct readlink_args {
1990 	char	*path;
1991 	char	*buf;
1992 	int	count;
1993 };
1994 #endif
1995 /* ARGSUSED */
1996 int
1997 readlink(p, uap)
1998 	struct proc *p;
1999 	register struct readlink_args /* {
2000 		syscallarg(char *) path;
2001 		syscallarg(char *) buf;
2002 		syscallarg(int) count;
2003 	} */ *uap;
2004 {
2005 	register struct vnode *vp;
2006 	struct iovec aiov;
2007 	struct uio auio;
2008 	int error;
2009 	struct nameidata nd;
2010 
2011 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2012 	    SCARG(uap, path), p);
2013 	if ((error = namei(&nd)) != 0)
2014 		return (error);
2015 	NDFREE(&nd, NDF_ONLY_PNBUF);
2016 	vp = nd.ni_vp;
2017 	if (vp->v_type != VLNK)
2018 		error = EINVAL;
2019 	else {
2020 		aiov.iov_base = SCARG(uap, buf);
2021 		aiov.iov_len = SCARG(uap, count);
2022 		auio.uio_iov = &aiov;
2023 		auio.uio_iovcnt = 1;
2024 		auio.uio_offset = 0;
2025 		auio.uio_rw = UIO_READ;
2026 		auio.uio_segflg = UIO_USERSPACE;
2027 		auio.uio_procp = p;
2028 		auio.uio_resid = SCARG(uap, count);
2029 		error = VOP_READLINK(vp, &auio, p->p_ucred);
2030 	}
2031 	vput(vp);
2032 	p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
2033 	return (error);
2034 }
2035 
2036 /*
2037  * Common implementation code for chflags() and fchflags().
2038  */
2039 static int
2040 setfflags(p, vp, flags)
2041 	struct proc *p;
2042 	struct vnode *vp;
2043 	int flags;
2044 {
2045 	int error;
2046 	struct mount *mp;
2047 	struct vattr vattr;
2048 
2049 	/*
2050 	 * Prevent non-root users from setting flags on devices.  When
2051 	 * a device is reused, users can retain ownership of the device
2052 	 * if they are allowed to set flags and programs assume that
2053 	 * chown can't fail when done as root.
2054 	 */
2055 	if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
2056 	    ((error = suser_xxx(p->p_ucred, p, PRISON_ROOT)) != 0))
2057 		return (error);
2058 
2059 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2060 		return (error);
2061 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2062 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2063 	VATTR_NULL(&vattr);
2064 	vattr.va_flags = flags;
2065 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2066 	VOP_UNLOCK(vp, 0, p);
2067 	vn_finished_write(mp);
2068 	return (error);
2069 }
2070 
2071 /*
2072  * Change flags of a file given a path name.
2073  */
2074 #ifndef _SYS_SYSPROTO_H_
2075 struct chflags_args {
2076 	char	*path;
2077 	int	flags;
2078 };
2079 #endif
2080 /* ARGSUSED */
2081 int
2082 chflags(p, uap)
2083 	struct proc *p;
2084 	register struct chflags_args /* {
2085 		syscallarg(char *) path;
2086 		syscallarg(int) flags;
2087 	} */ *uap;
2088 {
2089 	int error;
2090 	struct nameidata nd;
2091 
2092 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2093 	if ((error = namei(&nd)) != 0)
2094 		return (error);
2095 	NDFREE(&nd, NDF_ONLY_PNBUF);
2096 	error = setfflags(p, nd.ni_vp, SCARG(uap, flags));
2097 	vrele(nd.ni_vp);
2098 	return error;
2099 }
2100 
2101 /*
2102  * Change flags of a file given a file descriptor.
2103  */
2104 #ifndef _SYS_SYSPROTO_H_
2105 struct fchflags_args {
2106 	int	fd;
2107 	int	flags;
2108 };
2109 #endif
2110 /* ARGSUSED */
2111 int
2112 fchflags(p, uap)
2113 	struct proc *p;
2114 	register struct fchflags_args /* {
2115 		syscallarg(int) fd;
2116 		syscallarg(int) flags;
2117 	} */ *uap;
2118 {
2119 	struct file *fp;
2120 	int error;
2121 
2122 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2123 		return (error);
2124 	return setfflags(p, (struct vnode *) fp->f_data, SCARG(uap, flags));
2125 }
2126 
2127 /*
2128  * Common implementation code for chmod(), lchmod() and fchmod().
2129  */
2130 static int
2131 setfmode(p, vp, mode)
2132 	struct proc *p;
2133 	struct vnode *vp;
2134 	int mode;
2135 {
2136 	int error;
2137 	struct mount *mp;
2138 	struct vattr vattr;
2139 
2140 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2141 		return (error);
2142 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2143 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2144 	VATTR_NULL(&vattr);
2145 	vattr.va_mode = mode & ALLPERMS;
2146 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2147 	VOP_UNLOCK(vp, 0, p);
2148 	vn_finished_write(mp);
2149 	return error;
2150 }
2151 
2152 /*
2153  * Change mode of a file given path name.
2154  */
2155 #ifndef _SYS_SYSPROTO_H_
2156 struct chmod_args {
2157 	char	*path;
2158 	int	mode;
2159 };
2160 #endif
2161 /* ARGSUSED */
2162 int
2163 chmod(p, uap)
2164 	struct proc *p;
2165 	register struct chmod_args /* {
2166 		syscallarg(char *) path;
2167 		syscallarg(int) mode;
2168 	} */ *uap;
2169 {
2170 	int error;
2171 	struct nameidata nd;
2172 
2173 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2174 	if ((error = namei(&nd)) != 0)
2175 		return (error);
2176 	NDFREE(&nd, NDF_ONLY_PNBUF);
2177 	error = setfmode(p, nd.ni_vp, SCARG(uap, mode));
2178 	vrele(nd.ni_vp);
2179 	return error;
2180 }
2181 
2182 /*
2183  * Change mode of a file given path name (don't follow links.)
2184  */
2185 #ifndef _SYS_SYSPROTO_H_
2186 struct lchmod_args {
2187 	char	*path;
2188 	int	mode;
2189 };
2190 #endif
2191 /* ARGSUSED */
2192 int
2193 lchmod(p, uap)
2194 	struct proc *p;
2195 	register struct lchmod_args /* {
2196 		syscallarg(char *) path;
2197 		syscallarg(int) mode;
2198 	} */ *uap;
2199 {
2200 	int error;
2201 	struct nameidata nd;
2202 
2203 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2204 	if ((error = namei(&nd)) != 0)
2205 		return (error);
2206 	NDFREE(&nd, NDF_ONLY_PNBUF);
2207 	error = setfmode(p, nd.ni_vp, SCARG(uap, mode));
2208 	vrele(nd.ni_vp);
2209 	return error;
2210 }
2211 
2212 /*
2213  * Change mode of a file given a file descriptor.
2214  */
2215 #ifndef _SYS_SYSPROTO_H_
2216 struct fchmod_args {
2217 	int	fd;
2218 	int	mode;
2219 };
2220 #endif
2221 /* ARGSUSED */
2222 int
2223 fchmod(p, uap)
2224 	struct proc *p;
2225 	register struct fchmod_args /* {
2226 		syscallarg(int) fd;
2227 		syscallarg(int) mode;
2228 	} */ *uap;
2229 {
2230 	struct file *fp;
2231 	int error;
2232 
2233 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2234 		return (error);
2235 	return setfmode(p, (struct vnode *)fp->f_data, SCARG(uap, mode));
2236 }
2237 
2238 /*
2239  * Common implementation for chown(), lchown(), and fchown()
2240  */
2241 static int
2242 setfown(p, vp, uid, gid)
2243 	struct proc *p;
2244 	struct vnode *vp;
2245 	uid_t uid;
2246 	gid_t gid;
2247 {
2248 	int error;
2249 	struct mount *mp;
2250 	struct vattr vattr;
2251 
2252 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2253 		return (error);
2254 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2255 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2256 	VATTR_NULL(&vattr);
2257 	vattr.va_uid = uid;
2258 	vattr.va_gid = gid;
2259 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2260 	VOP_UNLOCK(vp, 0, p);
2261 	vn_finished_write(mp);
2262 	return error;
2263 }
2264 
2265 /*
2266  * Set ownership given a path name.
2267  */
2268 #ifndef _SYS_SYSPROTO_H_
2269 struct chown_args {
2270 	char	*path;
2271 	int	uid;
2272 	int	gid;
2273 };
2274 #endif
2275 /* ARGSUSED */
2276 int
2277 chown(p, uap)
2278 	struct proc *p;
2279 	register struct chown_args /* {
2280 		syscallarg(char *) path;
2281 		syscallarg(int) uid;
2282 		syscallarg(int) gid;
2283 	} */ *uap;
2284 {
2285 	int error;
2286 	struct nameidata nd;
2287 
2288 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2289 	if ((error = namei(&nd)) != 0)
2290 		return (error);
2291 	NDFREE(&nd, NDF_ONLY_PNBUF);
2292 	error = setfown(p, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2293 	vrele(nd.ni_vp);
2294 	return (error);
2295 }
2296 
2297 /*
2298  * Set ownership given a path name, do not cross symlinks.
2299  */
2300 #ifndef _SYS_SYSPROTO_H_
2301 struct lchown_args {
2302 	char	*path;
2303 	int	uid;
2304 	int	gid;
2305 };
2306 #endif
2307 /* ARGSUSED */
2308 int
2309 lchown(p, uap)
2310 	struct proc *p;
2311 	register struct lchown_args /* {
2312 		syscallarg(char *) path;
2313 		syscallarg(int) uid;
2314 		syscallarg(int) gid;
2315 	} */ *uap;
2316 {
2317 	int error;
2318 	struct nameidata nd;
2319 
2320 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2321 	if ((error = namei(&nd)) != 0)
2322 		return (error);
2323 	NDFREE(&nd, NDF_ONLY_PNBUF);
2324 	error = setfown(p, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2325 	vrele(nd.ni_vp);
2326 	return (error);
2327 }
2328 
2329 /*
2330  * Set ownership given a file descriptor.
2331  */
2332 #ifndef _SYS_SYSPROTO_H_
2333 struct fchown_args {
2334 	int	fd;
2335 	int	uid;
2336 	int	gid;
2337 };
2338 #endif
2339 /* ARGSUSED */
2340 int
2341 fchown(p, uap)
2342 	struct proc *p;
2343 	register struct fchown_args /* {
2344 		syscallarg(int) fd;
2345 		syscallarg(int) uid;
2346 		syscallarg(int) gid;
2347 	} */ *uap;
2348 {
2349 	struct file *fp;
2350 	int error;
2351 
2352 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2353 		return (error);
2354 	return setfown(p, (struct vnode *)fp->f_data,
2355 		SCARG(uap, uid), SCARG(uap, gid));
2356 }
2357 
2358 /*
2359  * Common implementation code for utimes(), lutimes(), and futimes().
2360  */
2361 static int
2362 getutimes(usrtvp, tsp)
2363 	const struct timeval *usrtvp;
2364 	struct timespec *tsp;
2365 {
2366 	struct timeval tv[2];
2367 	int error;
2368 
2369 	if (usrtvp == NULL) {
2370 		microtime(&tv[0]);
2371 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2372 		tsp[1] = tsp[0];
2373 	} else {
2374 		if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
2375 			return (error);
2376 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2377 		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
2378 	}
2379 	return 0;
2380 }
2381 
2382 /*
2383  * Common implementation code for utimes(), lutimes(), and futimes().
2384  */
2385 static int
2386 setutimes(p, vp, ts, nullflag)
2387 	struct proc *p;
2388 	struct vnode *vp;
2389 	const struct timespec *ts;
2390 	int nullflag;
2391 {
2392 	int error;
2393 	struct mount *mp;
2394 	struct vattr vattr;
2395 
2396 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2397 		return (error);
2398 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2399 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2400 	VATTR_NULL(&vattr);
2401 	vattr.va_atime = ts[0];
2402 	vattr.va_mtime = ts[1];
2403 	if (nullflag)
2404 		vattr.va_vaflags |= VA_UTIMES_NULL;
2405 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2406 	VOP_UNLOCK(vp, 0, p);
2407 	vn_finished_write(mp);
2408 	return error;
2409 }
2410 
2411 /*
2412  * Set the access and modification times of a file.
2413  */
2414 #ifndef _SYS_SYSPROTO_H_
2415 struct utimes_args {
2416 	char	*path;
2417 	struct	timeval *tptr;
2418 };
2419 #endif
2420 /* ARGSUSED */
2421 int
2422 utimes(p, uap)
2423 	struct proc *p;
2424 	register struct utimes_args /* {
2425 		syscallarg(char *) path;
2426 		syscallarg(struct timeval *) tptr;
2427 	} */ *uap;
2428 {
2429 	struct timespec ts[2];
2430 	struct timeval *usrtvp;
2431 	int error;
2432 	struct nameidata nd;
2433 
2434 	usrtvp = SCARG(uap, tptr);
2435 	if ((error = getutimes(usrtvp, ts)) != 0)
2436 		return (error);
2437 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2438 	if ((error = namei(&nd)) != 0)
2439 		return (error);
2440 	NDFREE(&nd, NDF_ONLY_PNBUF);
2441 	error = setutimes(p, nd.ni_vp, ts, usrtvp == NULL);
2442 	vrele(nd.ni_vp);
2443 	return (error);
2444 }
2445 
2446 /*
2447  * Set the access and modification times of a file.
2448  */
2449 #ifndef _SYS_SYSPROTO_H_
2450 struct lutimes_args {
2451 	char	*path;
2452 	struct	timeval *tptr;
2453 };
2454 #endif
2455 /* ARGSUSED */
2456 int
2457 lutimes(p, uap)
2458 	struct proc *p;
2459 	register struct lutimes_args /* {
2460 		syscallarg(char *) path;
2461 		syscallarg(struct timeval *) tptr;
2462 	} */ *uap;
2463 {
2464 	struct timespec ts[2];
2465 	struct timeval *usrtvp;
2466 	int error;
2467 	struct nameidata nd;
2468 
2469 	usrtvp = SCARG(uap, tptr);
2470 	if ((error = getutimes(usrtvp, ts)) != 0)
2471 		return (error);
2472 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2473 	if ((error = namei(&nd)) != 0)
2474 		return (error);
2475 	NDFREE(&nd, NDF_ONLY_PNBUF);
2476 	error = setutimes(p, nd.ni_vp, ts, usrtvp == NULL);
2477 	vrele(nd.ni_vp);
2478 	return (error);
2479 }
2480 
2481 /*
2482  * Set the access and modification times of a file.
2483  */
2484 #ifndef _SYS_SYSPROTO_H_
2485 struct futimes_args {
2486 	int	fd;
2487 	struct	timeval *tptr;
2488 };
2489 #endif
2490 /* ARGSUSED */
2491 int
2492 futimes(p, uap)
2493 	struct proc *p;
2494 	register struct futimes_args /* {
2495 		syscallarg(int ) fd;
2496 		syscallarg(struct timeval *) tptr;
2497 	} */ *uap;
2498 {
2499 	struct timespec ts[2];
2500 	struct file *fp;
2501 	struct timeval *usrtvp;
2502 	int error;
2503 
2504 	usrtvp = SCARG(uap, tptr);
2505 	if ((error = getutimes(usrtvp, ts)) != 0)
2506 		return (error);
2507 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2508 		return (error);
2509 	return setutimes(p, (struct vnode *)fp->f_data, ts, usrtvp == NULL);
2510 }
2511 
2512 /*
2513  * Truncate a file given its path name.
2514  */
2515 #ifndef _SYS_SYSPROTO_H_
2516 struct truncate_args {
2517 	char	*path;
2518 	int	pad;
2519 	off_t	length;
2520 };
2521 #endif
2522 /* ARGSUSED */
2523 int
2524 truncate(p, uap)
2525 	struct proc *p;
2526 	register struct truncate_args /* {
2527 		syscallarg(char *) path;
2528 		syscallarg(int) pad;
2529 		syscallarg(off_t) length;
2530 	} */ *uap;
2531 {
2532 	struct mount *mp;
2533 	struct vnode *vp;
2534 	struct vattr vattr;
2535 	int error;
2536 	struct nameidata nd;
2537 
2538 	if (uap->length < 0)
2539 		return(EINVAL);
2540 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2541 	if ((error = namei(&nd)) != 0)
2542 		return (error);
2543 	vp = nd.ni_vp;
2544 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2545 		vrele(vp);
2546 		return (error);
2547 	}
2548 	NDFREE(&nd, NDF_ONLY_PNBUF);
2549 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2550 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2551 	if (vp->v_type == VDIR)
2552 		error = EISDIR;
2553 	else if ((error = vn_writechk(vp)) == 0 &&
2554 	    (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) {
2555 		VATTR_NULL(&vattr);
2556 		vattr.va_size = SCARG(uap, length);
2557 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2558 	}
2559 	vput(vp);
2560 	vn_finished_write(mp);
2561 	return (error);
2562 }
2563 
2564 /*
2565  * Truncate a file given a file descriptor.
2566  */
2567 #ifndef _SYS_SYSPROTO_H_
2568 struct ftruncate_args {
2569 	int	fd;
2570 	int	pad;
2571 	off_t	length;
2572 };
2573 #endif
2574 /* ARGSUSED */
2575 int
2576 ftruncate(p, uap)
2577 	struct proc *p;
2578 	register struct ftruncate_args /* {
2579 		syscallarg(int) fd;
2580 		syscallarg(int) pad;
2581 		syscallarg(off_t) length;
2582 	} */ *uap;
2583 {
2584 	struct mount *mp;
2585 	struct vattr vattr;
2586 	struct vnode *vp;
2587 	struct file *fp;
2588 	int error;
2589 
2590 	if (uap->length < 0)
2591 		return(EINVAL);
2592 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2593 		return (error);
2594 	if ((fp->f_flag & FWRITE) == 0)
2595 		return (EINVAL);
2596 	vp = (struct vnode *)fp->f_data;
2597 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2598 		return (error);
2599 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2600 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2601 	if (vp->v_type == VDIR)
2602 		error = EISDIR;
2603 	else if ((error = vn_writechk(vp)) == 0) {
2604 		VATTR_NULL(&vattr);
2605 		vattr.va_size = SCARG(uap, length);
2606 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2607 	}
2608 	VOP_UNLOCK(vp, 0, p);
2609 	vn_finished_write(mp);
2610 	return (error);
2611 }
2612 
2613 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2614 /*
2615  * Truncate a file given its path name.
2616  */
2617 #ifndef _SYS_SYSPROTO_H_
2618 struct otruncate_args {
2619 	char	*path;
2620 	long	length;
2621 };
2622 #endif
2623 /* ARGSUSED */
2624 int
2625 otruncate(p, uap)
2626 	struct proc *p;
2627 	register struct otruncate_args /* {
2628 		syscallarg(char *) path;
2629 		syscallarg(long) length;
2630 	} */ *uap;
2631 {
2632 	struct truncate_args /* {
2633 		syscallarg(char *) path;
2634 		syscallarg(int) pad;
2635 		syscallarg(off_t) length;
2636 	} */ nuap;
2637 
2638 	SCARG(&nuap, path) = SCARG(uap, path);
2639 	SCARG(&nuap, length) = SCARG(uap, length);
2640 	return (truncate(p, &nuap));
2641 }
2642 
2643 /*
2644  * Truncate a file given a file descriptor.
2645  */
2646 #ifndef _SYS_SYSPROTO_H_
2647 struct oftruncate_args {
2648 	int	fd;
2649 	long	length;
2650 };
2651 #endif
2652 /* ARGSUSED */
2653 int
2654 oftruncate(p, uap)
2655 	struct proc *p;
2656 	register struct oftruncate_args /* {
2657 		syscallarg(int) fd;
2658 		syscallarg(long) length;
2659 	} */ *uap;
2660 {
2661 	struct ftruncate_args /* {
2662 		syscallarg(int) fd;
2663 		syscallarg(int) pad;
2664 		syscallarg(off_t) length;
2665 	} */ nuap;
2666 
2667 	SCARG(&nuap, fd) = SCARG(uap, fd);
2668 	SCARG(&nuap, length) = SCARG(uap, length);
2669 	return (ftruncate(p, &nuap));
2670 }
2671 #endif /* COMPAT_43 || COMPAT_SUNOS */
2672 
2673 /*
2674  * Sync an open file.
2675  */
2676 #ifndef _SYS_SYSPROTO_H_
2677 struct fsync_args {
2678 	int	fd;
2679 };
2680 #endif
2681 /* ARGSUSED */
2682 int
2683 fsync(p, uap)
2684 	struct proc *p;
2685 	struct fsync_args /* {
2686 		syscallarg(int) fd;
2687 	} */ *uap;
2688 {
2689 	struct vnode *vp;
2690 	struct mount *mp;
2691 	struct file *fp;
2692 	vm_object_t obj;
2693 	int error;
2694 
2695 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2696 		return (error);
2697 	vp = (struct vnode *)fp->f_data;
2698 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2699 		return (error);
2700 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2701 	if (VOP_GETVOBJECT(vp, &obj) == 0)
2702 		vm_object_page_clean(obj, 0, 0, 0);
2703 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
2704 #ifdef SOFTUPDATES
2705 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2706 	    error = softdep_fsync(vp);
2707 #endif
2708 
2709 	VOP_UNLOCK(vp, 0, p);
2710 	vn_finished_write(mp);
2711 	return (error);
2712 }
2713 
/*
 * Rename files.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 *
 * The source is looked up first (DELETE, WANTPARENT | SAVESTART), then
 * the target (RENAME, with parent and leaf locked).  Type mismatches
 * between source and an existing target give ENOTDIR or EISDIR; a source
 * that is the target's parent directory gives EINVAL.  Internally the
 * value -1 in 'error' marks "source and target are the same directory
 * entry"; it skips VOP_RENAME() and is turned into a return of 0.
 */
#ifndef _SYS_SYSPROTO_H_
struct rename_args {
	char	*from;
	char	*to;
};
#endif
/* ARGSUSED */
int
rename(p, uap)
	struct proc *p;
	register struct rename_args /* {
		syscallarg(char *) from;
		syscallarg(char *) to;
	} */ *uap;
{
	struct mount *mp;
	struct vnode *tvp, *fvp, *tdvp;
	struct nameidata fromnd, tond;
	int error;

	bwillwrite();
	/* Resolve the source; both it and its parent come back referenced. */
	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
	    SCARG(uap, from), p);
	if ((error = namei(&fromnd)) != 0)
		return (error);
	fvp = fromnd.ni_vp;
	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
		/* Undo the source lookup; out1 drops the start directory. */
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}
	/* Resolve the target with its parent (and leaf, if any) locked. */
	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
	    UIO_USERSPACE, SCARG(uap, to), p);
	if (fromnd.ni_vp->v_type == VDIR)
		tond.ni_cnd.cn_flags |= WILLBEDIR;
	if ((error = namei(&tond)) != 0) {
		/* Translate error code for rename("dir1", "dir2/."). */
		if (error == EISDIR && fvp->v_type == VDIR)
			error = EINVAL;
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;
	/* An existing target must be of the same kind as the source. */
	if (tvp != NULL) {
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
	}
	/* The source may not be the directory the target lives in. */
	if (fvp == tdvp)
		error = EINVAL;
	/*
	 * If source is the same as the destination (that is the
	 * same inode number with the same name in the same directory),
	 * then there is nothing to do.
	 */
	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
	      fromnd.ni_cnd.cn_namelen))
		error = -1;
out:
	if (!error) {
		VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE);
		if (fromnd.ni_dvp != tdvp) {
			VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
		}
		if (tvp) {
			VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE);
		}
		/*
		 * NOTE(review): no vrele()/vput() of the four vnodes follows
		 * this call, so VOP_RENAME() presumably consumes the
		 * references and locks itself -- confirm against the VOP
		 * contract.
		 */
		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
	} else {
		/*
		 * Failure (or the -1 "nothing to do" case): release what the
		 * two lookups returned.  The target side was locked, hence
		 * vput(); when tdvp and tvp are the same vnode only one of
		 * the two releases unlocks it.
		 */
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
	}
	/* SAVESTART left a reference on the target's start directory. */
	vrele(tond.ni_startdir);
	vn_finished_write(mp);
	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
out1:
	if (fromnd.ni_startdir)
		vrele(fromnd.ni_startdir);
	/* -1 is the internal "same entry, nothing to do" marker. */
	if (error == -1)
		return (0);
	return (error);
}
2824 
2825 /*
2826  * Make a directory file.
2827  */
2828 #ifndef _SYS_SYSPROTO_H_
2829 struct mkdir_args {
2830 	char	*path;
2831 	int	mode;
2832 };
2833 #endif
2834 /* ARGSUSED */
2835 int
2836 mkdir(p, uap)
2837 	struct proc *p;
2838 	register struct mkdir_args /* {
2839 		syscallarg(char *) path;
2840 		syscallarg(int) mode;
2841 	} */ *uap;
2842 {
2843 	struct mount *mp;
2844 	struct vnode *vp;
2845 	struct vattr vattr;
2846 	int error;
2847 	struct nameidata nd;
2848 
2849 restart:
2850 	bwillwrite();
2851 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
2852 	nd.ni_cnd.cn_flags |= WILLBEDIR;
2853 	if ((error = namei(&nd)) != 0)
2854 		return (error);
2855 	vp = nd.ni_vp;
2856 	if (vp != NULL) {
2857 		NDFREE(&nd, NDF_ONLY_PNBUF);
2858 		vrele(vp);
2859 		vput(nd.ni_dvp);
2860 		return (EEXIST);
2861 	}
2862 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2863 		NDFREE(&nd, NDF_ONLY_PNBUF);
2864 		vput(nd.ni_dvp);
2865 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2866 			return (error);
2867 		goto restart;
2868 	}
2869 	VATTR_NULL(&vattr);
2870 	vattr.va_type = VDIR;
2871 	vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask;
2872 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
2873 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2874 	NDFREE(&nd, NDF_ONLY_PNBUF);
2875 	vput(nd.ni_dvp);
2876 	if (!error)
2877 		vput(nd.ni_vp);
2878 	vn_finished_write(mp);
2879 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2880 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2881 	return (error);
2882 }
2883 
2884 /*
2885  * Remove a directory file.
2886  */
2887 #ifndef _SYS_SYSPROTO_H_
2888 struct rmdir_args {
2889 	char	*path;
2890 };
2891 #endif
2892 /* ARGSUSED */
2893 int
2894 rmdir(p, uap)
2895 	struct proc *p;
2896 	struct rmdir_args /* {
2897 		syscallarg(char *) path;
2898 	} */ *uap;
2899 {
2900 	struct mount *mp;
2901 	struct vnode *vp;
2902 	int error;
2903 	struct nameidata nd;
2904 
2905 restart:
2906 	bwillwrite();
2907 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
2908 	    SCARG(uap, path), p);
2909 	if ((error = namei(&nd)) != 0)
2910 		return (error);
2911 	vp = nd.ni_vp;
2912 	if (vp->v_type != VDIR) {
2913 		error = ENOTDIR;
2914 		goto out;
2915 	}
2916 	/*
2917 	 * No rmdir "." please.
2918 	 */
2919 	if (nd.ni_dvp == vp) {
2920 		error = EINVAL;
2921 		goto out;
2922 	}
2923 	/*
2924 	 * The root of a mounted filesystem cannot be deleted.
2925 	 */
2926 	if (vp->v_flag & VROOT) {
2927 		error = EBUSY;
2928 		goto out;
2929 	}
2930 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2931 		NDFREE(&nd, NDF_ONLY_PNBUF);
2932 		if (nd.ni_dvp == vp)
2933 			vrele(nd.ni_dvp);
2934 		else
2935 			vput(nd.ni_dvp);
2936 		vput(vp);
2937 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2938 			return (error);
2939 		goto restart;
2940 	}
2941 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
2942 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2943 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2944 	vn_finished_write(mp);
2945 out:
2946 	NDFREE(&nd, NDF_ONLY_PNBUF);
2947 	if (nd.ni_dvp == vp)
2948 		vrele(nd.ni_dvp);
2949 	else
2950 		vput(nd.ni_dvp);
2951 	vput(vp);
2952 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
2953 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
2954 	return (error);
2955 }
2956 
2957 #ifdef COMPAT_43
/*
 * Read a block of directory entries in a file system independent format.
 *
 * COMPAT_43 variant.  Entries are read into a kernel buffer, their
 * d_type/d_namlen bytes are rewritten to the layout old callers expect,
 * and the result is copied out to the user buffer.  The directory offset
 * in effect before the read is stored through basep, and the number of
 * bytes transferred is returned in p->p_retval[0].
 *
 * Errors: EBADF if the descriptor is not open for reading, EINVAL if it
 * is not a directory, EIO if an entry with a zero record length is seen,
 * otherwise whatever getvnode(), VOP_READDIR(), uiomove(),
 * union_dircheckp() or copyout() report.
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(p, uap)
	struct proc *p;
	register struct ogetdirentries_args /* {
		syscallarg(int) fd;
		syscallarg(char *) buf;
		syscallarg(u_int) count;
		syscallarg(long *) basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;

	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);
	vp = (struct vnode *)fp->f_data;
unionread:
	if (vp->v_type != VDIR)
		return (EINVAL);
	/* Describe the caller's buffer as a single-segment user-space uio. */
	aiov.iov_base = SCARG(uap, buf);
	aiov.iov_len = SCARG(uap, count);
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	auio.uio_resid = SCARG(uap, count);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	/* Remember the starting offset; it is what gets reported via basep. */
	loff = auio.uio_offset = fp->f_offset;
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		/*
		 * On non-little-endian machines a mount with
		 * mnt_maxsymlinklen <= 0 is read straight into the user
		 * buffer with no conversion.
		 */
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/*
		 * Read into a temporary kernel buffer of 'count' bytes so
		 * the entries can be patched before they reach the user.
		 * NOTE(review): the allocation size is the caller-supplied
		 * count, with no upper bound applied here -- confirm this is
		 * acceptable.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = SCARG(uap, count);
		MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			/* Bytes actually produced by the readdir. */
			readcnt = SCARG(uap, count) - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				/*
				 * Step to the next record; a zero record
				 * length would never advance, so give up
				 * with EIO.
				 */
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					error = EIO;
					break;
				}
			}
			/* Copy out only if the walk reached the end. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, p);
	if (error)
		return (error);
	/* Nothing was transferred: see whether another layer should be read. */
	if (SCARG(uap, count) == auio.uio_resid) {
		if (union_dircheckp) {
			/* A return of -1 asks for the read to be redone on vp. */
			error = union_dircheckp(p, &vp, fp);
			if (error == -1)
				goto unionread;
			if (error)
				return (error);
		}
		if ((vp->v_flag & VROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			/*
			 * Root of a MNT_UNION mount: point the descriptor at
			 * the covered vnode, rewind, and read that instead.
			 */
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->f_data = (caddr_t) vp;
			fp->f_offset = 0;
			vrele(tvp);
			goto unionread;
		}
	}
	/* basep is written unconditionally; it is not checked for NULL. */
	error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
	    sizeof(long));
	p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
	return (error);
}
3084 #endif /* COMPAT_43 */
3085 
3086 /*
3087  * Read a block of directory entries in a file system independent format.
3088  */
3089 #ifndef _SYS_SYSPROTO_H_
3090 struct getdirentries_args {
3091 	int	fd;
3092 	char	*buf;
3093 	u_int	count;
3094 	long	*basep;
3095 };
3096 #endif
3097 int
3098 getdirentries(p, uap)
3099 	struct proc *p;
3100 	register struct getdirentries_args /* {
3101 		syscallarg(int) fd;
3102 		syscallarg(char *) buf;
3103 		syscallarg(u_int) count;
3104 		syscallarg(long *) basep;
3105 	} */ *uap;
3106 {
3107 	struct vnode *vp;
3108 	struct file *fp;
3109 	struct uio auio;
3110 	struct iovec aiov;
3111 	long loff;
3112 	int error, eofflag;
3113 
3114 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3115 		return (error);
3116 	if ((fp->f_flag & FREAD) == 0)
3117 		return (EBADF);
3118 	vp = (struct vnode *)fp->f_data;
3119 unionread:
3120 	if (vp->v_type != VDIR)
3121 		return (EINVAL);
3122 	aiov.iov_base = SCARG(uap, buf);
3123 	aiov.iov_len = SCARG(uap, count);
3124 	auio.uio_iov = &aiov;
3125 	auio.uio_iovcnt = 1;
3126 	auio.uio_rw = UIO_READ;
3127 	auio.uio_segflg = UIO_USERSPACE;
3128 	auio.uio_procp = p;
3129 	auio.uio_resid = SCARG(uap, count);
3130 	/* vn_lock(vp, LK_SHARED | LK_RETRY, p); */
3131 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3132 	loff = auio.uio_offset = fp->f_offset;
3133 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
3134 	fp->f_offset = auio.uio_offset;
3135 	VOP_UNLOCK(vp, 0, p);
3136 	if (error)
3137 		return (error);
3138 	if (SCARG(uap, count) == auio.uio_resid) {
3139 		if (union_dircheckp) {
3140 			error = union_dircheckp(p, &vp, fp);
3141 			if (error == -1)
3142 				goto unionread;
3143 			if (error)
3144 				return (error);
3145 		}
3146 		if ((vp->v_flag & VROOT) &&
3147 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3148 			struct vnode *tvp = vp;
3149 			vp = vp->v_mount->mnt_vnodecovered;
3150 			VREF(vp);
3151 			fp->f_data = (caddr_t) vp;
3152 			fp->f_offset = 0;
3153 			vrele(tvp);
3154 			goto unionread;
3155 		}
3156 	}
3157 	if (SCARG(uap, basep) != NULL) {
3158 		error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3159 		    sizeof(long));
3160 	}
3161 	p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
3162 	return (error);
3163 }
3164 #ifndef _SYS_SYSPROTO_H_
3165 struct getdents_args {
3166 	int fd;
3167 	char *buf;
3168 	size_t count;
3169 };
3170 #endif
3171 int
3172 getdents(p, uap)
3173 	struct proc *p;
3174 	register struct getdents_args /* {
3175 		syscallarg(int) fd;
3176 		syscallarg(char *) buf;
3177 		syscallarg(u_int) count;
3178 	} */ *uap;
3179 {
3180 	struct getdirentries_args ap;
3181 	ap.fd = uap->fd;
3182 	ap.buf = uap->buf;
3183 	ap.count = uap->count;
3184 	ap.basep = NULL;
3185 	return getdirentries(p, &ap);
3186 }
3187 
3188 /*
3189  * Set the mode mask for creation of filesystem nodes.
3190  *
3191  * MP SAFE
3192  */
3193 #ifndef _SYS_SYSPROTO_H_
3194 struct umask_args {
3195 	int	newmask;
3196 };
3197 #endif
3198 int
3199 umask(p, uap)
3200 	struct proc *p;
3201 	struct umask_args /* {
3202 		syscallarg(int) newmask;
3203 	} */ *uap;
3204 {
3205 	register struct filedesc *fdp;
3206 
3207 	fdp = p->p_fd;
3208 	p->p_retval[0] = fdp->fd_cmask;
3209 	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
3210 	return (0);
3211 }
3212 
3213 /*
3214  * Void all references to file by ripping underlying filesystem
3215  * away from vnode.
3216  */
3217 #ifndef _SYS_SYSPROTO_H_
3218 struct revoke_args {
3219 	char	*path;
3220 };
3221 #endif
3222 /* ARGSUSED */
3223 int
3224 revoke(p, uap)
3225 	struct proc *p;
3226 	register struct revoke_args /* {
3227 		syscallarg(char *) path;
3228 	} */ *uap;
3229 {
3230 	struct mount *mp;
3231 	struct vnode *vp;
3232 	struct vattr vattr;
3233 	int error;
3234 	struct nameidata nd;
3235 
3236 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3237 	if ((error = namei(&nd)) != 0)
3238 		return (error);
3239 	vp = nd.ni_vp;
3240 	NDFREE(&nd, NDF_ONLY_PNBUF);
3241 	if (vp->v_type != VCHR) {
3242 		error = EINVAL;
3243 		goto out;
3244 	}
3245 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
3246 		goto out;
3247 	if (p->p_ucred->cr_uid != vattr.va_uid &&
3248 	    (error = suser_xxx(0, p, PRISON_ROOT)))
3249 		goto out;
3250 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3251 		goto out;
3252 	if (vcount(vp) > 1)
3253 		VOP_REVOKE(vp, REVOKEALL);
3254 	vn_finished_write(mp);
3255 out:
3256 	vrele(vp);
3257 	return (error);
3258 }
3259 
3260 /*
3261  * Convert a user file descriptor to a kernel file entry.
3262  */
3263 int
3264 getvnode(fdp, fd, fpp)
3265 	struct filedesc *fdp;
3266 	int fd;
3267 	struct file **fpp;
3268 {
3269 	struct file *fp;
3270 
3271 	if ((u_int)fd >= fdp->fd_nfiles ||
3272 	    (fp = fdp->fd_ofiles[fd]) == NULL)
3273 		return (EBADF);
3274 	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO)
3275 		return (EINVAL);
3276 	*fpp = fp;
3277 	return (0);
3278 }
3279 /*
3280  * Get (NFS) file handle
3281  */
3282 #ifndef _SYS_SYSPROTO_H_
3283 struct getfh_args {
3284 	char	*fname;
3285 	fhandle_t *fhp;
3286 };
3287 #endif
3288 int
3289 getfh(p, uap)
3290 	struct proc *p;
3291 	register struct getfh_args *uap;
3292 {
3293 	struct nameidata nd;
3294 	fhandle_t fh;
3295 	register struct vnode *vp;
3296 	int error;
3297 
3298 	/*
3299 	 * Must be super user
3300 	 */
3301 	error = suser(p);
3302 	if (error)
3303 		return (error);
3304 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
3305 	error = namei(&nd);
3306 	if (error)
3307 		return (error);
3308 	NDFREE(&nd, NDF_ONLY_PNBUF);
3309 	vp = nd.ni_vp;
3310 	bzero(&fh, sizeof(fh));
3311 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3312 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3313 	vput(vp);
3314 	if (error)
3315 		return (error);
3316 	error = copyout(&fh, uap->fhp, sizeof (fh));
3317 	return (error);
3318 }
3319 
3320 /*
3321  * syscall for the rpc.lockd to use to translate a NFS file handle into
3322  * an open descriptor.
3323  *
3324  * warning: do not remove the suser() call or this becomes one giant
3325  * security hole.
3326  */
3327 #ifndef _SYS_SYSPROTO_H_
3328 struct fhopen_args {
3329 	const struct fhandle *u_fhp;
3330 	int flags;
3331 };
3332 #endif
3333 int
3334 fhopen(p, uap)
3335 	struct proc *p;
3336 	struct fhopen_args /* {
3337 		syscallarg(const struct fhandle *) u_fhp;
3338 		syscallarg(int) flags;
3339 	} */ *uap;
3340 {
3341 	struct mount *mp;
3342 	struct vnode *vp;
3343 	struct fhandle fhp;
3344 	struct vattr vat;
3345 	struct vattr *vap = &vat;
3346 	struct flock lf;
3347 	struct file *fp;
3348 	register struct filedesc *fdp = p->p_fd;
3349 	int fmode, mode, error, type;
3350 	struct file *nfp;
3351 	int indx;
3352 
3353 	/*
3354 	 * Must be super user
3355 	 */
3356 	error = suser(p);
3357 	if (error)
3358 		return (error);
3359 
3360 	fmode = FFLAGS(SCARG(uap, flags));
3361 	/* why not allow a non-read/write open for our lockd? */
3362 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3363 		return (EINVAL);
3364 	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
3365 	if (error)
3366 		return(error);
3367 	/* find the mount point */
3368 	mp = vfs_getvfs(&fhp.fh_fsid);
3369 	if (mp == NULL)
3370 		return (ESTALE);
3371 	/* now give me my vnode, it gets returned to me locked */
3372 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3373 	if (error)
3374 		return (error);
3375  	/*
3376 	 * from now on we have to make sure not
3377 	 * to forget about the vnode
3378 	 * any error that causes an abort must vput(vp)
3379 	 * just set error = err and 'goto bad;'.
3380 	 */
3381 
3382 	/*
3383 	 * from vn_open
3384 	 */
3385 	if (vp->v_type == VLNK) {
3386 		error = EMLINK;
3387 		goto bad;
3388 	}
3389 	if (vp->v_type == VSOCK) {
3390 		error = EOPNOTSUPP;
3391 		goto bad;
3392 	}
3393 	mode = 0;
3394 	if (fmode & (FWRITE | O_TRUNC)) {
3395 		if (vp->v_type == VDIR) {
3396 			error = EISDIR;
3397 			goto bad;
3398 		}
3399 		error = vn_writechk(vp);
3400 		if (error)
3401 			goto bad;
3402 		mode |= VWRITE;
3403 	}
3404 	if (fmode & FREAD)
3405 		mode |= VREAD;
3406 	if (mode) {
3407 		error = VOP_ACCESS(vp, mode, p->p_ucred, p);
3408 		if (error)
3409 			goto bad;
3410 	}
3411 	if (fmode & O_TRUNC) {
3412 		VOP_UNLOCK(vp, 0, p);				/* XXX */
3413 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3414 			vrele(vp);
3415 			return (error);
3416 		}
3417 		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
3418 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
3419 		VATTR_NULL(vap);
3420 		vap->va_size = 0;
3421 		error = VOP_SETATTR(vp, vap, p->p_ucred, p);
3422 		vn_finished_write(mp);
3423 		if (error)
3424 			goto bad;
3425 	}
3426 	error = VOP_OPEN(vp, fmode, p->p_ucred, p);
3427 	if (error)
3428 		goto bad;
3429 	/*
3430 	 * Make sure that a VM object is created for VMIO support.
3431 	 */
3432 	if (vn_canvmio(vp) == TRUE) {
3433 		if ((error = vfs_object_create(vp, p, p->p_ucred)) != 0)
3434 			goto bad;
3435 	}
3436 	if (fmode & FWRITE)
3437 		vp->v_writecount++;
3438 
3439 	/*
3440 	 * end of vn_open code
3441 	 */
3442 
3443 	if ((error = falloc(p, &nfp, &indx)) != 0)
3444 		goto bad;
3445 	fp = nfp;
3446 
3447 	/*
3448 	 * Hold an extra reference to avoid having fp ripped out
3449 	 * from under us while we block in the lock op
3450 	 */
3451 	fhold(fp);
3452 	nfp->f_data = (caddr_t)vp;
3453 	nfp->f_flag = fmode & FMASK;
3454 	nfp->f_ops = &vnops;
3455 	nfp->f_type = DTYPE_VNODE;
3456 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
3457 		lf.l_whence = SEEK_SET;
3458 		lf.l_start = 0;
3459 		lf.l_len = 0;
3460 		if (fmode & O_EXLOCK)
3461 			lf.l_type = F_WRLCK;
3462 		else
3463 			lf.l_type = F_RDLCK;
3464 		type = F_FLOCK;
3465 		if ((fmode & FNONBLOCK) == 0)
3466 			type |= F_WAIT;
3467 		VOP_UNLOCK(vp, 0, p);
3468 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
3469 			/*
3470 			 * The lock request failed.  Normally close the
3471 			 * descriptor but handle the case where someone might
3472 			 * have dup()d or close()d it when we weren't looking.
3473 			 */
3474 			if (fdp->fd_ofiles[indx] == fp) {
3475 				fdp->fd_ofiles[indx] = NULL;
3476 				fdrop(fp, p);
3477 			}
3478 			/*
3479 			 * release our private reference
3480 			 */
3481 			fdrop(fp, p);
3482 			return(error);
3483 		}
3484 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3485 		fp->f_flag |= FHASLOCK;
3486 	}
3487 	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
3488 		vfs_object_create(vp, p, p->p_ucred);
3489 
3490 	VOP_UNLOCK(vp, 0, p);
3491 	fdrop(fp, p);
3492 	p->p_retval[0] = indx;
3493 	return (0);
3494 
3495 bad:
3496 	vput(vp);
3497 	return (error);
3498 }
3499 
3500 /*
3501  * Stat an (NFS) file handle.
3502  */
3503 #ifndef _SYS_SYSPROTO_H_
3504 struct fhstat_args {
3505 	struct fhandle *u_fhp;
3506 	struct stat *sb;
3507 };
3508 #endif
3509 int
3510 fhstat(p, uap)
3511 	struct proc *p;
3512 	register struct fhstat_args /* {
3513 		syscallarg(struct fhandle *) u_fhp;
3514 		syscallarg(struct stat *) sb;
3515 	} */ *uap;
3516 {
3517 	struct stat sb;
3518 	fhandle_t fh;
3519 	struct mount *mp;
3520 	struct vnode *vp;
3521 	int error;
3522 
3523 	/*
3524 	 * Must be super user
3525 	 */
3526 	error = suser(p);
3527 	if (error)
3528 		return (error);
3529 
3530 	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
3531 	if (error)
3532 		return (error);
3533 
3534 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3535 		return (ESTALE);
3536 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3537 		return (error);
3538 	error = vn_stat(vp, &sb, p);
3539 	vput(vp);
3540 	if (error)
3541 		return (error);
3542 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
3543 	return (error);
3544 }
3545 
3546 /*
3547  * Implement fstatfs() for (NFS) file handles.
3548  */
3549 #ifndef _SYS_SYSPROTO_H_
3550 struct fhstatfs_args {
3551 	struct fhandle *u_fhp;
3552 	struct statfs *buf;
3553 };
3554 #endif
3555 int
3556 fhstatfs(p, uap)
3557 	struct proc *p;
3558 	struct fhstatfs_args /* {
3559 		syscallarg(struct fhandle) *u_fhp;
3560 		syscallarg(struct statfs) *buf;
3561 	} */ *uap;
3562 {
3563 	struct statfs *sp;
3564 	struct mount *mp;
3565 	struct vnode *vp;
3566 	struct statfs sb;
3567 	fhandle_t fh;
3568 	int error;
3569 
3570 	/*
3571 	 * Must be super user
3572 	 */
3573 	if ((error = suser(p)))
3574 		return (error);
3575 
3576 	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
3577 		return (error);
3578 
3579 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3580 		return (ESTALE);
3581 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3582 		return (error);
3583 	mp = vp->v_mount;
3584 	sp = &mp->mnt_stat;
3585 	vput(vp);
3586 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
3587 		return (error);
3588 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3589 	if (suser_xxx(p->p_ucred, 0, 0)) {
3590 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
3591 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3592 		sp = &sb;
3593 	}
3594 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
3595 }
3596 
3597 /*
3598  * Syscall to push extended attribute configuration information into the
3599  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3600  * a command (int cmd), and attribute name and misc data.  For now, the
3601  * attribute name is left in userspace for consumption by the VFS_op.
3602  * It will probably be changed to be copied into sysspace by the
3603  * syscall in the future, once issues with various consumers of the
3604  * attribute code have raised their hands.
3605  *
3606  * Currently this is used only by UFS Extended Attributes.
3607  */
3608 int
3609 extattrctl(p, uap)
3610 	struct proc *p;
3611 	struct extattrctl_args *uap;
3612 {
3613 	struct nameidata nd;
3614 	struct mount *mp;
3615 	int error;
3616 
3617 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3618 	if ((error = namei(&nd)) != 0)
3619 		return (error);
3620 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
3621 	NDFREE(&nd, 0);
3622 	if (error)
3623 		return (error);
3624 	error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname),
3625 	    SCARG(uap, arg), p);
3626 	vn_finished_write(mp);
3627 	return (error);
3628 }
3629 
3630 /*
3631  * Syscall to set a named extended attribute on a file or directory.
3632  * Accepts attribute name, and a uio structure pointing to the data to set.
3633  * The uio is consumed in the style of writev().  The real work happens
3634  * in VOP_SETEXTATTR().
3635  */
3636 int
3637 extattr_set_file(p, uap)
3638 	struct proc *p;
3639 	struct extattr_set_file_args *uap;
3640 {
3641 	struct nameidata nd;
3642 	struct mount *mp;
3643 	struct uio auio;
3644 	struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
3645 	char attrname[EXTATTR_MAXNAMELEN];
3646 	u_int iovlen, cnt;
3647 	int error, i;
3648 
3649 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
3650 	if (error)
3651 		return (error);
3652 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3653 	    SCARG(uap, path), p);
3654 	if ((error = namei(&nd)) != 0)
3655 		return(error);
3656 	if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0) {
3657 		NDFREE(&nd, 0);
3658 		return (error);
3659 	}
3660 	iovlen = uap->iovcnt * sizeof(struct iovec);
3661 	if (uap->iovcnt > UIO_SMALLIOV) {
3662 		if (uap->iovcnt > UIO_MAXIOV) {
3663 			error = EINVAL;
3664 			goto done;
3665 		}
3666 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3667 		needfree = iov;
3668 	} else
3669 		iov = aiov;
3670 	auio.uio_iov = iov;
3671 	auio.uio_iovcnt = uap->iovcnt;
3672 	auio.uio_rw = UIO_WRITE;
3673 	auio.uio_segflg = UIO_USERSPACE;
3674 	auio.uio_procp = p;
3675 	auio.uio_offset = 0;
3676 	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
3677 		goto done;
3678 	auio.uio_resid = 0;
3679 	for (i = 0; i < uap->iovcnt; i++) {
3680 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
3681 			error = EINVAL;
3682 			goto done;
3683 		}
3684 		auio.uio_resid += iov->iov_len;
3685 		iov++;
3686 	}
3687 	cnt = auio.uio_resid;
3688 	error = VOP_SETEXTATTR(nd.ni_vp, attrname, &auio, p->p_cred->pc_ucred,
3689 	    p);
3690 	cnt -= auio.uio_resid;
3691 	p->p_retval[0] = cnt;
3692 done:
3693 	if (needfree)
3694 		FREE(needfree, M_IOV);
3695 	NDFREE(&nd, 0);
3696 	vn_finished_write(mp);
3697 	return (error);
3698 }
3699 
3700 /*
3701  * Syscall to get a named extended attribute on a file or directory.
3702  * Accepts attribute name, and a uio structure pointing to a buffer for the
3703  * data.  The uio is consumed in the style of readv().  The real work
3704  * happens in VOP_GETEXTATTR();
3705  */
3706 int
3707 extattr_get_file(p, uap)
3708 	struct proc *p;
3709 	struct extattr_get_file_args *uap;
3710 {
3711 	struct nameidata nd;
3712 	struct uio auio;
3713 	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
3714 	char attrname[EXTATTR_MAXNAMELEN];
3715 	u_int iovlen, cnt;
3716 	int error, i;
3717 
3718 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
3719 	if (error)
3720 		return (error);
3721 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3722 	    SCARG(uap, path), p);
3723 	if ((error = namei(&nd)) != 0)
3724 		return (error);
3725 	iovlen = uap->iovcnt * sizeof (struct iovec);
3726 	if (uap->iovcnt > UIO_SMALLIOV) {
3727 		if (uap->iovcnt > UIO_MAXIOV) {
3728 			NDFREE(&nd, 0);
3729 			return (EINVAL);
3730 		}
3731 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3732 		needfree = iov;
3733 	} else {
3734 		iov = aiov;
3735 		needfree = NULL;
3736 	}
3737 	auio.uio_iov = iov;
3738 	auio.uio_iovcnt = uap->iovcnt;
3739 	auio.uio_rw = UIO_READ;
3740 	auio.uio_segflg = UIO_USERSPACE;
3741 	auio.uio_procp = p;
3742 	auio.uio_offset = 0;
3743 	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
3744 		goto done;
3745 	auio.uio_resid = 0;
3746 	for (i = 0; i < uap->iovcnt; i++) {
3747 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
3748 			error = EINVAL;
3749 			goto done;
3750 		}
3751 		auio.uio_resid += iov->iov_len;
3752 		iov++;
3753 	}
3754 	cnt = auio.uio_resid;
3755 	error = VOP_GETEXTATTR(nd.ni_vp, attrname, &auio, p->p_cred->pc_ucred,
3756 	    p);
3757 	cnt -= auio.uio_resid;
3758 	p->p_retval[0] = cnt;
3759 done:
3760 	if (needfree)
3761 		FREE(needfree, M_IOV);
3762 	NDFREE(&nd, 0);
3763 	return(error);
3764 }
3765 
3766 /*
3767  * Syscall to delete a named extended attribute from a file or directory.
3768  * Accepts attribute name.  The real work happens in VOP_SETEXTATTR().
3769  */
3770 int
3771 extattr_delete_file(p, uap)
3772 	struct proc *p;
3773 	struct extattr_delete_file_args *uap;
3774 {
3775 	struct mount *mp;
3776 	struct nameidata nd;
3777 	char attrname[EXTATTR_MAXNAMELEN];
3778 	int	error;
3779 
3780 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
3781 	if (error)
3782 		return(error);
3783 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3784 	    SCARG(uap, path), p);
3785 	if ((error = namei(&nd)) != 0)
3786 		return(error);
3787 	if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0) {
3788 		NDFREE(&nd, 0);
3789 		return (error);
3790 	}
3791 	error = VOP_SETEXTATTR(nd.ni_vp, attrname, NULL, p->p_cred->pc_ucred,
3792 	    p);
3793 	NDFREE(&nd, 0);
3794 	vn_finished_write(mp);
3795 	return(error);
3796 }
3797