xref: /freebsd/sys/kern/vfs_extattr.c (revision b601c69bdbe8755d26570261d7fd4c02ee4eff74)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  * $FreeBSD$
40  */
41 
42 /* For 4.3 integer FS ID compatibility */
43 #include "opt_compat.h"
44 #include "opt_ffs.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/sysent.h>
51 #include <sys/malloc.h>
52 #include <sys/mount.h>
53 #include <sys/sysproto.h>
54 #include <sys/namei.h>
55 #include <sys/filedesc.h>
56 #include <sys/kernel.h>
57 #include <sys/fcntl.h>
58 #include <sys/file.h>
59 #include <sys/linker.h>
60 #include <sys/stat.h>
61 #include <sys/unistd.h>
62 #include <sys/vnode.h>
63 #include <sys/proc.h>
64 #include <sys/dirent.h>
65 #include <sys/extattr.h>
66 
67 #include <machine/limits.h>
68 #include <miscfs/union/union.h>
69 #include <sys/sysctl.h>
70 #include <vm/vm.h>
71 #include <vm/vm_object.h>
72 #include <vm/vm_zone.h>
73 
/*
 * Forward declarations for helpers private to this file.
 */
static int change_dir __P((struct nameidata *ndp, struct proc *p));
static void checkdirs __P((struct vnode *olddp));
static int chroot_refuse_vdir_fds __P((struct filedesc *fdp));
static int getutimes __P((const struct timeval *, struct timespec *));
static int setfown __P((struct proc *, struct vnode *, uid_t, gid_t));
static int setfmode __P((struct proc *, struct vnode *, int));
static int setfflags __P((struct proc *, struct vnode *, int));
static int setutimes __P((struct proc *, struct vnode *,
    const struct timespec *, int));
/*
 * Policy knob read by mount(): while it is zero, mount() insists that
 * suser() succeed before it does anything else.
 */
static int	usermount = 0;	/* if 1, non-root can mount fs. */

/*
 * Hook pointer.  It is neither assigned nor called in the part of the
 * file reviewed here.
 * NOTE(review): presumably installed by the union filesystem -- confirm.
 */
int (*union_dircheckp) __P((struct proc *, struct vnode **, struct file *));

/* Publish usermount as a read-write integer sysctl under the vfs node. */
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
88 
89 /*
90  * Virtual File System System Calls
91  */
92 
93 /*
94  * Mount a file system.
95  */
96 #ifndef _SYS_SYSPROTO_H_
97 struct mount_args {
98 	char	*type;
99 	char	*path;
100 	int	flags;
101 	caddr_t	data;
102 };
103 #endif
104 /* ARGSUSED */
105 int
106 mount(p, uap)
107 	struct proc *p;
108 	register struct mount_args /* {
109 		syscallarg(char *) type;
110 		syscallarg(char *) path;
111 		syscallarg(int) flags;
112 		syscallarg(caddr_t) data;
113 	} */ *uap;
114 {
115 	struct vnode *vp;
116 	struct mount *mp;
117 	struct vfsconf *vfsp;
118 	int error, flag = 0, flag2 = 0;
119 	struct vattr va;
120 #ifdef COMPAT_43
121 	u_long fstypenum;
122 #endif
123 	struct nameidata nd;
124 	char fstypename[MFSNAMELEN];
125 
126 	if (usermount == 0 && (error = suser(p)))
127 		return (error);
128 	/*
129 	 * Do not allow NFS export by non-root users.
130 	 */
131 	if (SCARG(uap, flags) & MNT_EXPORTED) {
132 		error = suser(p);
133 		if (error)
134 			return (error);
135 	}
136 	/*
137 	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
138 	 */
139 	if (suser_xxx(p->p_ucred, 0, 0))
140 		SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
141 	/*
142 	 * Get vnode to be covered
143 	 */
144 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
145 	    SCARG(uap, path), p);
146 	if ((error = namei(&nd)) != 0)
147 		return (error);
148 	NDFREE(&nd, NDF_ONLY_PNBUF);
149 	vp = nd.ni_vp;
150 	if (SCARG(uap, flags) & MNT_UPDATE) {
151 		if ((vp->v_flag & VROOT) == 0) {
152 			vput(vp);
153 			return (EINVAL);
154 		}
155 		mp = vp->v_mount;
156 		flag = mp->mnt_flag;
157 		flag2 = mp->mnt_kern_flag;
158 		/*
159 		 * We only allow the filesystem to be reloaded if it
160 		 * is currently mounted read-only.
161 		 */
162 		if ((SCARG(uap, flags) & MNT_RELOAD) &&
163 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
164 			vput(vp);
165 			return (EOPNOTSUPP);	/* Needs translation */
166 		}
167 		mp->mnt_flag |= SCARG(uap, flags) &
168 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
169 		/*
170 		 * Only root, or the user that did the original mount is
171 		 * permitted to update it.
172 		 */
173 		if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid &&
174 		    (error = suser(p))) {
175 			vput(vp);
176 			return (error);
177 		}
178 		if (vfs_busy(mp, LK_NOWAIT, 0, p)) {
179 			vput(vp);
180 			return (EBUSY);
181 		}
182 		VOP_UNLOCK(vp, 0, p);
183 		goto update;
184 	}
185 	/*
186 	 * If the user is not root, ensure that they own the directory
187 	 * onto which we are attempting to mount.
188 	 */
189 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) ||
190 	    (va.va_uid != p->p_ucred->cr_uid &&
191 	     (error = suser(p)))) {
192 		vput(vp);
193 		return (error);
194 	}
195 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0)
196 		return (error);
197 	if (vp->v_type != VDIR) {
198 		vput(vp);
199 		return (ENOTDIR);
200 	}
201 #ifdef COMPAT_43
202 	/*
203 	 * Historically filesystem types were identified by number. If we
204 	 * get an integer for the filesystem type instead of a string, we
205 	 * check to see if it matches one of the historic filesystem types.
206 	 */
207 	fstypenum = (uintptr_t)SCARG(uap, type);
208 	if (fstypenum < maxvfsconf) {
209 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
210 			if (vfsp->vfc_typenum == fstypenum)
211 				break;
212 		if (vfsp == NULL) {
213 			vput(vp);
214 			return (ENODEV);
215 		}
216 		strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN);
217 	} else
218 #endif /* COMPAT_43 */
219 	if ((error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL)) != 0) {
220 		vput(vp);
221 		return (error);
222 	}
223 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
224 		if (!strcmp(vfsp->vfc_name, fstypename))
225 			break;
226 	if (vfsp == NULL) {
227 		linker_file_t lf;
228 
229 		/* Only load modules for root (very important!) */
230 		if ((error = suser(p)) != 0) {
231 			vput(vp);
232 			return error;
233 		}
234 		error = linker_load_file(fstypename, &lf);
235 		if (error || lf == NULL) {
236 			vput(vp);
237 			if (lf == NULL)
238 				error = ENODEV;
239 			return error;
240 		}
241 		lf->userrefs++;
242 		/* lookup again, see if the VFS was loaded */
243 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
244 			if (!strcmp(vfsp->vfc_name, fstypename))
245 				break;
246 		if (vfsp == NULL) {
247 			lf->userrefs--;
248 			linker_file_unload(lf);
249 			vput(vp);
250 			return (ENODEV);
251 		}
252 	}
253 	simple_lock(&vp->v_interlock);
254 	if ((vp->v_flag & VMOUNT) != 0 ||
255 	    vp->v_mountedhere != NULL) {
256 		simple_unlock(&vp->v_interlock);
257 		vput(vp);
258 		return (EBUSY);
259 	}
260 	vp->v_flag |= VMOUNT;
261 	simple_unlock(&vp->v_interlock);
262 
263 	/*
264 	 * Allocate and initialize the filesystem.
265 	 */
266 	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK);
267 	bzero((char *)mp, (u_long)sizeof(struct mount));
268 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
269 	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
270 	mp->mnt_op = vfsp->vfc_vfsops;
271 	mp->mnt_vfc = vfsp;
272 	vfsp->vfc_refcount++;
273 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
274 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
275 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
276 	mp->mnt_vnodecovered = vp;
277 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
278 	mp->mnt_iosize_max = DFLTPHYS;
279 	VOP_UNLOCK(vp, 0, p);
280 update:
281 	/*
282 	 * Set the mount level flags.
283 	 */
284 	if (SCARG(uap, flags) & MNT_RDONLY)
285 		mp->mnt_flag |= MNT_RDONLY;
286 	else if (mp->mnt_flag & MNT_RDONLY)
287 		mp->mnt_kern_flag |= MNTK_WANTRDWR;
288 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
289 	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
290 	    MNT_NOSYMFOLLOW | MNT_IGNORE |
291 	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
292 	mp->mnt_flag |= SCARG(uap, flags) & (MNT_NOSUID | MNT_NOEXEC |
293 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
294 	    MNT_NOSYMFOLLOW | MNT_IGNORE |
295 	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
296 	/*
297 	 * Mount the filesystem.
298 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
299 	 * get.  No freeing of cn_pnbuf.
300 	 */
301 	error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p);
302 	if (mp->mnt_flag & MNT_UPDATE) {
303 		vrele(vp);
304 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
305 			mp->mnt_flag &= ~MNT_RDONLY;
306 		mp->mnt_flag &=~
307 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
308 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
309 		if (error) {
310 			mp->mnt_flag = flag;
311 			mp->mnt_kern_flag = flag2;
312 		}
313 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
314 			if (mp->mnt_syncer == NULL)
315 				error = vfs_allocate_syncvnode(mp);
316 		} else {
317 			if (mp->mnt_syncer != NULL)
318 				vrele(mp->mnt_syncer);
319 			mp->mnt_syncer = NULL;
320 		}
321 		vfs_unbusy(mp, p);
322 		return (error);
323 	}
324 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
325 	/*
326 	 * Put the new filesystem on the mount list after root.
327 	 */
328 	cache_purge(vp);
329 	if (!error) {
330 		simple_lock(&vp->v_interlock);
331 		vp->v_flag &= ~VMOUNT;
332 		vp->v_mountedhere = mp;
333 		simple_unlock(&vp->v_interlock);
334 		simple_lock(&mountlist_slock);
335 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
336 		simple_unlock(&mountlist_slock);
337 		checkdirs(vp);
338 		VOP_UNLOCK(vp, 0, p);
339 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
340 			error = vfs_allocate_syncvnode(mp);
341 		vfs_unbusy(mp, p);
342 		if ((error = VFS_START(mp, 0, p)) != 0)
343 			vrele(vp);
344 	} else {
345 		simple_lock(&vp->v_interlock);
346 		vp->v_flag &= ~VMOUNT;
347 		simple_unlock(&vp->v_interlock);
348 		mp->mnt_vfc->vfc_refcount--;
349 		vfs_unbusy(mp, p);
350 		free((caddr_t)mp, M_MOUNT);
351 		vput(vp);
352 	}
353 	return (error);
354 }
355 
356 /*
357  * Scan all active processes to see if any of them have a current
358  * or root directory onto which the new filesystem has just been
359  * mounted. If so, replace them with the new mount point.
360  */
361 static void
362 checkdirs(olddp)
363 	struct vnode *olddp;
364 {
365 	struct filedesc *fdp;
366 	struct vnode *newdp;
367 	struct proc *p;
368 
369 	if (olddp->v_usecount == 1)
370 		return;
371 	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
372 		panic("mount: lost mount");
373 	LIST_FOREACH(p, &allproc, p_list) {
374 		fdp = p->p_fd;
375 		if (fdp->fd_cdir == olddp) {
376 			vrele(fdp->fd_cdir);
377 			VREF(newdp);
378 			fdp->fd_cdir = newdp;
379 		}
380 		if (fdp->fd_rdir == olddp) {
381 			vrele(fdp->fd_rdir);
382 			VREF(newdp);
383 			fdp->fd_rdir = newdp;
384 		}
385 	}
386 	if (rootvnode == olddp) {
387 		vrele(rootvnode);
388 		VREF(newdp);
389 		rootvnode = newdp;
390 	}
391 	vput(newdp);
392 }
393 
394 /*
395  * Unmount a file system.
396  *
397  * Note: unmount takes a path to the vnode mounted on as argument,
398  * not special file (as before).
399  */
400 #ifndef _SYS_SYSPROTO_H_
401 struct unmount_args {
402 	char	*path;
403 	int	flags;
404 };
405 #endif
406 /* ARGSUSED */
407 int
408 unmount(p, uap)
409 	struct proc *p;
410 	register struct unmount_args /* {
411 		syscallarg(char *) path;
412 		syscallarg(int) flags;
413 	} */ *uap;
414 {
415 	register struct vnode *vp;
416 	struct mount *mp;
417 	int error;
418 	struct nameidata nd;
419 
420 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
421 	    SCARG(uap, path), p);
422 	if ((error = namei(&nd)) != 0)
423 		return (error);
424 	vp = nd.ni_vp;
425 	NDFREE(&nd, NDF_ONLY_PNBUF);
426 	mp = vp->v_mount;
427 
428 	/*
429 	 * Only root, or the user that did the original mount is
430 	 * permitted to unmount this filesystem.
431 	 */
432 	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
433 	    (error = suser(p))) {
434 		vput(vp);
435 		return (error);
436 	}
437 
438 	/*
439 	 * Don't allow unmounting the root file system.
440 	 */
441 	if (mp->mnt_flag & MNT_ROOTFS) {
442 		vput(vp);
443 		return (EINVAL);
444 	}
445 
446 	/*
447 	 * Must be the root of the filesystem
448 	 */
449 	if ((vp->v_flag & VROOT) == 0) {
450 		vput(vp);
451 		return (EINVAL);
452 	}
453 	vput(vp);
454 	return (dounmount(mp, SCARG(uap, flags), p));
455 }
456 
457 /*
458  * Do the actual file system unmount.
459  */
460 int
461 dounmount(mp, flags, p)
462 	struct mount *mp;
463 	int flags;
464 	struct proc *p;
465 {
466 	struct vnode *coveredvp;
467 	int error;
468 	int async_flag;
469 
470 	simple_lock(&mountlist_slock);
471 	mp->mnt_kern_flag |= MNTK_UNMOUNT;
472 	lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p);
473 	vn_start_write(NULL, &mp, V_WAIT);
474 
475 	if (mp->mnt_flag & MNT_EXPUBLIC)
476 		vfs_setpublicfs(NULL, NULL, NULL);
477 
478 	vfs_msync(mp, MNT_WAIT);
479 	async_flag = mp->mnt_flag & MNT_ASYNC;
480 	mp->mnt_flag &=~ MNT_ASYNC;
481 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
482 	if (mp->mnt_syncer != NULL)
483 		vrele(mp->mnt_syncer);
484 	if (((mp->mnt_flag & MNT_RDONLY) ||
485 	     (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
486 	    (flags & MNT_FORCE)) {
487 		error = VFS_UNMOUNT(mp, flags, p);
488 	}
489 	vn_finished_write(mp);
490 	simple_lock(&mountlist_slock);
491 	if (error) {
492 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
493 			(void) vfs_allocate_syncvnode(mp);
494 		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
495 		mp->mnt_flag |= async_flag;
496 		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
497 		    &mountlist_slock, p);
498 		if (mp->mnt_kern_flag & MNTK_MWAIT)
499 			wakeup((caddr_t)mp);
500 		return (error);
501 	}
502 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
503 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
504 		coveredvp->v_mountedhere = (struct mount *)0;
505 		vrele(coveredvp);
506 	}
507 	mp->mnt_vfc->vfc_refcount--;
508 	if (!LIST_EMPTY(&mp->mnt_vnodelist))
509 		panic("unmount: dangling vnode");
510 	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock, p);
511 	if (mp->mnt_kern_flag & MNTK_MWAIT)
512 		wakeup((caddr_t)mp);
513 	free((caddr_t)mp, M_MOUNT);
514 	return (0);
515 }
516 
517 /*
518  * Sync each mounted filesystem.
519  */
520 #ifndef _SYS_SYSPROTO_H_
521 struct sync_args {
522         int     dummy;
523 };
524 #endif
525 
526 #ifdef DEBUG
527 static int syncprt = 0;
528 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
529 #endif
530 
531 /* ARGSUSED */
532 int
533 sync(p, uap)
534 	struct proc *p;
535 	struct sync_args *uap;
536 {
537 	struct mount *mp, *nmp;
538 	int asyncflag;
539 
540 	simple_lock(&mountlist_slock);
541 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
542 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
543 			nmp = TAILQ_NEXT(mp, mnt_list);
544 			continue;
545 		}
546 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
547 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
548 			asyncflag = mp->mnt_flag & MNT_ASYNC;
549 			mp->mnt_flag &= ~MNT_ASYNC;
550 			vfs_msync(mp, MNT_NOWAIT);
551 			VFS_SYNC(mp, MNT_NOWAIT,
552 			    ((p != NULL) ? p->p_ucred : NOCRED), p);
553 			mp->mnt_flag |= asyncflag;
554 			vn_finished_write(mp);
555 		}
556 		simple_lock(&mountlist_slock);
557 		nmp = TAILQ_NEXT(mp, mnt_list);
558 		vfs_unbusy(mp, p);
559 	}
560 	simple_unlock(&mountlist_slock);
561 #if 0
562 /*
563  * XXX don't call vfs_bufstats() yet because that routine
564  * was not imported in the Lite2 merge.
565  */
566 #ifdef DIAGNOSTIC
567 	if (syncprt)
568 		vfs_bufstats();
569 #endif /* DIAGNOSTIC */
570 #endif
571 	return (0);
572 }
573 
574 /* XXX PRISON: could be per prison flag */
575 static int prison_quotas;
576 #if 0
577 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
578 #endif
579 
580 /*
581  * Change filesystem quotas.
582  */
583 #ifndef _SYS_SYSPROTO_H_
584 struct quotactl_args {
585 	char *path;
586 	int cmd;
587 	int uid;
588 	caddr_t arg;
589 };
590 #endif
591 /* ARGSUSED */
592 int
593 quotactl(p, uap)
594 	struct proc *p;
595 	register struct quotactl_args /* {
596 		syscallarg(char *) path;
597 		syscallarg(int) cmd;
598 		syscallarg(int) uid;
599 		syscallarg(caddr_t) arg;
600 	} */ *uap;
601 {
602 	struct mount *mp;
603 	int error;
604 	struct nameidata nd;
605 
606 	if (p->p_prison && !prison_quotas)
607 		return (EPERM);
608 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
609 	if ((error = namei(&nd)) != 0)
610 		return (error);
611 	NDFREE(&nd, NDF_ONLY_PNBUF);
612 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
613 	vrele(nd.ni_vp);
614 	if (error)
615 		return (error);
616 	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
617 	    SCARG(uap, arg), p);
618 	vn_finished_write(mp);
619 	return (error);
620 }
621 
622 /*
623  * Get filesystem statistics.
624  */
625 #ifndef _SYS_SYSPROTO_H_
626 struct statfs_args {
627 	char *path;
628 	struct statfs *buf;
629 };
630 #endif
631 /* ARGSUSED */
632 int
633 statfs(p, uap)
634 	struct proc *p;
635 	register struct statfs_args /* {
636 		syscallarg(char *) path;
637 		syscallarg(struct statfs *) buf;
638 	} */ *uap;
639 {
640 	register struct mount *mp;
641 	register struct statfs *sp;
642 	int error;
643 	struct nameidata nd;
644 	struct statfs sb;
645 
646 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
647 	if ((error = namei(&nd)) != 0)
648 		return (error);
649 	mp = nd.ni_vp->v_mount;
650 	sp = &mp->mnt_stat;
651 	NDFREE(&nd, NDF_ONLY_PNBUF);
652 	vrele(nd.ni_vp);
653 	error = VFS_STATFS(mp, sp, p);
654 	if (error)
655 		return (error);
656 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
657 	if (suser_xxx(p->p_ucred, 0, 0)) {
658 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
659 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
660 		sp = &sb;
661 	}
662 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
663 }
664 
665 /*
666  * Get filesystem statistics.
667  */
668 #ifndef _SYS_SYSPROTO_H_
669 struct fstatfs_args {
670 	int fd;
671 	struct statfs *buf;
672 };
673 #endif
674 /* ARGSUSED */
675 int
676 fstatfs(p, uap)
677 	struct proc *p;
678 	register struct fstatfs_args /* {
679 		syscallarg(int) fd;
680 		syscallarg(struct statfs *) buf;
681 	} */ *uap;
682 {
683 	struct file *fp;
684 	struct mount *mp;
685 	register struct statfs *sp;
686 	int error;
687 	struct statfs sb;
688 
689 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
690 		return (error);
691 	mp = ((struct vnode *)fp->f_data)->v_mount;
692 	sp = &mp->mnt_stat;
693 	error = VFS_STATFS(mp, sp, p);
694 	if (error)
695 		return (error);
696 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
697 	if (suser_xxx(p->p_ucred, 0, 0)) {
698 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
699 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
700 		sp = &sb;
701 	}
702 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
703 }
704 
705 /*
706  * Get statistics on all filesystems.
707  */
708 #ifndef _SYS_SYSPROTO_H_
709 struct getfsstat_args {
710 	struct statfs *buf;
711 	long bufsize;
712 	int flags;
713 };
714 #endif
715 int
716 getfsstat(p, uap)
717 	struct proc *p;
718 	register struct getfsstat_args /* {
719 		syscallarg(struct statfs *) buf;
720 		syscallarg(long) bufsize;
721 		syscallarg(int) flags;
722 	} */ *uap;
723 {
724 	register struct mount *mp, *nmp;
725 	register struct statfs *sp;
726 	caddr_t sfsp;
727 	long count, maxcount, error;
728 
729 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
730 	sfsp = (caddr_t)SCARG(uap, buf);
731 	count = 0;
732 	simple_lock(&mountlist_slock);
733 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
734 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
735 			nmp = TAILQ_NEXT(mp, mnt_list);
736 			continue;
737 		}
738 		if (sfsp && count < maxcount) {
739 			sp = &mp->mnt_stat;
740 			/*
741 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
742 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
743 			 * overrides MNT_WAIT.
744 			 */
745 			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
746 			    (SCARG(uap, flags) & MNT_WAIT)) &&
747 			    (error = VFS_STATFS(mp, sp, p))) {
748 				simple_lock(&mountlist_slock);
749 				nmp = TAILQ_NEXT(mp, mnt_list);
750 				vfs_unbusy(mp, p);
751 				continue;
752 			}
753 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
754 			error = copyout((caddr_t)sp, sfsp, sizeof(*sp));
755 			if (error) {
756 				vfs_unbusy(mp, p);
757 				return (error);
758 			}
759 			sfsp += sizeof(*sp);
760 		}
761 		count++;
762 		simple_lock(&mountlist_slock);
763 		nmp = TAILQ_NEXT(mp, mnt_list);
764 		vfs_unbusy(mp, p);
765 	}
766 	simple_unlock(&mountlist_slock);
767 	if (sfsp && count > maxcount)
768 		p->p_retval[0] = maxcount;
769 	else
770 		p->p_retval[0] = count;
771 	return (0);
772 }
773 
774 /*
775  * Change current working directory to a given file descriptor.
776  */
777 #ifndef _SYS_SYSPROTO_H_
778 struct fchdir_args {
779 	int	fd;
780 };
781 #endif
782 /* ARGSUSED */
783 int
784 fchdir(p, uap)
785 	struct proc *p;
786 	struct fchdir_args /* {
787 		syscallarg(int) fd;
788 	} */ *uap;
789 {
790 	register struct filedesc *fdp = p->p_fd;
791 	struct vnode *vp, *tdp;
792 	struct mount *mp;
793 	struct file *fp;
794 	int error;
795 
796 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
797 		return (error);
798 	vp = (struct vnode *)fp->f_data;
799 	VREF(vp);
800 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
801 	if (vp->v_type != VDIR)
802 		error = ENOTDIR;
803 	else
804 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
805 	while (!error && (mp = vp->v_mountedhere) != NULL) {
806 		if (vfs_busy(mp, 0, 0, p))
807 			continue;
808 		error = VFS_ROOT(mp, &tdp);
809 		vfs_unbusy(mp, p);
810 		if (error)
811 			break;
812 		vput(vp);
813 		vp = tdp;
814 	}
815 	if (error) {
816 		vput(vp);
817 		return (error);
818 	}
819 	VOP_UNLOCK(vp, 0, p);
820 	vrele(fdp->fd_cdir);
821 	fdp->fd_cdir = vp;
822 	return (0);
823 }
824 
825 /*
826  * Change current working directory (``.'').
827  */
828 #ifndef _SYS_SYSPROTO_H_
829 struct chdir_args {
830 	char	*path;
831 };
832 #endif
833 /* ARGSUSED */
834 int
835 chdir(p, uap)
836 	struct proc *p;
837 	struct chdir_args /* {
838 		syscallarg(char *) path;
839 	} */ *uap;
840 {
841 	register struct filedesc *fdp = p->p_fd;
842 	int error;
843 	struct nameidata nd;
844 
845 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
846 	    SCARG(uap, path), p);
847 	if ((error = change_dir(&nd, p)) != 0)
848 		return (error);
849 	NDFREE(&nd, NDF_ONLY_PNBUF);
850 	vrele(fdp->fd_cdir);
851 	fdp->fd_cdir = nd.ni_vp;
852 	return (0);
853 }
854 
855 /*
856  * Helper function for raised chroot(2) security function:  Refuse if
857  * any filedescriptors are open directories.
858  */
859 static int
860 chroot_refuse_vdir_fds(fdp)
861 	struct filedesc *fdp;
862 {
863 	struct vnode *vp;
864 	struct file *fp;
865 	int error;
866 	int fd;
867 
868 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
869 		error = getvnode(fdp, fd, &fp);
870 		if (error)
871 			continue;
872 		vp = (struct vnode *)fp->f_data;
873 		if (vp->v_type != VDIR)
874 			continue;
875 		return(EPERM);
876 	}
877 	return (0);
878 }
879 
880 /*
881  * This sysctl determines if we will allow a process to chroot(2) if it
882  * has a directory open:
883  *	0: disallowed for all processes.
884  *	1: allowed for processes that were not already chroot(2)'ed.
885  *	2: allowed for all processes.
886  */
887 
888 static int chroot_allow_open_directories = 1;
889 
890 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
891      &chroot_allow_open_directories, 0, "");
892 
893 /*
894  * Change notion of root (``/'') directory.
895  */
896 #ifndef _SYS_SYSPROTO_H_
897 struct chroot_args {
898 	char	*path;
899 };
900 #endif
901 /* ARGSUSED */
902 int
903 chroot(p, uap)
904 	struct proc *p;
905 	struct chroot_args /* {
906 		syscallarg(char *) path;
907 	} */ *uap;
908 {
909 	register struct filedesc *fdp = p->p_fd;
910 	int error;
911 	struct nameidata nd;
912 
913 	error = suser_xxx(0, p, PRISON_ROOT);
914 	if (error)
915 		return (error);
916 	if (chroot_allow_open_directories == 0 ||
917 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode))
918 		error = chroot_refuse_vdir_fds(fdp);
919 	if (error)
920 		return (error);
921 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
922 	    SCARG(uap, path), p);
923 	if ((error = change_dir(&nd, p)) != 0)
924 		return (error);
925 	NDFREE(&nd, NDF_ONLY_PNBUF);
926 	vrele(fdp->fd_rdir);
927 	fdp->fd_rdir = nd.ni_vp;
928 	if (!fdp->fd_jdir) {
929 		fdp->fd_jdir = nd.ni_vp;
930                 VREF(fdp->fd_jdir);
931 	}
932 	return (0);
933 }
934 
935 /*
936  * Common routine for chroot and chdir.
937  */
938 static int
939 change_dir(ndp, p)
940 	register struct nameidata *ndp;
941 	struct proc *p;
942 {
943 	struct vnode *vp;
944 	int error;
945 
946 	error = namei(ndp);
947 	if (error)
948 		return (error);
949 	vp = ndp->ni_vp;
950 	if (vp->v_type != VDIR)
951 		error = ENOTDIR;
952 	else
953 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
954 	if (error)
955 		vput(vp);
956 	else
957 		VOP_UNLOCK(vp, 0, p);
958 	return (error);
959 }
960 
961 /*
962  * Check permissions, allocate an open file structure,
963  * and call the device open routine if any.
964  */
965 #ifndef _SYS_SYSPROTO_H_
966 struct open_args {
967 	char	*path;
968 	int	flags;
969 	int	mode;
970 };
971 #endif
972 int
973 open(p, uap)
974 	struct proc *p;
975 	register struct open_args /* {
976 		syscallarg(char *) path;
977 		syscallarg(int) flags;
978 		syscallarg(int) mode;
979 	} */ *uap;
980 {
981 	struct filedesc *fdp = p->p_fd;
982 	struct file *fp;
983 	struct vnode *vp;
984 	struct vattr vat;
985 	struct mount *mp;
986 	int cmode, flags, oflags;
987 	struct file *nfp;
988 	int type, indx, error;
989 	struct flock lf;
990 	struct nameidata nd;
991 
992 	oflags = SCARG(uap, flags);
993 	if ((oflags & O_ACCMODE) == O_ACCMODE)
994 		return (EINVAL);
995 	flags = FFLAGS(oflags);
996 	error = falloc(p, &nfp, &indx);
997 	if (error)
998 		return (error);
999 	fp = nfp;
1000 	cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1001 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1002 	p->p_dupfd = -indx - 1;			/* XXX check for fdopen */
1003 	error = vn_open(&nd, &flags, cmode);
1004 	if (error) {
1005 		ffree(fp);
1006 		if ((error == ENODEV || error == ENXIO) &&
1007 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
1008 		    (error =
1009 			dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) {
1010 			p->p_retval[0] = indx;
1011 			return (0);
1012 		}
1013 		if (error == ERESTART)
1014 			error = EINTR;
1015 		fdp->fd_ofiles[indx] = NULL;
1016 		return (error);
1017 	}
1018 	p->p_dupfd = 0;
1019 	NDFREE(&nd, NDF_ONLY_PNBUF);
1020 	vp = nd.ni_vp;
1021 
1022 	fp->f_data = (caddr_t)vp;
1023 	fp->f_flag = flags & FMASK;
1024 	fp->f_ops = &vnops;
1025 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1026 	VOP_UNLOCK(vp, 0, p);
1027 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1028 		lf.l_whence = SEEK_SET;
1029 		lf.l_start = 0;
1030 		lf.l_len = 0;
1031 		if (flags & O_EXLOCK)
1032 			lf.l_type = F_WRLCK;
1033 		else
1034 			lf.l_type = F_RDLCK;
1035 		type = F_FLOCK;
1036 		if ((flags & FNONBLOCK) == 0)
1037 			type |= F_WAIT;
1038 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0)
1039 			goto bad;
1040 		fp->f_flag |= FHASLOCK;
1041 	}
1042 	if (flags & O_TRUNC) {
1043 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1044 			goto bad;
1045 		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1046 		VATTR_NULL(&vat);
1047 		vat.va_size = 0;
1048 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1049 		error = VOP_SETATTR(vp, &vat, p->p_ucred, p);
1050 		VOP_UNLOCK(vp, 0, p);
1051 		vn_finished_write(mp);
1052 		if (error)
1053 			goto bad;
1054 	}
1055 	/* assert that vn_open created a backing object if one is needed */
1056 	KASSERT(!vn_canvmio(vp) || vp->v_object != NULL,
1057 		("open: vmio vnode has no backing object after vn_open"));
1058 	p->p_retval[0] = indx;
1059 	return (0);
1060 bad:
1061 	(void) vn_close(vp, fp->f_flag, fp->f_cred, p);
1062 	ffree(fp);
1063 	fdp->fd_ofiles[indx] = NULL;
1064 	return (error);
1065 }
1066 
#ifdef COMPAT_43
/*
 * Create a file.
 *
 * Compatibility entry point: equivalent to open() on the same path and
 * mode with flags O_WRONLY | O_CREAT | O_TRUNC.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(p, uap)
	struct proc *p;
	register struct ocreat_args /* {
		syscallarg(char *) path;
		syscallarg(int) mode;
	} */ *uap;
{
	struct open_args /* {
		syscallarg(char *) path;
		syscallarg(int) flags;
		syscallarg(int) mode;
	} */ oargs;

	/* Build the equivalent open() request and forward it. */
	SCARG(&oargs, flags) = O_WRONLY | O_CREAT | O_TRUNC;
	SCARG(&oargs, path) = SCARG(uap, path);
	SCARG(&oargs, mode) = SCARG(uap, mode);
	return (open(p, &oargs));
}
#endif /* COMPAT_43 */
1097 
1098 /*
1099  * Create a special file.
1100  */
1101 #ifndef _SYS_SYSPROTO_H_
1102 struct mknod_args {
1103 	char	*path;
1104 	int	mode;
1105 	int	dev;
1106 };
1107 #endif
1108 /* ARGSUSED */
1109 int
1110 mknod(p, uap)
1111 	struct proc *p;
1112 	register struct mknod_args /* {
1113 		syscallarg(char *) path;
1114 		syscallarg(int) mode;
1115 		syscallarg(int) dev;
1116 	} */ *uap;
1117 {
1118 	struct vnode *vp;
1119 	struct mount *mp;
1120 	struct vattr vattr;
1121 	int error;
1122 	int whiteout = 0;
1123 	struct nameidata nd;
1124 
1125 	switch (SCARG(uap, mode) & S_IFMT) {
1126 	case S_IFCHR:
1127 	case S_IFBLK:
1128 		error = suser(p);
1129 		break;
1130 	default:
1131 		error = suser_xxx(0, p, PRISON_ROOT);
1132 		break;
1133 	}
1134 	if (error)
1135 		return (error);
1136 restart:
1137 	bwillwrite();
1138 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1139 	if ((error = namei(&nd)) != 0)
1140 		return (error);
1141 	vp = nd.ni_vp;
1142 	if (vp != NULL) {
1143 		vrele(vp);
1144 		error = EEXIST;
1145 	} else {
1146 		VATTR_NULL(&vattr);
1147 		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
1148 		vattr.va_rdev = SCARG(uap, dev);
1149 		whiteout = 0;
1150 
1151 		switch (SCARG(uap, mode) & S_IFMT) {
1152 		case S_IFMT:	/* used by badsect to flag bad sectors */
1153 			vattr.va_type = VBAD;
1154 			break;
1155 		case S_IFCHR:
1156 			vattr.va_type = VCHR;
1157 			break;
1158 		case S_IFBLK:
1159 			vattr.va_type = VBLK;
1160 			break;
1161 		case S_IFWHT:
1162 			whiteout = 1;
1163 			break;
1164 		default:
1165 			error = EINVAL;
1166 			break;
1167 		}
1168 	}
1169 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1170 		NDFREE(&nd, NDF_ONLY_PNBUF);
1171 		vput(nd.ni_dvp);
1172 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1173 			return (error);
1174 		goto restart;
1175 	}
1176 	if (!error) {
1177 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1178 		if (whiteout)
1179 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1180 		else {
1181 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1182 						&nd.ni_cnd, &vattr);
1183 			if (error == 0)
1184 				vput(nd.ni_vp);
1185 		}
1186 	}
1187 	NDFREE(&nd, NDF_ONLY_PNBUF);
1188 	vput(nd.ni_dvp);
1189 	vn_finished_write(mp);
1190 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1191 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1192 	return (error);
1193 }
1194 
1195 /*
1196  * Create a named pipe.
1197  */
1198 #ifndef _SYS_SYSPROTO_H_
1199 struct mkfifo_args {
1200 	char	*path;
1201 	int	mode;
1202 };
1203 #endif
1204 /* ARGSUSED */
1205 int
1206 mkfifo(p, uap)
1207 	struct proc *p;
1208 	register struct mkfifo_args /* {
1209 		syscallarg(char *) path;
1210 		syscallarg(int) mode;
1211 	} */ *uap;
1212 {
1213 	struct mount *mp;
1214 	struct vattr vattr;
1215 	int error;
1216 	struct nameidata nd;
1217 
1218 restart:
1219 	bwillwrite();
1220 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1221 	if ((error = namei(&nd)) != 0)
1222 		return (error);
1223 	if (nd.ni_vp != NULL) {
1224 		NDFREE(&nd, NDF_ONLY_PNBUF);
1225 		vrele(nd.ni_vp);
1226 		vput(nd.ni_dvp);
1227 		return (EEXIST);
1228 	}
1229 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1230 		NDFREE(&nd, NDF_ONLY_PNBUF);
1231 		vput(nd.ni_dvp);
1232 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1233 			return (error);
1234 		goto restart;
1235 	}
1236 	VATTR_NULL(&vattr);
1237 	vattr.va_type = VFIFO;
1238 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
1239 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1240 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1241 	if (error == 0)
1242 		vput(nd.ni_vp);
1243 	NDFREE(&nd, NDF_ONLY_PNBUF);
1244 	vput(nd.ni_dvp);
1245 	vn_finished_write(mp);
1246 	return (error);
1247 }
1248 
1249 /*
1250  * Make a hard file link.
1251  */
1252 #ifndef _SYS_SYSPROTO_H_
1253 struct link_args {
1254 	char	*path;
1255 	char	*link;
1256 };
1257 #endif
1258 /* ARGSUSED */
1259 int
1260 link(p, uap)
1261 	struct proc *p;
1262 	register struct link_args /* {
1263 		syscallarg(char *) path;
1264 		syscallarg(char *) link;
1265 	} */ *uap;
1266 {
1267 	struct vnode *vp;
1268 	struct mount *mp;
1269 	struct nameidata nd;
1270 	int error;
1271 
1272 	bwillwrite();
1273 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), p);
1274 	if ((error = namei(&nd)) != 0)
1275 		return (error);
1276 	NDFREE(&nd, NDF_ONLY_PNBUF);
1277 	vp = nd.ni_vp;
1278 	if (vp->v_type == VDIR) {
1279 		vrele(vp);
1280 		return (EPERM);		/* POSIX */
1281 	}
1282 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1283 		vrele(vp);
1284 		return (error);
1285 	}
1286 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
1287 	if ((error = namei(&nd)) == 0) {
1288 		if (nd.ni_vp != NULL) {
1289 			vrele(nd.ni_vp);
1290 			error = EEXIST;
1291 		} else {
1292 			VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1293 			VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1294 			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1295 		}
1296 		NDFREE(&nd, NDF_ONLY_PNBUF);
1297 		vput(nd.ni_dvp);
1298 	}
1299 	vrele(vp);
1300 	vn_finished_write(mp);
1301 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1302 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1303 	return (error);
1304 }
1305 
1306 /*
1307  * Make a symbolic link.
1308  */
1309 #ifndef _SYS_SYSPROTO_H_
1310 struct symlink_args {
1311 	char	*path;
1312 	char	*link;
1313 };
1314 #endif
1315 /* ARGSUSED */
1316 int
1317 symlink(p, uap)
1318 	struct proc *p;
1319 	register struct symlink_args /* {
1320 		syscallarg(char *) path;
1321 		syscallarg(char *) link;
1322 	} */ *uap;
1323 {
1324 	struct mount *mp;
1325 	struct vattr vattr;
1326 	char *path;
1327 	int error;
1328 	struct nameidata nd;
1329 
1330 	path = zalloc(namei_zone);
1331 	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
1332 		goto out;
1333 restart:
1334 	bwillwrite();
1335 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
1336 	if ((error = namei(&nd)) != 0)
1337 		goto out;
1338 	if (nd.ni_vp) {
1339 		NDFREE(&nd, NDF_ONLY_PNBUF);
1340 		vrele(nd.ni_vp);
1341 		vput(nd.ni_dvp);
1342 		error = EEXIST;
1343 		goto out;
1344 	}
1345 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1346 		NDFREE(&nd, NDF_ONLY_PNBUF);
1347 		vput(nd.ni_dvp);
1348 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1349 			return (error);
1350 		goto restart;
1351 	}
1352 	VATTR_NULL(&vattr);
1353 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1354 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1355 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1356 	NDFREE(&nd, NDF_ONLY_PNBUF);
1357 	if (error == 0)
1358 		vput(nd.ni_vp);
1359 	vput(nd.ni_dvp);
1360 	vn_finished_write(mp);
1361 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1362 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1363 out:
1364 	zfree(namei_zone, path);
1365 	return (error);
1366 }
1367 
1368 /*
1369  * Delete a whiteout from the filesystem.
1370  */
1371 /* ARGSUSED */
1372 int
1373 undelete(p, uap)
1374 	struct proc *p;
1375 	register struct undelete_args /* {
1376 		syscallarg(char *) path;
1377 	} */ *uap;
1378 {
1379 	int error;
1380 	struct mount *mp;
1381 	struct nameidata nd;
1382 
1383 restart:
1384 	bwillwrite();
1385 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1386 	    SCARG(uap, path), p);
1387 	error = namei(&nd);
1388 	if (error)
1389 		return (error);
1390 
1391 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1392 		NDFREE(&nd, NDF_ONLY_PNBUF);
1393 		if (nd.ni_vp)
1394 			vrele(nd.ni_vp);
1395 		vput(nd.ni_dvp);
1396 		return (EEXIST);
1397 	}
1398 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1399 		NDFREE(&nd, NDF_ONLY_PNBUF);
1400 		vput(nd.ni_dvp);
1401 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1402 			return (error);
1403 		goto restart;
1404 	}
1405 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1406 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1407 	NDFREE(&nd, NDF_ONLY_PNBUF);
1408 	vput(nd.ni_dvp);
1409 	vn_finished_write(mp);
1410 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1411 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1412 	return (error);
1413 }
1414 
1415 /*
1416  * Delete a name from the filesystem.
1417  */
1418 #ifndef _SYS_SYSPROTO_H_
1419 struct unlink_args {
1420 	char	*path;
1421 };
1422 #endif
1423 /* ARGSUSED */
1424 int
1425 unlink(p, uap)
1426 	struct proc *p;
1427 	struct unlink_args /* {
1428 		syscallarg(char *) path;
1429 	} */ *uap;
1430 {
1431 	struct mount *mp;
1432 	struct vnode *vp;
1433 	int error;
1434 	struct nameidata nd;
1435 
1436 restart:
1437 	bwillwrite();
1438 	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1439 	if ((error = namei(&nd)) != 0)
1440 		return (error);
1441 	vp = nd.ni_vp;
1442 	if (vp->v_type == VDIR)
1443 		error = EPERM;		/* POSIX */
1444 	else {
1445 		/*
1446 		 * The root of a mounted filesystem cannot be deleted.
1447 		 *
1448 		 * XXX: can this only be a VDIR case?
1449 		 */
1450 		if (vp->v_flag & VROOT)
1451 			error = EBUSY;
1452 	}
1453 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1454 		NDFREE(&nd, NDF_ONLY_PNBUF);
1455 		vrele(vp);
1456 		vput(nd.ni_dvp);
1457 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1458 			return (error);
1459 		goto restart;
1460 	}
1461 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1462 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1463 	if (!error) {
1464 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1465 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1466 	}
1467 	NDFREE(&nd, NDF_ONLY_PNBUF);
1468 	vput(nd.ni_dvp);
1469 	vput(vp);
1470 	vn_finished_write(mp);
1471 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1472 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1473 	return (error);
1474 }
1475 
1476 /*
1477  * Reposition read/write file offset.
1478  */
1479 #ifndef _SYS_SYSPROTO_H_
1480 struct lseek_args {
1481 	int	fd;
1482 	int	pad;
1483 	off_t	offset;
1484 	int	whence;
1485 };
1486 #endif
1487 int
1488 lseek(p, uap)
1489 	struct proc *p;
1490 	register struct lseek_args /* {
1491 		syscallarg(int) fd;
1492 		syscallarg(int) pad;
1493 		syscallarg(off_t) offset;
1494 		syscallarg(int) whence;
1495 	} */ *uap;
1496 {
1497 	struct ucred *cred = p->p_ucred;
1498 	register struct filedesc *fdp = p->p_fd;
1499 	register struct file *fp;
1500 	struct vattr vattr;
1501 	int error;
1502 
1503 	if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
1504 	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
1505 		return (EBADF);
1506 	if (fp->f_type != DTYPE_VNODE)
1507 		return (ESPIPE);
1508 	switch (SCARG(uap, whence)) {
1509 	case L_INCR:
1510 		fp->f_offset += SCARG(uap, offset);
1511 		break;
1512 	case L_XTND:
1513 		error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p);
1514 		if (error)
1515 			return (error);
1516 		fp->f_offset = SCARG(uap, offset) + vattr.va_size;
1517 		break;
1518 	case L_SET:
1519 		fp->f_offset = SCARG(uap, offset);
1520 		break;
1521 	default:
1522 		return (EINVAL);
1523 	}
1524 	*(off_t *)(p->p_retval) = fp->f_offset;
1525 	return (0);
1526 }
1527 
1528 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1529 /*
1530  * Reposition read/write file offset.
1531  */
1532 #ifndef _SYS_SYSPROTO_H_
1533 struct olseek_args {
1534 	int	fd;
1535 	long	offset;
1536 	int	whence;
1537 };
1538 #endif
1539 int
1540 olseek(p, uap)
1541 	struct proc *p;
1542 	register struct olseek_args /* {
1543 		syscallarg(int) fd;
1544 		syscallarg(long) offset;
1545 		syscallarg(int) whence;
1546 	} */ *uap;
1547 {
1548 	struct lseek_args /* {
1549 		syscallarg(int) fd;
1550 		syscallarg(int) pad;
1551 		syscallarg(off_t) offset;
1552 		syscallarg(int) whence;
1553 	} */ nuap;
1554 	int error;
1555 
1556 	SCARG(&nuap, fd) = SCARG(uap, fd);
1557 	SCARG(&nuap, offset) = SCARG(uap, offset);
1558 	SCARG(&nuap, whence) = SCARG(uap, whence);
1559 	error = lseek(p, &nuap);
1560 	return (error);
1561 }
1562 #endif /* COMPAT_43 */
1563 
1564 /*
1565  * Check access permissions.
1566  */
1567 #ifndef _SYS_SYSPROTO_H_
1568 struct access_args {
1569 	char	*path;
1570 	int	flags;
1571 };
1572 #endif
1573 int
1574 access(p, uap)
1575 	struct proc *p;
1576 	register struct access_args /* {
1577 		syscallarg(char *) path;
1578 		syscallarg(int) flags;
1579 	} */ *uap;
1580 {
1581 	register struct ucred *cred = p->p_ucred;
1582 	register struct vnode *vp;
1583 	int error, flags, t_gid, t_uid;
1584 	struct nameidata nd;
1585 
1586 	t_uid = cred->cr_uid;
1587 	t_gid = cred->cr_groups[0];
1588 	cred->cr_uid = p->p_cred->p_ruid;
1589 	cred->cr_groups[0] = p->p_cred->p_rgid;
1590 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1591 	    SCARG(uap, path), p);
1592 	if ((error = namei(&nd)) != 0)
1593 		goto out1;
1594 	vp = nd.ni_vp;
1595 
1596 	/* Flags == 0 means only check for existence. */
1597 	if (SCARG(uap, flags)) {
1598 		flags = 0;
1599 		if (SCARG(uap, flags) & R_OK)
1600 			flags |= VREAD;
1601 		if (SCARG(uap, flags) & W_OK)
1602 			flags |= VWRITE;
1603 		if (SCARG(uap, flags) & X_OK)
1604 			flags |= VEXEC;
1605 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1606 			error = VOP_ACCESS(vp, flags, cred, p);
1607 	}
1608 	NDFREE(&nd, NDF_ONLY_PNBUF);
1609 	vput(vp);
1610 out1:
1611 	cred->cr_uid = t_uid;
1612 	cred->cr_groups[0] = t_gid;
1613 	return (error);
1614 }
1615 
1616 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1617 /*
1618  * Get file status; this version follows links.
1619  */
1620 #ifndef _SYS_SYSPROTO_H_
1621 struct ostat_args {
1622 	char	*path;
1623 	struct ostat *ub;
1624 };
1625 #endif
1626 /* ARGSUSED */
1627 int
1628 ostat(p, uap)
1629 	struct proc *p;
1630 	register struct ostat_args /* {
1631 		syscallarg(char *) path;
1632 		syscallarg(struct ostat *) ub;
1633 	} */ *uap;
1634 {
1635 	struct stat sb;
1636 	struct ostat osb;
1637 	int error;
1638 	struct nameidata nd;
1639 
1640 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1641 	    SCARG(uap, path), p);
1642 	if ((error = namei(&nd)) != 0)
1643 		return (error);
1644 	NDFREE(&nd, NDF_ONLY_PNBUF);
1645 	error = vn_stat(nd.ni_vp, &sb, p);
1646 	vput(nd.ni_vp);
1647 	if (error)
1648 		return (error);
1649 	cvtstat(&sb, &osb);
1650 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1651 	return (error);
1652 }
1653 
1654 /*
1655  * Get file status; this version does not follow links.
1656  */
1657 #ifndef _SYS_SYSPROTO_H_
1658 struct olstat_args {
1659 	char	*path;
1660 	struct ostat *ub;
1661 };
1662 #endif
1663 /* ARGSUSED */
1664 int
1665 olstat(p, uap)
1666 	struct proc *p;
1667 	register struct olstat_args /* {
1668 		syscallarg(char *) path;
1669 		syscallarg(struct ostat *) ub;
1670 	} */ *uap;
1671 {
1672 	struct vnode *vp;
1673 	struct stat sb;
1674 	struct ostat osb;
1675 	int error;
1676 	struct nameidata nd;
1677 
1678 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1679 	    SCARG(uap, path), p);
1680 	if ((error = namei(&nd)) != 0)
1681 		return (error);
1682 	vp = nd.ni_vp;
1683 	error = vn_stat(vp, &sb, p);
1684 	NDFREE(&nd, NDF_ONLY_PNBUF);
1685 	vput(vp);
1686 	if (error)
1687 		return (error);
1688 	cvtstat(&sb, &osb);
1689 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1690 	return (error);
1691 }
1692 
1693 /*
1694  * Convert from an old to a new stat structure.
1695  */
1696 void
1697 cvtstat(st, ost)
1698 	struct stat *st;
1699 	struct ostat *ost;
1700 {
1701 
1702 	ost->st_dev = st->st_dev;
1703 	ost->st_ino = st->st_ino;
1704 	ost->st_mode = st->st_mode;
1705 	ost->st_nlink = st->st_nlink;
1706 	ost->st_uid = st->st_uid;
1707 	ost->st_gid = st->st_gid;
1708 	ost->st_rdev = st->st_rdev;
1709 	if (st->st_size < (quad_t)1 << 32)
1710 		ost->st_size = st->st_size;
1711 	else
1712 		ost->st_size = -2;
1713 	ost->st_atime = st->st_atime;
1714 	ost->st_mtime = st->st_mtime;
1715 	ost->st_ctime = st->st_ctime;
1716 	ost->st_blksize = st->st_blksize;
1717 	ost->st_blocks = st->st_blocks;
1718 	ost->st_flags = st->st_flags;
1719 	ost->st_gen = st->st_gen;
1720 }
1721 #endif /* COMPAT_43 || COMPAT_SUNOS */
1722 
1723 /*
1724  * Get file status; this version follows links.
1725  */
1726 #ifndef _SYS_SYSPROTO_H_
1727 struct stat_args {
1728 	char	*path;
1729 	struct stat *ub;
1730 };
1731 #endif
1732 /* ARGSUSED */
1733 int
1734 stat(p, uap)
1735 	struct proc *p;
1736 	register struct stat_args /* {
1737 		syscallarg(char *) path;
1738 		syscallarg(struct stat *) ub;
1739 	} */ *uap;
1740 {
1741 	struct stat sb;
1742 	int error;
1743 	struct nameidata nd;
1744 
1745 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1746 	    SCARG(uap, path), p);
1747 	if ((error = namei(&nd)) != 0)
1748 		return (error);
1749 	error = vn_stat(nd.ni_vp, &sb, p);
1750 	NDFREE(&nd, NDF_ONLY_PNBUF);
1751 	vput(nd.ni_vp);
1752 	if (error)
1753 		return (error);
1754 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
1755 	return (error);
1756 }
1757 
1758 /*
1759  * Get file status; this version does not follow links.
1760  */
1761 #ifndef _SYS_SYSPROTO_H_
1762 struct lstat_args {
1763 	char	*path;
1764 	struct stat *ub;
1765 };
1766 #endif
1767 /* ARGSUSED */
1768 int
1769 lstat(p, uap)
1770 	struct proc *p;
1771 	register struct lstat_args /* {
1772 		syscallarg(char *) path;
1773 		syscallarg(struct stat *) ub;
1774 	} */ *uap;
1775 {
1776 	int error;
1777 	struct vnode *vp;
1778 	struct stat sb;
1779 	struct nameidata nd;
1780 
1781 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1782 	    SCARG(uap, path), p);
1783 	if ((error = namei(&nd)) != 0)
1784 		return (error);
1785 	vp = nd.ni_vp;
1786 	error = vn_stat(vp, &sb, p);
1787 	NDFREE(&nd, NDF_ONLY_PNBUF);
1788 	vput(vp);
1789 	if (error)
1790 		return (error);
1791 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
1792 	return (error);
1793 }
1794 
1795 void
1796 cvtnstat(sb, nsb)
1797 	struct stat *sb;
1798 	struct nstat *nsb;
1799 {
1800 	nsb->st_dev = sb->st_dev;
1801 	nsb->st_ino = sb->st_ino;
1802 	nsb->st_mode = sb->st_mode;
1803 	nsb->st_nlink = sb->st_nlink;
1804 	nsb->st_uid = sb->st_uid;
1805 	nsb->st_gid = sb->st_gid;
1806 	nsb->st_rdev = sb->st_rdev;
1807 	nsb->st_atimespec = sb->st_atimespec;
1808 	nsb->st_mtimespec = sb->st_mtimespec;
1809 	nsb->st_ctimespec = sb->st_ctimespec;
1810 	nsb->st_size = sb->st_size;
1811 	nsb->st_blocks = sb->st_blocks;
1812 	nsb->st_blksize = sb->st_blksize;
1813 	nsb->st_flags = sb->st_flags;
1814 	nsb->st_gen = sb->st_gen;
1815 	nsb->st_qspare[0] = sb->st_qspare[0];
1816 	nsb->st_qspare[1] = sb->st_qspare[1];
1817 }
1818 
1819 #ifndef _SYS_SYSPROTO_H_
1820 struct nstat_args {
1821 	char	*path;
1822 	struct nstat *ub;
1823 };
1824 #endif
1825 /* ARGSUSED */
1826 int
1827 nstat(p, uap)
1828 	struct proc *p;
1829 	register struct nstat_args /* {
1830 		syscallarg(char *) path;
1831 		syscallarg(struct nstat *) ub;
1832 	} */ *uap;
1833 {
1834 	struct stat sb;
1835 	struct nstat nsb;
1836 	int error;
1837 	struct nameidata nd;
1838 
1839 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1840 	    SCARG(uap, path), p);
1841 	if ((error = namei(&nd)) != 0)
1842 		return (error);
1843 	NDFREE(&nd, NDF_ONLY_PNBUF);
1844 	error = vn_stat(nd.ni_vp, &sb, p);
1845 	vput(nd.ni_vp);
1846 	if (error)
1847 		return (error);
1848 	cvtnstat(&sb, &nsb);
1849 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
1850 	return (error);
1851 }
1852 
1853 /*
1854  * Get file status; this version does not follow links.
1855  */
1856 #ifndef _SYS_SYSPROTO_H_
1857 struct lstat_args {
1858 	char	*path;
1859 	struct stat *ub;
1860 };
1861 #endif
1862 /* ARGSUSED */
1863 int
1864 nlstat(p, uap)
1865 	struct proc *p;
1866 	register struct nlstat_args /* {
1867 		syscallarg(char *) path;
1868 		syscallarg(struct nstat *) ub;
1869 	} */ *uap;
1870 {
1871 	int error;
1872 	struct vnode *vp;
1873 	struct stat sb;
1874 	struct nstat nsb;
1875 	struct nameidata nd;
1876 
1877 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1878 	    SCARG(uap, path), p);
1879 	if ((error = namei(&nd)) != 0)
1880 		return (error);
1881 	vp = nd.ni_vp;
1882 	NDFREE(&nd, NDF_ONLY_PNBUF);
1883 	error = vn_stat(vp, &sb, p);
1884 	vput(vp);
1885 	if (error)
1886 		return (error);
1887 	cvtnstat(&sb, &nsb);
1888 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
1889 	return (error);
1890 }
1891 
1892 /*
1893  * Get configurable pathname variables.
1894  */
1895 #ifndef _SYS_SYSPROTO_H_
1896 struct pathconf_args {
1897 	char	*path;
1898 	int	name;
1899 };
1900 #endif
1901 /* ARGSUSED */
1902 int
1903 pathconf(p, uap)
1904 	struct proc *p;
1905 	register struct pathconf_args /* {
1906 		syscallarg(char *) path;
1907 		syscallarg(int) name;
1908 	} */ *uap;
1909 {
1910 	int error;
1911 	struct nameidata nd;
1912 
1913 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1914 	    SCARG(uap, path), p);
1915 	if ((error = namei(&nd)) != 0)
1916 		return (error);
1917 	NDFREE(&nd, NDF_ONLY_PNBUF);
1918 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), p->p_retval);
1919 	vput(nd.ni_vp);
1920 	return (error);
1921 }
1922 
1923 /*
1924  * Return target name of a symbolic link.
1925  */
1926 #ifndef _SYS_SYSPROTO_H_
1927 struct readlink_args {
1928 	char	*path;
1929 	char	*buf;
1930 	int	count;
1931 };
1932 #endif
1933 /* ARGSUSED */
1934 int
1935 readlink(p, uap)
1936 	struct proc *p;
1937 	register struct readlink_args /* {
1938 		syscallarg(char *) path;
1939 		syscallarg(char *) buf;
1940 		syscallarg(int) count;
1941 	} */ *uap;
1942 {
1943 	register struct vnode *vp;
1944 	struct iovec aiov;
1945 	struct uio auio;
1946 	int error;
1947 	struct nameidata nd;
1948 
1949 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1950 	    SCARG(uap, path), p);
1951 	if ((error = namei(&nd)) != 0)
1952 		return (error);
1953 	NDFREE(&nd, NDF_ONLY_PNBUF);
1954 	vp = nd.ni_vp;
1955 	if (vp->v_type != VLNK)
1956 		error = EINVAL;
1957 	else {
1958 		aiov.iov_base = SCARG(uap, buf);
1959 		aiov.iov_len = SCARG(uap, count);
1960 		auio.uio_iov = &aiov;
1961 		auio.uio_iovcnt = 1;
1962 		auio.uio_offset = 0;
1963 		auio.uio_rw = UIO_READ;
1964 		auio.uio_segflg = UIO_USERSPACE;
1965 		auio.uio_procp = p;
1966 		auio.uio_resid = SCARG(uap, count);
1967 		error = VOP_READLINK(vp, &auio, p->p_ucred);
1968 	}
1969 	vput(vp);
1970 	p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
1971 	return (error);
1972 }
1973 
1974 static int
1975 setfflags(p, vp, flags)
1976 	struct proc *p;
1977 	struct vnode *vp;
1978 	int flags;
1979 {
1980 	int error;
1981 	struct mount *mp;
1982 	struct vattr vattr;
1983 
1984 	/*
1985 	 * Prevent non-root users from setting flags on devices.  When
1986 	 * a device is reused, users can retain ownership of the device
1987 	 * if they are allowed to set flags and programs assume that
1988 	 * chown can't fail when done as root.
1989 	 */
1990 	if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
1991 	    ((error = suser_xxx(p->p_ucred, p, PRISON_ROOT)) != 0))
1992 		return (error);
1993 
1994 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1995 		return (error);
1996 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1997 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1998 	VATTR_NULL(&vattr);
1999 	vattr.va_flags = flags;
2000 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2001 	VOP_UNLOCK(vp, 0, p);
2002 	vn_finished_write(mp);
2003 	return (error);
2004 }
2005 
2006 /*
2007  * Change flags of a file given a path name.
2008  */
2009 #ifndef _SYS_SYSPROTO_H_
2010 struct chflags_args {
2011 	char	*path;
2012 	int	flags;
2013 };
2014 #endif
2015 /* ARGSUSED */
2016 int
2017 chflags(p, uap)
2018 	struct proc *p;
2019 	register struct chflags_args /* {
2020 		syscallarg(char *) path;
2021 		syscallarg(int) flags;
2022 	} */ *uap;
2023 {
2024 	int error;
2025 	struct nameidata nd;
2026 
2027 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2028 	if ((error = namei(&nd)) != 0)
2029 		return (error);
2030 	NDFREE(&nd, NDF_ONLY_PNBUF);
2031 	error = setfflags(p, nd.ni_vp, SCARG(uap, flags));
2032 	vrele(nd.ni_vp);
2033 	return error;
2034 }
2035 
2036 /*
2037  * Change flags of a file given a file descriptor.
2038  */
2039 #ifndef _SYS_SYSPROTO_H_
2040 struct fchflags_args {
2041 	int	fd;
2042 	int	flags;
2043 };
2044 #endif
2045 /* ARGSUSED */
2046 int
2047 fchflags(p, uap)
2048 	struct proc *p;
2049 	register struct fchflags_args /* {
2050 		syscallarg(int) fd;
2051 		syscallarg(int) flags;
2052 	} */ *uap;
2053 {
2054 	struct file *fp;
2055 	int error;
2056 
2057 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2058 		return (error);
2059 	return setfflags(p, (struct vnode *) fp->f_data, SCARG(uap, flags));
2060 }
2061 
2062 static int
2063 setfmode(p, vp, mode)
2064 	struct proc *p;
2065 	struct vnode *vp;
2066 	int mode;
2067 {
2068 	int error;
2069 	struct mount *mp;
2070 	struct vattr vattr;
2071 
2072 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2073 		return (error);
2074 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2075 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2076 	VATTR_NULL(&vattr);
2077 	vattr.va_mode = mode & ALLPERMS;
2078 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2079 	VOP_UNLOCK(vp, 0, p);
2080 	vn_finished_write(mp);
2081 	return error;
2082 }
2083 
2084 /*
2085  * Change mode of a file given path name.
2086  */
2087 #ifndef _SYS_SYSPROTO_H_
2088 struct chmod_args {
2089 	char	*path;
2090 	int	mode;
2091 };
2092 #endif
2093 /* ARGSUSED */
2094 int
2095 chmod(p, uap)
2096 	struct proc *p;
2097 	register struct chmod_args /* {
2098 		syscallarg(char *) path;
2099 		syscallarg(int) mode;
2100 	} */ *uap;
2101 {
2102 	int error;
2103 	struct nameidata nd;
2104 
2105 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2106 	if ((error = namei(&nd)) != 0)
2107 		return (error);
2108 	NDFREE(&nd, NDF_ONLY_PNBUF);
2109 	error = setfmode(p, nd.ni_vp, SCARG(uap, mode));
2110 	vrele(nd.ni_vp);
2111 	return error;
2112 }
2113 
2114 /*
2115  * Change mode of a file given path name (don't follow links.)
2116  */
2117 #ifndef _SYS_SYSPROTO_H_
2118 struct lchmod_args {
2119 	char	*path;
2120 	int	mode;
2121 };
2122 #endif
2123 /* ARGSUSED */
2124 int
2125 lchmod(p, uap)
2126 	struct proc *p;
2127 	register struct lchmod_args /* {
2128 		syscallarg(char *) path;
2129 		syscallarg(int) mode;
2130 	} */ *uap;
2131 {
2132 	int error;
2133 	struct nameidata nd;
2134 
2135 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2136 	if ((error = namei(&nd)) != 0)
2137 		return (error);
2138 	NDFREE(&nd, NDF_ONLY_PNBUF);
2139 	error = setfmode(p, nd.ni_vp, SCARG(uap, mode));
2140 	vrele(nd.ni_vp);
2141 	return error;
2142 }
2143 
2144 /*
2145  * Change mode of a file given a file descriptor.
2146  */
2147 #ifndef _SYS_SYSPROTO_H_
2148 struct fchmod_args {
2149 	int	fd;
2150 	int	mode;
2151 };
2152 #endif
2153 /* ARGSUSED */
2154 int
2155 fchmod(p, uap)
2156 	struct proc *p;
2157 	register struct fchmod_args /* {
2158 		syscallarg(int) fd;
2159 		syscallarg(int) mode;
2160 	} */ *uap;
2161 {
2162 	struct file *fp;
2163 	int error;
2164 
2165 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2166 		return (error);
2167 	return setfmode(p, (struct vnode *)fp->f_data, SCARG(uap, mode));
2168 }
2169 
2170 static int
2171 setfown(p, vp, uid, gid)
2172 	struct proc *p;
2173 	struct vnode *vp;
2174 	uid_t uid;
2175 	gid_t gid;
2176 {
2177 	int error;
2178 	struct mount *mp;
2179 	struct vattr vattr;
2180 
2181 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2182 		return (error);
2183 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2184 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2185 	VATTR_NULL(&vattr);
2186 	vattr.va_uid = uid;
2187 	vattr.va_gid = gid;
2188 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2189 	VOP_UNLOCK(vp, 0, p);
2190 	vn_finished_write(mp);
2191 	return error;
2192 }
2193 
2194 /*
2195  * Set ownership given a path name.
2196  */
2197 #ifndef _SYS_SYSPROTO_H_
2198 struct chown_args {
2199 	char	*path;
2200 	int	uid;
2201 	int	gid;
2202 };
2203 #endif
2204 /* ARGSUSED */
2205 int
2206 chown(p, uap)
2207 	struct proc *p;
2208 	register struct chown_args /* {
2209 		syscallarg(char *) path;
2210 		syscallarg(int) uid;
2211 		syscallarg(int) gid;
2212 	} */ *uap;
2213 {
2214 	int error;
2215 	struct nameidata nd;
2216 
2217 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2218 	if ((error = namei(&nd)) != 0)
2219 		return (error);
2220 	NDFREE(&nd, NDF_ONLY_PNBUF);
2221 	error = setfown(p, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2222 	vrele(nd.ni_vp);
2223 	return (error);
2224 }
2225 
2226 /*
2227  * Set ownership given a path name, do not cross symlinks.
2228  */
2229 #ifndef _SYS_SYSPROTO_H_
2230 struct lchown_args {
2231 	char	*path;
2232 	int	uid;
2233 	int	gid;
2234 };
2235 #endif
2236 /* ARGSUSED */
2237 int
2238 lchown(p, uap)
2239 	struct proc *p;
2240 	register struct lchown_args /* {
2241 		syscallarg(char *) path;
2242 		syscallarg(int) uid;
2243 		syscallarg(int) gid;
2244 	} */ *uap;
2245 {
2246 	int error;
2247 	struct nameidata nd;
2248 
2249 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2250 	if ((error = namei(&nd)) != 0)
2251 		return (error);
2252 	NDFREE(&nd, NDF_ONLY_PNBUF);
2253 	error = setfown(p, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2254 	vrele(nd.ni_vp);
2255 	return (error);
2256 }
2257 
2258 /*
2259  * Set ownership given a file descriptor.
2260  */
2261 #ifndef _SYS_SYSPROTO_H_
2262 struct fchown_args {
2263 	int	fd;
2264 	int	uid;
2265 	int	gid;
2266 };
2267 #endif
2268 /* ARGSUSED */
2269 int
2270 fchown(p, uap)
2271 	struct proc *p;
2272 	register struct fchown_args /* {
2273 		syscallarg(int) fd;
2274 		syscallarg(int) uid;
2275 		syscallarg(int) gid;
2276 	} */ *uap;
2277 {
2278 	struct file *fp;
2279 	int error;
2280 
2281 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2282 		return (error);
2283 	return setfown(p, (struct vnode *)fp->f_data,
2284 		SCARG(uap, uid), SCARG(uap, gid));
2285 }
2286 
2287 static int
2288 getutimes(usrtvp, tsp)
2289 	const struct timeval *usrtvp;
2290 	struct timespec *tsp;
2291 {
2292 	struct timeval tv[2];
2293 	int error;
2294 
2295 	if (usrtvp == NULL) {
2296 		microtime(&tv[0]);
2297 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2298 		tsp[1] = tsp[0];
2299 	} else {
2300 		if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
2301 			return (error);
2302 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2303 		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
2304 	}
2305 	return 0;
2306 }
2307 
2308 static int
2309 setutimes(p, vp, ts, nullflag)
2310 	struct proc *p;
2311 	struct vnode *vp;
2312 	const struct timespec *ts;
2313 	int nullflag;
2314 {
2315 	int error;
2316 	struct mount *mp;
2317 	struct vattr vattr;
2318 
2319 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2320 		return (error);
2321 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2322 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2323 	VATTR_NULL(&vattr);
2324 	vattr.va_atime = ts[0];
2325 	vattr.va_mtime = ts[1];
2326 	if (nullflag)
2327 		vattr.va_vaflags |= VA_UTIMES_NULL;
2328 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2329 	VOP_UNLOCK(vp, 0, p);
2330 	vn_finished_write(mp);
2331 	return error;
2332 }
2333 
2334 /*
2335  * Set the access and modification times of a file.
2336  */
2337 #ifndef _SYS_SYSPROTO_H_
2338 struct utimes_args {
2339 	char	*path;
2340 	struct	timeval *tptr;
2341 };
2342 #endif
2343 /* ARGSUSED */
2344 int
2345 utimes(p, uap)
2346 	struct proc *p;
2347 	register struct utimes_args /* {
2348 		syscallarg(char *) path;
2349 		syscallarg(struct timeval *) tptr;
2350 	} */ *uap;
2351 {
2352 	struct timespec ts[2];
2353 	struct timeval *usrtvp;
2354 	int error;
2355 	struct nameidata nd;
2356 
2357 	usrtvp = SCARG(uap, tptr);
2358 	if ((error = getutimes(usrtvp, ts)) != 0)
2359 		return (error);
2360 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2361 	if ((error = namei(&nd)) != 0)
2362 		return (error);
2363 	NDFREE(&nd, NDF_ONLY_PNBUF);
2364 	error = setutimes(p, nd.ni_vp, ts, usrtvp == NULL);
2365 	vrele(nd.ni_vp);
2366 	return (error);
2367 }
2368 
2369 /*
2370  * Set the access and modification times of a file.
2371  */
2372 #ifndef _SYS_SYSPROTO_H_
2373 struct lutimes_args {
2374 	char	*path;
2375 	struct	timeval *tptr;
2376 };
2377 #endif
2378 /* ARGSUSED */
2379 int
2380 lutimes(p, uap)
2381 	struct proc *p;
2382 	register struct lutimes_args /* {
2383 		syscallarg(char *) path;
2384 		syscallarg(struct timeval *) tptr;
2385 	} */ *uap;
2386 {
2387 	struct timespec ts[2];
2388 	struct timeval *usrtvp;
2389 	int error;
2390 	struct nameidata nd;
2391 
2392 	usrtvp = SCARG(uap, tptr);
2393 	if ((error = getutimes(usrtvp, ts)) != 0)
2394 		return (error);
2395 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2396 	if ((error = namei(&nd)) != 0)
2397 		return (error);
2398 	NDFREE(&nd, NDF_ONLY_PNBUF);
2399 	error = setutimes(p, nd.ni_vp, ts, usrtvp == NULL);
2400 	vrele(nd.ni_vp);
2401 	return (error);
2402 }
2403 
2404 /*
2405  * Set the access and modification times of a file.
2406  */
2407 #ifndef _SYS_SYSPROTO_H_
2408 struct futimes_args {
2409 	int	fd;
2410 	struct	timeval *tptr;
2411 };
2412 #endif
2413 /* ARGSUSED */
2414 int
2415 futimes(p, uap)
2416 	struct proc *p;
2417 	register struct futimes_args /* {
2418 		syscallarg(int ) fd;
2419 		syscallarg(struct timeval *) tptr;
2420 	} */ *uap;
2421 {
2422 	struct timespec ts[2];
2423 	struct file *fp;
2424 	struct timeval *usrtvp;
2425 	int error;
2426 
2427 	usrtvp = SCARG(uap, tptr);
2428 	if ((error = getutimes(usrtvp, ts)) != 0)
2429 		return (error);
2430 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2431 		return (error);
2432 	return setutimes(p, (struct vnode *)fp->f_data, ts, usrtvp == NULL);
2433 }
2434 
2435 /*
2436  * Truncate a file given its path name.
2437  */
2438 #ifndef _SYS_SYSPROTO_H_
2439 struct truncate_args {
2440 	char	*path;
2441 	int	pad;
2442 	off_t	length;
2443 };
2444 #endif
2445 /* ARGSUSED */
2446 int
2447 truncate(p, uap)
2448 	struct proc *p;
2449 	register struct truncate_args /* {
2450 		syscallarg(char *) path;
2451 		syscallarg(int) pad;
2452 		syscallarg(off_t) length;
2453 	} */ *uap;
2454 {
2455 	struct mount *mp;
2456 	struct vnode *vp;
2457 	struct vattr vattr;
2458 	int error;
2459 	struct nameidata nd;
2460 
2461 	if (uap->length < 0)
2462 		return(EINVAL);
2463 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2464 	if ((error = namei(&nd)) != 0)
2465 		return (error);
2466 	vp = nd.ni_vp;
2467 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2468 		vrele(vp);
2469 		return (error);
2470 	}
2471 	NDFREE(&nd, NDF_ONLY_PNBUF);
2472 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2473 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2474 	if (vp->v_type == VDIR)
2475 		error = EISDIR;
2476 	else if ((error = vn_writechk(vp)) == 0 &&
2477 	    (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) {
2478 		VATTR_NULL(&vattr);
2479 		vattr.va_size = SCARG(uap, length);
2480 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2481 	}
2482 	vput(vp);
2483 	vn_finished_write(mp);
2484 	return (error);
2485 }
2486 
2487 /*
2488  * Truncate a file given a file descriptor.
2489  */
2490 #ifndef _SYS_SYSPROTO_H_
2491 struct ftruncate_args {
2492 	int	fd;
2493 	int	pad;
2494 	off_t	length;
2495 };
2496 #endif
2497 /* ARGSUSED */
2498 int
2499 ftruncate(p, uap)
2500 	struct proc *p;
2501 	register struct ftruncate_args /* {
2502 		syscallarg(int) fd;
2503 		syscallarg(int) pad;
2504 		syscallarg(off_t) length;
2505 	} */ *uap;
2506 {
2507 	struct mount *mp;
2508 	struct vattr vattr;
2509 	struct vnode *vp;
2510 	struct file *fp;
2511 	int error;
2512 
2513 	if (uap->length < 0)
2514 		return(EINVAL);
2515 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2516 		return (error);
2517 	if ((fp->f_flag & FWRITE) == 0)
2518 		return (EINVAL);
2519 	vp = (struct vnode *)fp->f_data;
2520 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2521 		return (error);
2522 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2523 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2524 	if (vp->v_type == VDIR)
2525 		error = EISDIR;
2526 	else if ((error = vn_writechk(vp)) == 0) {
2527 		VATTR_NULL(&vattr);
2528 		vattr.va_size = SCARG(uap, length);
2529 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2530 	}
2531 	VOP_UNLOCK(vp, 0, p);
2532 	vn_finished_write(mp);
2533 	return (error);
2534 }
2535 
2536 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2537 /*
2538  * Truncate a file given its path name.
2539  */
2540 #ifndef _SYS_SYSPROTO_H_
2541 struct otruncate_args {
2542 	char	*path;
2543 	long	length;
2544 };
2545 #endif
2546 /* ARGSUSED */
2547 int
2548 otruncate(p, uap)
2549 	struct proc *p;
2550 	register struct otruncate_args /* {
2551 		syscallarg(char *) path;
2552 		syscallarg(long) length;
2553 	} */ *uap;
2554 {
2555 	struct truncate_args /* {
2556 		syscallarg(char *) path;
2557 		syscallarg(int) pad;
2558 		syscallarg(off_t) length;
2559 	} */ nuap;
2560 
2561 	SCARG(&nuap, path) = SCARG(uap, path);
2562 	SCARG(&nuap, length) = SCARG(uap, length);
2563 	return (truncate(p, &nuap));
2564 }
2565 
2566 /*
2567  * Truncate a file given a file descriptor.
2568  */
2569 #ifndef _SYS_SYSPROTO_H_
2570 struct oftruncate_args {
2571 	int	fd;
2572 	long	length;
2573 };
2574 #endif
2575 /* ARGSUSED */
2576 int
2577 oftruncate(p, uap)
2578 	struct proc *p;
2579 	register struct oftruncate_args /* {
2580 		syscallarg(int) fd;
2581 		syscallarg(long) length;
2582 	} */ *uap;
2583 {
2584 	struct ftruncate_args /* {
2585 		syscallarg(int) fd;
2586 		syscallarg(int) pad;
2587 		syscallarg(off_t) length;
2588 	} */ nuap;
2589 
2590 	SCARG(&nuap, fd) = SCARG(uap, fd);
2591 	SCARG(&nuap, length) = SCARG(uap, length);
2592 	return (ftruncate(p, &nuap));
2593 }
2594 #endif /* COMPAT_43 || COMPAT_SUNOS */
2595 
2596 /*
2597  * Sync an open file.
2598  */
2599 #ifndef _SYS_SYSPROTO_H_
2600 struct fsync_args {
2601 	int	fd;
2602 };
2603 #endif
2604 /* ARGSUSED */
2605 int
2606 fsync(p, uap)
2607 	struct proc *p;
2608 	struct fsync_args /* {
2609 		syscallarg(int) fd;
2610 	} */ *uap;
2611 {
2612 	struct vnode *vp;
2613 	struct mount *mp;
2614 	struct file *fp;
2615 	int error;
2616 
2617 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2618 		return (error);
2619 	vp = (struct vnode *)fp->f_data;
2620 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2621 		return (error);
2622 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2623 	if (vp->v_object)
2624 		vm_object_page_clean(vp->v_object, 0, 0, 0);
2625 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
2626 #ifdef SOFTUPDATES
2627 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2628 	    error = softdep_fsync(vp);
2629 #endif
2630 
2631 	VOP_UNLOCK(vp, 0, p);
2632 	vn_finished_write(mp);
2633 	return (error);
2634 }
2635 
2636 /*
2637  * Rename files.  Source and destination must either both be directories,
2638  * or both not be directories.  If target is a directory, it must be empty.
2639  */
2640 #ifndef _SYS_SYSPROTO_H_
2641 struct rename_args {
2642 	char	*from;
2643 	char	*to;
2644 };
2645 #endif
2646 /* ARGSUSED */
2647 int
2648 rename(p, uap)
2649 	struct proc *p;
2650 	register struct rename_args /* {
2651 		syscallarg(char *) from;
2652 		syscallarg(char *) to;
2653 	} */ *uap;
2654 {
2655 	struct mount *mp;
2656 	struct vnode *tvp, *fvp, *tdvp;
2657 	struct nameidata fromnd, tond;
2658 	int error;
2659 
2660 	bwillwrite();
2661 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2662 	    SCARG(uap, from), p);
2663 	if ((error = namei(&fromnd)) != 0)
2664 		return (error);
2665 	fvp = fromnd.ni_vp;
2666 	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
2667 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2668 		vrele(fromnd.ni_dvp);
2669 		vrele(fvp);
2670 		goto out1;
2671 	}
2672 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
2673 	    UIO_USERSPACE, SCARG(uap, to), p);
2674 	if (fromnd.ni_vp->v_type == VDIR)
2675 		tond.ni_cnd.cn_flags |= WILLBEDIR;
2676 	if ((error = namei(&tond)) != 0) {
2677 		/* Translate error code for rename("dir1", "dir2/."). */
2678 		if (error == EISDIR && fvp->v_type == VDIR)
2679 			error = EINVAL;
2680 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2681 		vrele(fromnd.ni_dvp);
2682 		vrele(fvp);
2683 		goto out1;
2684 	}
2685 	tdvp = tond.ni_dvp;
2686 	tvp = tond.ni_vp;
2687 	if (tvp != NULL) {
2688 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2689 			error = ENOTDIR;
2690 			goto out;
2691 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2692 			error = EISDIR;
2693 			goto out;
2694 		}
2695 	}
2696 	if (fvp == tdvp)
2697 		error = EINVAL;
2698 	/*
2699 	 * If source is the same as the destination (that is the
2700 	 * same inode number with the same name in the same directory),
2701 	 * then there is nothing to do.
2702 	 */
2703 	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
2704 	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
2705 	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
2706 	      fromnd.ni_cnd.cn_namelen))
2707 		error = -1;
2708 out:
2709 	if (!error) {
2710 		VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE);
2711 		if (fromnd.ni_dvp != tdvp) {
2712 			VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
2713 		}
2714 		if (tvp) {
2715 			VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE);
2716 		}
2717 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2718 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2719 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2720 		NDFREE(&tond, NDF_ONLY_PNBUF);
2721 	} else {
2722 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2723 		NDFREE(&tond, NDF_ONLY_PNBUF);
2724 		if (tdvp == tvp)
2725 			vrele(tdvp);
2726 		else
2727 			vput(tdvp);
2728 		if (tvp)
2729 			vput(tvp);
2730 		vrele(fromnd.ni_dvp);
2731 		vrele(fvp);
2732 	}
2733 	vrele(tond.ni_startdir);
2734 	vn_finished_write(mp);
2735 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2736 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2737 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2738 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2739 out1:
2740 	if (fromnd.ni_startdir)
2741 		vrele(fromnd.ni_startdir);
2742 	if (error == -1)
2743 		return (0);
2744 	return (error);
2745 }
2746 
2747 /*
2748  * Make a directory file.
2749  */
2750 #ifndef _SYS_SYSPROTO_H_
2751 struct mkdir_args {
2752 	char	*path;
2753 	int	mode;
2754 };
2755 #endif
2756 /* ARGSUSED */
2757 int
2758 mkdir(p, uap)
2759 	struct proc *p;
2760 	register struct mkdir_args /* {
2761 		syscallarg(char *) path;
2762 		syscallarg(int) mode;
2763 	} */ *uap;
2764 {
2765 	struct mount *mp;
2766 	struct vnode *vp;
2767 	struct vattr vattr;
2768 	int error;
2769 	struct nameidata nd;
2770 
2771 restart:
2772 	bwillwrite();
2773 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
2774 	nd.ni_cnd.cn_flags |= WILLBEDIR;
2775 	if ((error = namei(&nd)) != 0)
2776 		return (error);
2777 	vp = nd.ni_vp;
2778 	if (vp != NULL) {
2779 		NDFREE(&nd, NDF_ONLY_PNBUF);
2780 		vrele(vp);
2781 		vput(nd.ni_dvp);
2782 		return (EEXIST);
2783 	}
2784 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2785 		NDFREE(&nd, NDF_ONLY_PNBUF);
2786 		vput(nd.ni_dvp);
2787 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2788 			return (error);
2789 		goto restart;
2790 	}
2791 	VATTR_NULL(&vattr);
2792 	vattr.va_type = VDIR;
2793 	vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask;
2794 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
2795 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2796 	NDFREE(&nd, NDF_ONLY_PNBUF);
2797 	vput(nd.ni_dvp);
2798 	if (!error)
2799 		vput(nd.ni_vp);
2800 	vn_finished_write(mp);
2801 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2802 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2803 	return (error);
2804 }
2805 
2806 /*
2807  * Remove a directory file.
2808  */
2809 #ifndef _SYS_SYSPROTO_H_
2810 struct rmdir_args {
2811 	char	*path;
2812 };
2813 #endif
2814 /* ARGSUSED */
2815 int
2816 rmdir(p, uap)
2817 	struct proc *p;
2818 	struct rmdir_args /* {
2819 		syscallarg(char *) path;
2820 	} */ *uap;
2821 {
2822 	struct mount *mp;
2823 	struct vnode *vp;
2824 	int error;
2825 	struct nameidata nd;
2826 
2827 restart:
2828 	bwillwrite();
2829 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
2830 	    SCARG(uap, path), p);
2831 	if ((error = namei(&nd)) != 0)
2832 		return (error);
2833 	vp = nd.ni_vp;
2834 	if (vp->v_type != VDIR) {
2835 		error = ENOTDIR;
2836 		goto out;
2837 	}
2838 	/*
2839 	 * No rmdir "." please.
2840 	 */
2841 	if (nd.ni_dvp == vp) {
2842 		error = EINVAL;
2843 		goto out;
2844 	}
2845 	/*
2846 	 * The root of a mounted filesystem cannot be deleted.
2847 	 */
2848 	if (vp->v_flag & VROOT) {
2849 		error = EBUSY;
2850 		goto out;
2851 	}
2852 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2853 		NDFREE(&nd, NDF_ONLY_PNBUF);
2854 		if (nd.ni_dvp == vp)
2855 			vrele(nd.ni_dvp);
2856 		else
2857 			vput(nd.ni_dvp);
2858 		vput(vp);
2859 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2860 			return (error);
2861 		goto restart;
2862 	}
2863 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
2864 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2865 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2866 	vn_finished_write(mp);
2867 out:
2868 	NDFREE(&nd, NDF_ONLY_PNBUF);
2869 	if (nd.ni_dvp == vp)
2870 		vrele(nd.ni_dvp);
2871 	else
2872 		vput(nd.ni_dvp);
2873 	vput(vp);
2874 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
2875 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
2876 	return (error);
2877 }
2878 
2879 #ifdef COMPAT_43
/*
 * Read a block of directory entries in a file system independent format.
 *
 * Compatibility variant of getdirentries().  Unless the entries are read
 * straight into the user buffer (non-little-endian build with
 * mnt_maxsymlinklen <= 0), they are read into a temporary kernel buffer
 * and each entry's d_type/d_namlen bytes are rewritten before the result
 * is copied out.
 *
 * The descriptor must be open for reading (EBADF) and refer to a
 * directory (EINVAL).  The file offset in effect before the read is
 * copied out to *basep, and the number of bytes transferred is stored
 * in p->p_retval[0].  Returns 0 or an errno value.
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(p, uap)
	struct proc *p;
	register struct ogetdirentries_args /* {
		syscallarg(int) fd;
		syscallarg(char *) buf;
		syscallarg(u_int) count;
		syscallarg(long *) basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;

	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);
	vp = (struct vnode *)fp->f_data;
unionread:
	/* Re-entered with vp switched to another layer when a read is empty. */
	if (vp->v_type != VDIR)
		return (EINVAL);
	/* Describe the user's buffer; it is the final destination. */
	aiov.iov_base = SCARG(uap, buf);
	aiov.iov_len = SCARG(uap, count);
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	auio.uio_resid = SCARG(uap, count);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	/* Remember the starting offset; it is what gets reported via basep. */
	loff = auio.uio_offset = fp->f_offset;
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		/* No per-entry fix-up needed: read directly into user space. */
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/*
		 * Read into a kernel bounce buffer so that the entries can
		 * be patched before they reach the user.
		 * NOTE(review): the allocation size is the caller-supplied
		 * count with no upper bound checked here -- confirm that
		 * this is acceptable.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = SCARG(uap, count);
		MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			readcnt = SCARG(uap, count) - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				/* A zero record length would loop forever. */
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					error = EIO;
					break;
				}
			}
			/* Only copy out if the whole buffer was walked. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, p);
	if (error)
		return (error);
	/* Nothing was transferred: see whether another layer should be read. */
	if (SCARG(uap, count) == auio.uio_resid) {
		if (union_dircheckp) {
			/* -1 from the hook means "vp was changed, try again". */
			error = union_dircheckp(p, &vp, fp);
			if (error == -1)
				goto unionread;
			if (error)
				return (error);
		}
		if ((vp->v_flag & VROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			/*
			 * Root of a MNT_UNION mount: continue with the
			 * covered vnode, starting from offset 0, and make
			 * the descriptor refer to it from now on.
			 */
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->f_data = (caddr_t) vp;
			fp->f_offset = 0;
			vrele(tvp);
			goto unionread;
		}
	}
	/* Unlike getdirentries(), basep is copied out unconditionally. */
	error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
	    sizeof(long));
	p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
	return (error);
}
3006 #endif /* COMPAT_43 */
3007 
3008 /*
3009  * Read a block of directory entries in a file system independent format.
3010  */
3011 #ifndef _SYS_SYSPROTO_H_
3012 struct getdirentries_args {
3013 	int	fd;
3014 	char	*buf;
3015 	u_int	count;
3016 	long	*basep;
3017 };
3018 #endif
3019 int
3020 getdirentries(p, uap)
3021 	struct proc *p;
3022 	register struct getdirentries_args /* {
3023 		syscallarg(int) fd;
3024 		syscallarg(char *) buf;
3025 		syscallarg(u_int) count;
3026 		syscallarg(long *) basep;
3027 	} */ *uap;
3028 {
3029 	struct vnode *vp;
3030 	struct file *fp;
3031 	struct uio auio;
3032 	struct iovec aiov;
3033 	long loff;
3034 	int error, eofflag;
3035 
3036 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3037 		return (error);
3038 	if ((fp->f_flag & FREAD) == 0)
3039 		return (EBADF);
3040 	vp = (struct vnode *)fp->f_data;
3041 unionread:
3042 	if (vp->v_type != VDIR)
3043 		return (EINVAL);
3044 	aiov.iov_base = SCARG(uap, buf);
3045 	aiov.iov_len = SCARG(uap, count);
3046 	auio.uio_iov = &aiov;
3047 	auio.uio_iovcnt = 1;
3048 	auio.uio_rw = UIO_READ;
3049 	auio.uio_segflg = UIO_USERSPACE;
3050 	auio.uio_procp = p;
3051 	auio.uio_resid = SCARG(uap, count);
3052 	/* vn_lock(vp, LK_SHARED | LK_RETRY, p); */
3053 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3054 	loff = auio.uio_offset = fp->f_offset;
3055 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
3056 	fp->f_offset = auio.uio_offset;
3057 	VOP_UNLOCK(vp, 0, p);
3058 	if (error)
3059 		return (error);
3060 	if (SCARG(uap, count) == auio.uio_resid) {
3061 		if (union_dircheckp) {
3062 			error = union_dircheckp(p, &vp, fp);
3063 			if (error == -1)
3064 				goto unionread;
3065 			if (error)
3066 				return (error);
3067 		}
3068 		if ((vp->v_flag & VROOT) &&
3069 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3070 			struct vnode *tvp = vp;
3071 			vp = vp->v_mount->mnt_vnodecovered;
3072 			VREF(vp);
3073 			fp->f_data = (caddr_t) vp;
3074 			fp->f_offset = 0;
3075 			vrele(tvp);
3076 			goto unionread;
3077 		}
3078 	}
3079 	if (SCARG(uap, basep) != NULL) {
3080 		error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3081 		    sizeof(long));
3082 	}
3083 	p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
3084 	return (error);
3085 }
3086 #ifndef _SYS_SYSPROTO_H_
3087 struct getdents_args {
3088 	int fd;
3089 	char *buf;
3090 	size_t count;
3091 };
3092 #endif
3093 int
3094 getdents(p, uap)
3095 	struct proc *p;
3096 	register struct getdents_args /* {
3097 		syscallarg(int) fd;
3098 		syscallarg(char *) buf;
3099 		syscallarg(u_int) count;
3100 	} */ *uap;
3101 {
3102 	struct getdirentries_args ap;
3103 	ap.fd = uap->fd;
3104 	ap.buf = uap->buf;
3105 	ap.count = uap->count;
3106 	ap.basep = NULL;
3107 	return getdirentries(p, &ap);
3108 }
3109 
3110 /*
3111  * Set the mode mask for creation of filesystem nodes.
3112  *
3113  * MP SAFE
3114  */
3115 #ifndef _SYS_SYSPROTO_H_
3116 struct umask_args {
3117 	int	newmask;
3118 };
3119 #endif
3120 int
3121 umask(p, uap)
3122 	struct proc *p;
3123 	struct umask_args /* {
3124 		syscallarg(int) newmask;
3125 	} */ *uap;
3126 {
3127 	register struct filedesc *fdp;
3128 
3129 	fdp = p->p_fd;
3130 	p->p_retval[0] = fdp->fd_cmask;
3131 	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
3132 	return (0);
3133 }
3134 
3135 /*
3136  * Void all references to file by ripping underlying filesystem
3137  * away from vnode.
3138  */
3139 #ifndef _SYS_SYSPROTO_H_
3140 struct revoke_args {
3141 	char	*path;
3142 };
3143 #endif
3144 /* ARGSUSED */
3145 int
3146 revoke(p, uap)
3147 	struct proc *p;
3148 	register struct revoke_args /* {
3149 		syscallarg(char *) path;
3150 	} */ *uap;
3151 {
3152 	struct mount *mp;
3153 	struct vnode *vp;
3154 	struct vattr vattr;
3155 	int error;
3156 	struct nameidata nd;
3157 
3158 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3159 	if ((error = namei(&nd)) != 0)
3160 		return (error);
3161 	vp = nd.ni_vp;
3162 	NDFREE(&nd, NDF_ONLY_PNBUF);
3163 	if (vp->v_type != VCHR && vp->v_type != VBLK) {
3164 		error = EINVAL;
3165 		goto out;
3166 	}
3167 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
3168 		goto out;
3169 	if (p->p_ucred->cr_uid != vattr.va_uid &&
3170 	    (error = suser_xxx(0, p, PRISON_ROOT)))
3171 		goto out;
3172 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3173 		goto out;
3174 	if (vcount(vp) > 1)
3175 		VOP_REVOKE(vp, REVOKEALL);
3176 	vn_finished_write(mp);
3177 out:
3178 	vrele(vp);
3179 	return (error);
3180 }
3181 
3182 /*
3183  * Convert a user file descriptor to a kernel file entry.
3184  */
3185 int
3186 getvnode(fdp, fd, fpp)
3187 	struct filedesc *fdp;
3188 	int fd;
3189 	struct file **fpp;
3190 {
3191 	struct file *fp;
3192 
3193 	if ((u_int)fd >= fdp->fd_nfiles ||
3194 	    (fp = fdp->fd_ofiles[fd]) == NULL)
3195 		return (EBADF);
3196 	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO)
3197 		return (EINVAL);
3198 	*fpp = fp;
3199 	return (0);
3200 }
3201 /*
3202  * Get (NFS) file handle
3203  */
3204 #ifndef _SYS_SYSPROTO_H_
3205 struct getfh_args {
3206 	char	*fname;
3207 	fhandle_t *fhp;
3208 };
3209 #endif
3210 int
3211 getfh(p, uap)
3212 	struct proc *p;
3213 	register struct getfh_args *uap;
3214 {
3215 	struct nameidata nd;
3216 	fhandle_t fh;
3217 	register struct vnode *vp;
3218 	int error;
3219 
3220 	/*
3221 	 * Must be super user
3222 	 */
3223 	error = suser(p);
3224 	if (error)
3225 		return (error);
3226 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
3227 	error = namei(&nd);
3228 	if (error)
3229 		return (error);
3230 	NDFREE(&nd, NDF_ONLY_PNBUF);
3231 	vp = nd.ni_vp;
3232 	bzero(&fh, sizeof(fh));
3233 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3234 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3235 	vput(vp);
3236 	if (error)
3237 		return (error);
3238 	error = copyout(&fh, uap->fhp, sizeof (fh));
3239 	return (error);
3240 }
3241 
3242 /*
3243  * syscall for the rpc.lockd to use to translate a NFS file handle into
3244  * an open descriptor.
3245  *
3246  * warning: do not remove the suser() call or this becomes one giant
3247  * security hole.
3248  */
3249 #ifndef _SYS_SYSPROTO_H_
3250 struct fhopen_args {
3251 	const struct fhandle *u_fhp;
3252 	int flags;
3253 };
3254 #endif
3255 int
3256 fhopen(p, uap)
3257 	struct proc *p;
3258 	struct fhopen_args /* {
3259 		syscallarg(const struct fhandle *) u_fhp;
3260 		syscallarg(int) flags;
3261 	} */ *uap;
3262 {
3263 	struct mount *mp;
3264 	struct vnode *vp;
3265 	struct fhandle fhp;
3266 	struct vattr vat;
3267 	struct vattr *vap = &vat;
3268 	struct flock lf;
3269 	struct file *fp;
3270 	register struct filedesc *fdp = p->p_fd;
3271 	int fmode, mode, error, type;
3272 	struct file *nfp;
3273 	int indx;
3274 
3275 	/*
3276 	 * Must be super user
3277 	 */
3278 	error = suser(p);
3279 	if (error)
3280 		return (error);
3281 
3282 	fmode = FFLAGS(SCARG(uap, flags));
3283 	/* why not allow a non-read/write open for our lockd? */
3284 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3285 		return (EINVAL);
3286 	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
3287 	if (error)
3288 		return(error);
3289 	/* find the mount point */
3290 	mp = vfs_getvfs(&fhp.fh_fsid);
3291 	if (mp == NULL)
3292 		return (ESTALE);
3293 	/* now give me my vnode, it gets returned to me locked */
3294 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3295 	if (error)
3296 		return (error);
3297  	/*
3298 	 * from now on we have to make sure not
3299 	 * to forget about the vnode
3300 	 * any error that causes an abort must vput(vp)
3301 	 * just set error = err and 'goto bad;'.
3302 	 */
3303 
3304 	/*
3305 	 * from vn_open
3306 	 */
3307 	if (vp->v_type == VLNK) {
3308 		error = EMLINK;
3309 		goto bad;
3310 	}
3311 	if (vp->v_type == VSOCK) {
3312 		error = EOPNOTSUPP;
3313 		goto bad;
3314 	}
3315 	mode = 0;
3316 	if (fmode & (FWRITE | O_TRUNC)) {
3317 		if (vp->v_type == VDIR) {
3318 			error = EISDIR;
3319 			goto bad;
3320 		}
3321 		error = vn_writechk(vp);
3322 		if (error)
3323 			goto bad;
3324 		mode |= VWRITE;
3325 	}
3326 	if (fmode & FREAD)
3327 		mode |= VREAD;
3328 	if (mode) {
3329 		error = VOP_ACCESS(vp, mode, p->p_ucred, p);
3330 		if (error)
3331 			goto bad;
3332 	}
3333 	if (fmode & O_TRUNC) {
3334 		VOP_UNLOCK(vp, 0, p);				/* XXX */
3335 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3336 			vrele(vp);
3337 			return (error);
3338 		}
3339 		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
3340 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
3341 		VATTR_NULL(vap);
3342 		vap->va_size = 0;
3343 		error = VOP_SETATTR(vp, vap, p->p_ucred, p);
3344 		vn_finished_write(mp);
3345 		if (error)
3346 			goto bad;
3347 	}
3348 	error = VOP_OPEN(vp, fmode, p->p_ucred, p);
3349 	if (error)
3350 		goto bad;
3351 	/*
3352 	 * Make sure that a VM object is created for VMIO support.
3353 	 */
3354 	if (vn_canvmio(vp) == TRUE) {
3355 		if ((error = vfs_object_create(vp, p, p->p_ucred)) != 0)
3356 			goto bad;
3357 	}
3358 	if (fmode & FWRITE)
3359 		vp->v_writecount++;
3360 
3361 	/*
3362 	 * end of vn_open code
3363 	 */
3364 
3365 	if ((error = falloc(p, &nfp, &indx)) != 0)
3366 		goto bad;
3367 	fp = nfp;
3368 	nfp->f_data = (caddr_t)vp;
3369 	nfp->f_flag = fmode & FMASK;
3370 	nfp->f_ops = &vnops;
3371 	nfp->f_type = DTYPE_VNODE;
3372 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
3373 		lf.l_whence = SEEK_SET;
3374 		lf.l_start = 0;
3375 		lf.l_len = 0;
3376 		if (fmode & O_EXLOCK)
3377 			lf.l_type = F_WRLCK;
3378 		else
3379 			lf.l_type = F_RDLCK;
3380 		type = F_FLOCK;
3381 		if ((fmode & FNONBLOCK) == 0)
3382 			type |= F_WAIT;
3383 		VOP_UNLOCK(vp, 0, p);
3384 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
3385 			(void) vn_close(vp, fp->f_flag, fp->f_cred, p);
3386 			ffree(fp);
3387 			fdp->fd_ofiles[indx] = NULL;
3388 			return (error);
3389 		}
3390 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3391 		fp->f_flag |= FHASLOCK;
3392 	}
3393 	if ((vp->v_type == VREG) && (vp->v_object == NULL))
3394 		vfs_object_create(vp, p, p->p_ucred);
3395 
3396 	VOP_UNLOCK(vp, 0, p);
3397 	p->p_retval[0] = indx;
3398 	return (0);
3399 
3400 bad:
3401 	vput(vp);
3402 	return (error);
3403 }
3404 
3405 #ifndef _SYS_SYSPROTO_H_
3406 struct fhstat_args {
3407 	struct fhandle *u_fhp;
3408 	struct stat *sb;
3409 };
3410 #endif
3411 int
3412 fhstat(p, uap)
3413 	struct proc *p;
3414 	register struct fhstat_args /* {
3415 		syscallarg(struct fhandle *) u_fhp;
3416 		syscallarg(struct stat *) sb;
3417 	} */ *uap;
3418 {
3419 	struct stat sb;
3420 	fhandle_t fh;
3421 	struct mount *mp;
3422 	struct vnode *vp;
3423 	int error;
3424 
3425 	/*
3426 	 * Must be super user
3427 	 */
3428 	error = suser(p);
3429 	if (error)
3430 		return (error);
3431 
3432 	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
3433 	if (error)
3434 		return (error);
3435 
3436 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3437 		return (ESTALE);
3438 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3439 		return (error);
3440 	error = vn_stat(vp, &sb, p);
3441 	vput(vp);
3442 	if (error)
3443 		return (error);
3444 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
3445 	return (error);
3446 }
3447 
3448 #ifndef _SYS_SYSPROTO_H_
3449 struct fhstatfs_args {
3450 	struct fhandle *u_fhp;
3451 	struct statfs *buf;
3452 };
3453 #endif
3454 int
3455 fhstatfs(p, uap)
3456 	struct proc *p;
3457 	struct fhstatfs_args /* {
3458 		syscallarg(struct fhandle) *u_fhp;
3459 		syscallarg(struct statfs) *buf;
3460 	} */ *uap;
3461 {
3462 	struct statfs *sp;
3463 	struct mount *mp;
3464 	struct vnode *vp;
3465 	struct statfs sb;
3466 	fhandle_t fh;
3467 	int error;
3468 
3469 	/*
3470 	 * Must be super user
3471 	 */
3472 	if ((error = suser(p)))
3473 		return (error);
3474 
3475 	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
3476 		return (error);
3477 
3478 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3479 		return (ESTALE);
3480 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3481 		return (error);
3482 	mp = vp->v_mount;
3483 	sp = &mp->mnt_stat;
3484 	vput(vp);
3485 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
3486 		return (error);
3487 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3488 	if (suser_xxx(p->p_ucred, 0, 0)) {
3489 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
3490 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3491 		sp = &sb;
3492 	}
3493 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
3494 }
3495 
3496 /*
3497  * Syscall to push extended attribute configuration information into the
3498  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3499  * a command (int cmd), and attribute name and misc data.  For now, the
3500  * attribute name is left in userspace for consumption by the VFS_op.
3501  * It will probably be changed to be copied into sysspace by the
3502  * syscall in the future, once issues with various consumers of the
3503  * attribute code have raised their hands.
3504  *
3505  * Currently this is used only by UFS Extended Attributes.
3506  */
3507 int
3508 extattrctl(p, uap)
3509 	struct proc *p;
3510 	struct extattrctl_args *uap;
3511 {
3512 	struct nameidata nd;
3513 	struct mount *mp;
3514 	int error;
3515 
3516 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3517 	if ((error = namei(&nd)) != 0)
3518 		return (error);
3519 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
3520 	NDFREE(&nd, 0);
3521 	vrele(nd.ni_vp);
3522 	if (error)
3523 		return (error);
3524 	error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname),
3525 	    SCARG(uap, arg), p);
3526 	vn_finished_write(mp);
3527 	return (error);
3528 }
3529 
3530 /*
3531  * Syscall to set a named extended attribute on a file or directory.
3532  * Accepts attribute name, and a uio structure pointing to the data to set.
3533  * The uio is consumed in the style of writev().  The real work happens
3534  * in VOP_SETEXTATTR().
3535  */
3536 int
3537 extattr_set_file(p, uap)
3538 	struct proc *p;
3539 	struct extattr_set_file_args *uap;
3540 {
3541 	struct nameidata nd;
3542 	struct mount *mp;
3543 	struct uio auio;
3544 	struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
3545 	char attrname[EXTATTR_MAXNAMELEN];
3546 	u_int iovlen, cnt;
3547 	int error, i;
3548 
3549 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
3550 	if (error)
3551 		return (error);
3552 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3553 	if ((error = namei(&nd)) != 0)
3554 		return(error);
3555 	if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0)
3556 		goto done;
3557 	iovlen = uap->iovcnt * sizeof(struct iovec);
3558 	if (uap->iovcnt > UIO_SMALLIOV) {
3559 		if (uap->iovcnt > UIO_MAXIOV) {
3560 			error = EINVAL;
3561 			goto done;
3562 		}
3563 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3564 		needfree = iov;
3565 	} else
3566 		iov = aiov;
3567 	auio.uio_iov = iov;
3568 	auio.uio_iovcnt = uap->iovcnt;
3569 	auio.uio_rw = UIO_WRITE;
3570 	auio.uio_segflg = UIO_USERSPACE;
3571 	auio.uio_procp = p;
3572 	auio.uio_offset = 0;
3573 	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
3574 		goto done;
3575 	auio.uio_resid = 0;
3576 	for (i = 0; i < uap->iovcnt; i++) {
3577 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
3578 			error = EINVAL;
3579 			goto done;
3580 		}
3581 		auio.uio_resid += iov->iov_len;
3582 		iov++;
3583 	}
3584 	cnt = auio.uio_resid;
3585 	error = VOP_SETEXTATTR(nd.ni_vp, attrname, &auio, p->p_cred->pc_ucred,
3586 	    p);
3587 	if (auio.uio_resid != cnt && (error == ERESTART ||
3588 	    error == EINTR || error == EWOULDBLOCK))
3589 		error = 0;
3590 	cnt -= auio.uio_resid;
3591 	p->p_retval[0] = cnt;
3592 done:
3593 	if (needfree)
3594 		FREE(needfree, M_IOV);
3595 	NDFREE(&nd, 0);
3596 	vrele(nd.ni_vp);
3597 	vn_finished_write(mp);
3598 	return (error);
3599 }
3600 
3601 /*
3602  * Syscall to get a named extended attribute on a file or directory.
3603  * Accepts attribute name, and a uio structure pointing to a buffer for the
3604  * data.  The uio is consumed in the style of readv().  The real work
3605  * happens in VOP_GETEXTATTR();
3606  */
3607 int
3608 extattr_get_file(p, uap)
3609 	struct proc *p;
3610 	struct extattr_get_file_args *uap;
3611 {
3612 	struct nameidata nd;
3613 	struct uio auio;
3614 	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
3615 	char attrname[EXTATTR_MAXNAMELEN];
3616 	u_int iovlen, cnt;
3617 	int error, i;
3618 
3619 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
3620 	if (error)
3621 		return (error);
3622 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3623 	if ((error = namei(&nd)) != 0)
3624 		return (error);
3625 	iovlen = uap->iovcnt * sizeof (struct iovec);
3626 	if (uap->iovcnt > UIO_SMALLIOV) {
3627 		if (uap->iovcnt > UIO_MAXIOV) {
3628 			NDFREE(&nd, 0);
3629 			vrele(nd.ni_vp);
3630 			return (EINVAL);
3631 		}
3632 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3633 		needfree = iov;
3634 	} else {
3635 		iov = aiov;
3636 		needfree = NULL;
3637 	}
3638 	auio.uio_iov = iov;
3639 	auio.uio_iovcnt = uap->iovcnt;
3640 	auio.uio_rw = UIO_READ;
3641 	auio.uio_segflg = UIO_USERSPACE;
3642 	auio.uio_procp = p;
3643 	auio.uio_offset = 0;
3644 	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
3645 		goto done;
3646 	auio.uio_resid = 0;
3647 	for (i = 0; i < uap->iovcnt; i++) {
3648 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
3649 			error = EINVAL;
3650 			goto done;
3651 		}
3652 		auio.uio_resid += iov->iov_len;
3653 		iov++;
3654 	}
3655 	cnt = auio.uio_resid;
3656 	error = VOP_GETEXTATTR(nd.ni_vp, attrname, &auio, p->p_cred->pc_ucred,
3657 	    p);
3658 	if (auio.uio_resid != cnt && (error == ERESTART ||
3659 	    error == EINTR || error == EWOULDBLOCK))
3660 		error = 0;
3661 	cnt -= auio.uio_resid;
3662 	p->p_retval[0] = cnt;
3663 done:
3664 	if (needfree)
3665 		FREE(needfree, M_IOV);
3666 	NDFREE(&nd, 0);
3667 	vrele(nd.ni_vp);
3668 	return(error);
3669 }
3670 
3671 /*
3672  * Syscall to delete a named extended attribute from a file or directory.
3673  * Accepts attribute name.  The real work happens in VOP_SETEXTATTR().
3674  */
3675 int
3676 extattr_delete_file(p, uap)
3677 	struct proc *p;
3678 	struct extattr_delete_file_args *uap;
3679 {
3680 	struct mount *mp;
3681 	struct nameidata nd;
3682 	char attrname[EXTATTR_MAXNAMELEN];
3683 	int	error;
3684 
3685 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
3686 	if (error)
3687 		return(error);
3688 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3689 	if ((error = namei(&nd)) != 0)
3690 		return(error);
3691 	if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0) {
3692 		vrele(nd.ni_vp);
3693 		return (error);
3694 	}
3695 	error = VOP_SETEXTATTR(nd.ni_vp, attrname, NULL, p->p_cred->pc_ucred,
3696 	    p);
3697 	NDFREE(&nd, 0);
3698 	vrele(nd.ni_vp);
3699 	vn_finished_write(mp);
3700 	return(error);
3701 }
3702