xref: /freebsd/sys/kern/vfs_extattr.c (revision c17d43407fe04133a94055b0dbc7ea8965654a9f)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  * $FreeBSD$
40  */
41 
42 /* For 4.3 integer FS ID compatibility */
43 #include "opt_compat.h"
44 #include "opt_ffs.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/sysent.h>
51 #include <sys/malloc.h>
52 #include <sys/mount.h>
53 #include <sys/mutex.h>
54 #include <sys/sysproto.h>
55 #include <sys/namei.h>
56 #include <sys/filedesc.h>
57 #include <sys/kernel.h>
58 #include <sys/fcntl.h>
59 #include <sys/file.h>
60 #include <sys/linker.h>
61 #include <sys/stat.h>
62 #include <sys/sx.h>
63 #include <sys/unistd.h>
64 #include <sys/vnode.h>
65 #include <sys/proc.h>
66 #include <sys/dirent.h>
67 #include <sys/extattr.h>
68 #include <sys/jail.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/limits.h>
72 
73 #include <vm/vm.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_page.h>
76 #include <vm/uma.h>
77 
78 static int change_dir(struct nameidata *ndp, struct thread *td);
79 static void checkdirs(struct vnode *olddp, struct vnode *newdp);
80 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
81 static int getutimes(const struct timeval *, struct timespec *);
82 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
83 static int setfmode(struct thread *td, struct vnode *, int);
84 static int setfflags(struct thread *td, struct vnode *, int);
85 static int setutimes(struct thread *td, struct vnode *,
86     const struct timespec *, int);
87 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
88     struct thread *td);
89 
90 static int	usermount = 0;	/* if 1, non-root can mount fs. */
91 
92 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
93 
94 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
95 
96 /*
97  * Virtual File System System Calls
98  */
99 
#ifndef _SYS_SYSPROTO_H_
struct nmount_args {
	struct iovec    *iovp;
	unsigned int    iovcnt;
	int             flags;
	};
#endif
/*
 * nmount(2) system call entry point.
 *
 * The arguments (an iovec array `iovp', its length `iovcnt' and the mount
 * `flags') are accepted but not examined: this is a stub that always
 * fails with EOPNOTSUPP.
 */
/* ARGSUSED */
int
nmount(struct thread *td, struct nmount_args *uap)
{

	return (EOPNOTSUPP);
}
120 
121 /*
122  * Mount a file system.
123  */
124 #ifndef _SYS_SYSPROTO_H_
125 struct mount_args {
126 	char	*type;
127 	char	*path;
128 	int	flags;
129 	caddr_t	data;
130 };
131 #endif
132 /* ARGSUSED */
133 int
134 mount(td, uap)
135 	struct thread *td;
136 	struct mount_args /* {
137 		syscallarg(char *) type;
138 		syscallarg(char *) path;
139 		syscallarg(int) flags;
140 		syscallarg(caddr_t) data;
141 	} */ *uap;
142 {
143 	char *fstype;
144 	char *fspath;
145 	int error;
146 
147 	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK | M_ZERO);
148 	fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK | M_ZERO);
149 
150 	/*
151 	 * vfs_mount() actually takes a kernel string for `type' and
152 	 * `path' now, so extract them.
153 	 */
154 	error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL);
155 	if (error)
156 		goto finish;
157 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
158 	if (error)
159 		goto finish;
160 	error = vfs_mount(td, fstype, fspath, SCARG(uap, flags),
161 	    SCARG(uap, data));
162 finish:
163 	free(fstype, M_TEMP);
164 	free(fspath, M_TEMP);
165 	return (error);
166 }
167 
168 /*
169  * vfs_mount(): actually attempt a filesystem mount.
170  *
171  * This routine is designed to be a "generic" entry point for routines
172  * that wish to mount a filesystem. All parameters except `fsdata' are
173  * pointers into kernel space. `fsdata' is currently still a pointer
174  * into userspace.
175  */
176 int
177 vfs_mount(td, fstype, fspath, fsflags, fsdata)
178 	struct thread *td;
179 	const char *fstype;
180 	char *fspath;
181 	int fsflags;
182 	void *fsdata;
183 {
184 	struct vnode *vp;
185 	struct mount *mp;
186 	struct vfsconf *vfsp;
187 	int error, flag = 0, flag2 = 0;
188 	struct vattr va;
189 	struct nameidata nd;
190 
191 	/*
192 	 * Be ultra-paranoid about making sure the type and fspath
193 	 * variables will fit in our mp buffers, including the
194 	 * terminating NUL.
195 	 */
196 	if ((strlen(fstype) >= MFSNAMELEN - 1) ||
197 	    (strlen(fspath) >= MNAMELEN - 1))
198 		return (ENAMETOOLONG);
199 
200 	if (usermount == 0) {
201 		error = suser_td(td);
202 		if (error)
203 			return (error);
204 	}
205 	/*
206 	 * Do not allow NFS export by non-root users.
207 	 */
208 	if (fsflags & MNT_EXPORTED) {
209 		error = suser_td(td);
210 		if (error)
211 			return (error);
212 	}
213 	/*
214 	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
215 	 */
216 	if (suser_xxx(td->td_ucred, 0, 0))
217 		fsflags |= MNT_NOSUID | MNT_NODEV;
218 	/*
219 	 * Get vnode to be covered
220 	 */
221 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
222 	if ((error = namei(&nd)) != 0)
223 		return (error);
224 	NDFREE(&nd, NDF_ONLY_PNBUF);
225 	vp = nd.ni_vp;
226 	if (fsflags & MNT_UPDATE) {
227 		if ((vp->v_flag & VROOT) == 0) {
228 			vput(vp);
229 			return (EINVAL);
230 		}
231 		mp = vp->v_mount;
232 		flag = mp->mnt_flag;
233 		flag2 = mp->mnt_kern_flag;
234 		/*
235 		 * We only allow the filesystem to be reloaded if it
236 		 * is currently mounted read-only.
237 		 */
238 		if ((fsflags & MNT_RELOAD) &&
239 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
240 			vput(vp);
241 			return (EOPNOTSUPP);	/* Needs translation */
242 		}
243 		/*
244 		 * Only root, or the user that did the original mount is
245 		 * permitted to update it.
246 		 */
247 		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
248 			error = suser_td(td);
249 			if (error) {
250 				vput(vp);
251 				return (error);
252 			}
253 		}
254 		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
255 			vput(vp);
256 			return (EBUSY);
257 		}
258 		mtx_lock(&vp->v_interlock);
259 		if ((vp->v_flag & VMOUNT) != 0 ||
260 		    vp->v_mountedhere != NULL) {
261 			mtx_unlock(&vp->v_interlock);
262 			vfs_unbusy(mp, td);
263 			vput(vp);
264 			return (EBUSY);
265 		}
266 		vp->v_flag |= VMOUNT;
267 		mtx_unlock(&vp->v_interlock);
268 		mp->mnt_flag |= fsflags &
269 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
270 		VOP_UNLOCK(vp, 0, td);
271 		goto update;
272 	}
273 	/*
274 	 * If the user is not root, ensure that they own the directory
275 	 * onto which we are attempting to mount.
276 	 */
277 	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
278 	if (error) {
279 		vput(vp);
280 		return (error);
281 	}
282 	if (va.va_uid != td->td_ucred->cr_uid) {
283 		error = suser_td(td);
284 		if (error) {
285 			vput(vp);
286 			return (error);
287 		}
288 	}
289 	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0))
290 	    != 0) {
291 		vput(vp);
292 		return (error);
293 	}
294 	if (vp->v_type != VDIR) {
295 		vput(vp);
296 		return (ENOTDIR);
297 	}
298 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
299 		if (!strcmp(vfsp->vfc_name, fstype))
300 			break;
301 	if (vfsp == NULL) {
302 		linker_file_t lf;
303 
304 		/* Only load modules for root (very important!) */
305 		error = suser_td(td);
306 		if (error) {
307 			vput(vp);
308 			return error;
309 		}
310 		error = linker_load_file(fstype, &lf);
311 		if (error || lf == NULL) {
312 			vput(vp);
313 			if (lf == NULL)
314 				error = ENODEV;
315 			return error;
316 		}
317 		lf->userrefs++;
318 		/* lookup again, see if the VFS was loaded */
319 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
320 			if (!strcmp(vfsp->vfc_name, fstype))
321 				break;
322 		if (vfsp == NULL) {
323 			lf->userrefs--;
324 			linker_file_unload(lf);
325 			vput(vp);
326 			return (ENODEV);
327 		}
328 	}
329 	mtx_lock(&vp->v_interlock);
330 	if ((vp->v_flag & VMOUNT) != 0 ||
331 	    vp->v_mountedhere != NULL) {
332 		mtx_unlock(&vp->v_interlock);
333 		vput(vp);
334 		return (EBUSY);
335 	}
336 	vp->v_flag |= VMOUNT;
337 	mtx_unlock(&vp->v_interlock);
338 
339 	/*
340 	 * Allocate and initialize the filesystem.
341 	 */
342 	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
343 	TAILQ_INIT(&mp->mnt_nvnodelist);
344 	TAILQ_INIT(&mp->mnt_reservedvnlist);
345 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
346 	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
347 	mp->mnt_op = vfsp->vfc_vfsops;
348 	mp->mnt_vfc = vfsp;
349 	vfsp->vfc_refcount++;
350 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
351 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
352 	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
353 	mp->mnt_stat.f_fstypename[MFSNAMELEN - 1] = '\0';
354 	mp->mnt_vnodecovered = vp;
355 	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
356 	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
357 	mp->mnt_stat.f_mntonname[MNAMELEN - 1] = '\0';
358 	mp->mnt_iosize_max = DFLTPHYS;
359 	VOP_UNLOCK(vp, 0, td);
360 update:
361 	/*
362 	 * Set the mount level flags.
363 	 */
364 	if (fsflags & MNT_RDONLY)
365 		mp->mnt_flag |= MNT_RDONLY;
366 	else if (mp->mnt_flag & MNT_RDONLY)
367 		mp->mnt_kern_flag |= MNTK_WANTRDWR;
368 	mp->mnt_flag &=~ MNT_UPDATEMASK;
369 	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
370 	/*
371 	 * Mount the filesystem.
372 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
373 	 * get.  No freeing of cn_pnbuf.
374 	 */
375 	error = VFS_MOUNT(mp, fspath, fsdata, &nd, td);
376 	if (mp->mnt_flag & MNT_UPDATE) {
377 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
378 			mp->mnt_flag &= ~MNT_RDONLY;
379 		mp->mnt_flag &=~
380 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
381 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
382 		if (error) {
383 			mp->mnt_flag = flag;
384 			mp->mnt_kern_flag = flag2;
385 		}
386 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
387 			if (mp->mnt_syncer == NULL)
388 				error = vfs_allocate_syncvnode(mp);
389 		} else {
390 			if (mp->mnt_syncer != NULL)
391 				vrele(mp->mnt_syncer);
392 			mp->mnt_syncer = NULL;
393 		}
394 		vfs_unbusy(mp, td);
395 		mtx_lock(&vp->v_interlock);
396 		vp->v_flag &= ~VMOUNT;
397 		mtx_unlock(&vp->v_interlock);
398 		vrele(vp);
399 		return (error);
400 	}
401 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
402 	/*
403 	 * Put the new filesystem on the mount list after root.
404 	 */
405 	cache_purge(vp);
406 	if (!error) {
407 		struct vnode *newdp;
408 
409 		mtx_lock(&vp->v_interlock);
410 		vp->v_flag &= ~VMOUNT;
411 		vp->v_mountedhere = mp;
412 		mtx_unlock(&vp->v_interlock);
413 		mtx_lock(&mountlist_mtx);
414 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
415 		mtx_unlock(&mountlist_mtx);
416 		if (VFS_ROOT(mp, &newdp))
417 			panic("mount: lost mount");
418 		checkdirs(vp, newdp);
419 		vput(newdp);
420 		VOP_UNLOCK(vp, 0, td);
421 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
422 			error = vfs_allocate_syncvnode(mp);
423 		vfs_unbusy(mp, td);
424 		if ((error = VFS_START(mp, 0, td)) != 0)
425 			vrele(vp);
426 	} else {
427 		mtx_lock(&vp->v_interlock);
428 		vp->v_flag &= ~VMOUNT;
429 		mtx_unlock(&vp->v_interlock);
430 		mp->mnt_vfc->vfc_refcount--;
431 		vfs_unbusy(mp, td);
432 		free((caddr_t)mp, M_MOUNT);
433 		vput(vp);
434 	}
435 	return (error);
436 }
437 
438 /*
439  * Scan all active processes to see if any of them have a current
440  * or root directory of `olddp'. If so, replace them with the new
441  * mount point.
442  */
443 static void
444 checkdirs(olddp, newdp)
445 	struct vnode *olddp, *newdp;
446 {
447 	struct filedesc *fdp;
448 	struct proc *p;
449 	int nrele;
450 
451 	if (olddp->v_usecount == 1)
452 		return;
453 	sx_slock(&allproc_lock);
454 	LIST_FOREACH(p, &allproc, p_list) {
455 		PROC_LOCK(p);
456 		fdp = p->p_fd;
457 		if (fdp == NULL) {
458 			PROC_UNLOCK(p);
459 			continue;
460 		}
461 		nrele = 0;
462 		FILEDESC_LOCK(fdp);
463 		if (fdp->fd_cdir == olddp) {
464 			VREF(newdp);
465 			fdp->fd_cdir = newdp;
466 			nrele++;
467 		}
468 		if (fdp->fd_rdir == olddp) {
469 			VREF(newdp);
470 			fdp->fd_rdir = newdp;
471 			nrele++;
472 		}
473 		FILEDESC_UNLOCK(fdp);
474 		PROC_UNLOCK(p);
475 		while (nrele--)
476 			vrele(olddp);
477 	}
478 	sx_sunlock(&allproc_lock);
479 	if (rootvnode == olddp) {
480 		vrele(rootvnode);
481 		VREF(newdp);
482 		rootvnode = newdp;
483 	}
484 }
485 
486 /*
487  * Unmount a file system.
488  *
489  * Note: unmount takes a path to the vnode mounted on as argument,
490  * not special file (as before).
491  */
492 #ifndef _SYS_SYSPROTO_H_
493 struct unmount_args {
494 	char	*path;
495 	int	flags;
496 };
497 #endif
498 /* ARGSUSED */
499 int
500 unmount(td, uap)
501 	struct thread *td;
502 	register struct unmount_args /* {
503 		syscallarg(char *) path;
504 		syscallarg(int) flags;
505 	} */ *uap;
506 {
507 	register struct vnode *vp;
508 	struct mount *mp;
509 	int error;
510 	struct nameidata nd;
511 
512 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
513 	    SCARG(uap, path), td);
514 	if ((error = namei(&nd)) != 0)
515 		return (error);
516 	vp = nd.ni_vp;
517 	NDFREE(&nd, NDF_ONLY_PNBUF);
518 	mp = vp->v_mount;
519 
520 	/*
521 	 * Only root, or the user that did the original mount is
522 	 * permitted to unmount this filesystem.
523 	 */
524 	if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
525 		error = suser_td(td);
526 		if (error) {
527 			vput(vp);
528 			return (error);
529 		}
530 	}
531 
532 	/*
533 	 * Don't allow unmounting the root file system.
534 	 */
535 	if (mp->mnt_flag & MNT_ROOTFS) {
536 		vput(vp);
537 		return (EINVAL);
538 	}
539 
540 	/*
541 	 * Must be the root of the filesystem
542 	 */
543 	if ((vp->v_flag & VROOT) == 0) {
544 		vput(vp);
545 		return (EINVAL);
546 	}
547 	vput(vp);
548 	return (dounmount(mp, SCARG(uap, flags), td));
549 }
550 
551 /*
552  * Do the actual file system unmount.
553  */
554 int
555 dounmount(mp, flags, td)
556 	struct mount *mp;
557 	int flags;
558 	struct thread *td;
559 {
560 	struct vnode *coveredvp, *fsrootvp;
561 	int error;
562 	int async_flag;
563 
564 	mtx_lock(&mountlist_mtx);
565 	mp->mnt_kern_flag |= MNTK_UNMOUNT;
566 	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
567 	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
568 	if (error) {
569 		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
570 		if (mp->mnt_kern_flag & MNTK_MWAIT)
571 			wakeup((caddr_t)mp);
572 		return (error);
573 	}
574 	vn_start_write(NULL, &mp, V_WAIT);
575 
576 	if (mp->mnt_flag & MNT_EXPUBLIC)
577 		vfs_setpublicfs(NULL, NULL, NULL);
578 
579 	vfs_msync(mp, MNT_WAIT);
580 	async_flag = mp->mnt_flag & MNT_ASYNC;
581 	mp->mnt_flag &=~ MNT_ASYNC;
582 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
583 	if (mp->mnt_syncer != NULL)
584 		vrele(mp->mnt_syncer);
585 	/* Move process cdir/rdir refs on fs root to underlying vnode. */
586 	if (VFS_ROOT(mp, &fsrootvp) == 0) {
587 		if (mp->mnt_vnodecovered != NULL)
588 			checkdirs(fsrootvp, mp->mnt_vnodecovered);
589 		if (fsrootvp == rootvnode) {
590 			vrele(rootvnode);
591 			rootvnode = NULL;
592 		}
593 		vput(fsrootvp);
594 	}
595 	if (((mp->mnt_flag & MNT_RDONLY) ||
596 	     (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) ||
597 	    (flags & MNT_FORCE)) {
598 		error = VFS_UNMOUNT(mp, flags, td);
599 	}
600 	vn_finished_write(mp);
601 	if (error) {
602 		/* Undo cdir/rdir and rootvnode changes made above. */
603 		if (VFS_ROOT(mp, &fsrootvp) == 0) {
604 			if (mp->mnt_vnodecovered != NULL)
605 				checkdirs(mp->mnt_vnodecovered, fsrootvp);
606 			if (rootvnode == NULL) {
607 				rootvnode = fsrootvp;
608 				vref(rootvnode);
609 			}
610 			vput(fsrootvp);
611 		}
612 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
613 			(void) vfs_allocate_syncvnode(mp);
614 		mtx_lock(&mountlist_mtx);
615 		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
616 		mp->mnt_flag |= async_flag;
617 		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
618 		    &mountlist_mtx, td);
619 		if (mp->mnt_kern_flag & MNTK_MWAIT)
620 			wakeup((caddr_t)mp);
621 		return (error);
622 	}
623 	mtx_lock(&mountlist_mtx);
624 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
625 	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
626 		coveredvp->v_mountedhere = NULL;
627 	mp->mnt_vfc->vfc_refcount--;
628 	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
629 		panic("unmount: dangling vnode");
630 	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td);
631 	lockdestroy(&mp->mnt_lock);
632 	if (coveredvp != NULL)
633 		vrele(coveredvp);
634 	if (mp->mnt_kern_flag & MNTK_MWAIT)
635 		wakeup((caddr_t)mp);
636 	free((caddr_t)mp, M_MOUNT);
637 	return (0);
638 }
639 
640 /*
641  * Sync each mounted filesystem.
642  */
643 #ifndef _SYS_SYSPROTO_H_
644 struct sync_args {
645         int     dummy;
646 };
647 #endif
648 
649 #ifdef DEBUG
650 static int syncprt = 0;
651 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
652 #endif
653 
654 /* ARGSUSED */
655 int
656 sync(td, uap)
657 	struct thread *td;
658 	struct sync_args *uap;
659 {
660 	struct mount *mp, *nmp;
661 	int asyncflag;
662 
663 	mtx_lock(&mountlist_mtx);
664 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
665 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
666 			nmp = TAILQ_NEXT(mp, mnt_list);
667 			continue;
668 		}
669 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
670 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
671 			asyncflag = mp->mnt_flag & MNT_ASYNC;
672 			mp->mnt_flag &= ~MNT_ASYNC;
673 			vfs_msync(mp, MNT_NOWAIT);
674 			VFS_SYNC(mp, MNT_NOWAIT,
675 			    ((td != NULL) ? td->td_ucred : NOCRED), td);
676 			mp->mnt_flag |= asyncflag;
677 			vn_finished_write(mp);
678 		}
679 		mtx_lock(&mountlist_mtx);
680 		nmp = TAILQ_NEXT(mp, mnt_list);
681 		vfs_unbusy(mp, td);
682 	}
683 	mtx_unlock(&mountlist_mtx);
684 #if 0
685 /*
686  * XXX don't call vfs_bufstats() yet because that routine
687  * was not imported in the Lite2 merge.
688  */
689 #ifdef DIAGNOSTIC
690 	if (syncprt)
691 		vfs_bufstats();
692 #endif /* DIAGNOSTIC */
693 #endif
694 	return (0);
695 }
696 
697 /* XXX PRISON: could be per prison flag */
698 static int prison_quotas;
699 #if 0
700 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
701 #endif
702 
703 /*
704  * Change filesystem quotas.
705  */
706 #ifndef _SYS_SYSPROTO_H_
707 struct quotactl_args {
708 	char *path;
709 	int cmd;
710 	int uid;
711 	caddr_t arg;
712 };
713 #endif
714 /* ARGSUSED */
715 int
716 quotactl(td, uap)
717 	struct thread *td;
718 	register struct quotactl_args /* {
719 		syscallarg(char *) path;
720 		syscallarg(int) cmd;
721 		syscallarg(int) uid;
722 		syscallarg(caddr_t) arg;
723 	} */ *uap;
724 {
725 	struct mount *mp;
726 	int error;
727 	struct nameidata nd;
728 
729 	if (jailed(td->td_ucred) && !prison_quotas)
730 		return (EPERM);
731 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
732 	if ((error = namei(&nd)) != 0)
733 		return (error);
734 	NDFREE(&nd, NDF_ONLY_PNBUF);
735 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
736 	vrele(nd.ni_vp);
737 	if (error)
738 		return (error);
739 	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
740 	    SCARG(uap, arg), td);
741 	vn_finished_write(mp);
742 	return (error);
743 }
744 
745 /*
746  * Get filesystem statistics.
747  */
748 #ifndef _SYS_SYSPROTO_H_
749 struct statfs_args {
750 	char *path;
751 	struct statfs *buf;
752 };
753 #endif
754 /* ARGSUSED */
755 int
756 statfs(td, uap)
757 	struct thread *td;
758 	register struct statfs_args /* {
759 		syscallarg(char *) path;
760 		syscallarg(struct statfs *) buf;
761 	} */ *uap;
762 {
763 	register struct mount *mp;
764 	register struct statfs *sp;
765 	int error;
766 	struct nameidata nd;
767 	struct statfs sb;
768 
769 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
770 	if ((error = namei(&nd)) != 0)
771 		return (error);
772 	mp = nd.ni_vp->v_mount;
773 	sp = &mp->mnt_stat;
774 	NDFREE(&nd, NDF_ONLY_PNBUF);
775 	vrele(nd.ni_vp);
776 	error = VFS_STATFS(mp, sp, td);
777 	if (error)
778 		return (error);
779 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
780 	if (suser_xxx(td->td_ucred, 0, 0)) {
781 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
782 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
783 		sp = &sb;
784 	}
785 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
786 }
787 
788 /*
789  * Get filesystem statistics.
790  */
791 #ifndef _SYS_SYSPROTO_H_
792 struct fstatfs_args {
793 	int fd;
794 	struct statfs *buf;
795 };
796 #endif
797 /* ARGSUSED */
798 int
799 fstatfs(td, uap)
800 	struct thread *td;
801 	register struct fstatfs_args /* {
802 		syscallarg(int) fd;
803 		syscallarg(struct statfs *) buf;
804 	} */ *uap;
805 {
806 	struct file *fp;
807 	struct mount *mp;
808 	register struct statfs *sp;
809 	int error;
810 	struct statfs sb;
811 
812 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
813 		return (error);
814 	mp = ((struct vnode *)fp->f_data)->v_mount;
815 	fdrop(fp, td);
816 	if (mp == NULL)
817 		return (EBADF);
818 	sp = &mp->mnt_stat;
819 	error = VFS_STATFS(mp, sp, td);
820 	if (error)
821 		return (error);
822 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
823 	if (suser_xxx(td->td_ucred, 0, 0)) {
824 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
825 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
826 		sp = &sb;
827 	}
828 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
829 }
830 
831 /*
832  * Get statistics on all filesystems.
833  */
834 #ifndef _SYS_SYSPROTO_H_
835 struct getfsstat_args {
836 	struct statfs *buf;
837 	long bufsize;
838 	int flags;
839 };
840 #endif
841 int
842 getfsstat(td, uap)
843 	struct thread *td;
844 	register struct getfsstat_args /* {
845 		syscallarg(struct statfs *) buf;
846 		syscallarg(long) bufsize;
847 		syscallarg(int) flags;
848 	} */ *uap;
849 {
850 	register struct mount *mp, *nmp;
851 	register struct statfs *sp;
852 	caddr_t sfsp;
853 	long count, maxcount, error;
854 
855 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
856 	sfsp = (caddr_t)SCARG(uap, buf);
857 	count = 0;
858 	mtx_lock(&mountlist_mtx);
859 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
860 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
861 			nmp = TAILQ_NEXT(mp, mnt_list);
862 			continue;
863 		}
864 		if (sfsp && count < maxcount) {
865 			sp = &mp->mnt_stat;
866 			/*
867 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
868 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
869 			 * overrides MNT_WAIT.
870 			 */
871 			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
872 			    (SCARG(uap, flags) & MNT_WAIT)) &&
873 			    (error = VFS_STATFS(mp, sp, td))) {
874 				mtx_lock(&mountlist_mtx);
875 				nmp = TAILQ_NEXT(mp, mnt_list);
876 				vfs_unbusy(mp, td);
877 				continue;
878 			}
879 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
880 			error = copyout((caddr_t)sp, sfsp, sizeof(*sp));
881 			if (error) {
882 				vfs_unbusy(mp, td);
883 				return (error);
884 			}
885 			sfsp += sizeof(*sp);
886 		}
887 		count++;
888 		mtx_lock(&mountlist_mtx);
889 		nmp = TAILQ_NEXT(mp, mnt_list);
890 		vfs_unbusy(mp, td);
891 	}
892 	mtx_unlock(&mountlist_mtx);
893 	if (sfsp && count > maxcount)
894 		td->td_retval[0] = maxcount;
895 	else
896 		td->td_retval[0] = count;
897 	return (0);
898 }
899 
900 /*
901  * Change current working directory to a given file descriptor.
902  */
903 #ifndef _SYS_SYSPROTO_H_
904 struct fchdir_args {
905 	int	fd;
906 };
907 #endif
908 /* ARGSUSED */
909 int
910 fchdir(td, uap)
911 	struct thread *td;
912 	struct fchdir_args /* {
913 		syscallarg(int) fd;
914 	} */ *uap;
915 {
916 	register struct filedesc *fdp = td->td_proc->p_fd;
917 	struct vnode *vp, *tdp, *vpold;
918 	struct mount *mp;
919 	struct file *fp;
920 	int error;
921 
922 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
923 		return (error);
924 	vp = (struct vnode *)fp->f_data;
925 	VREF(vp);
926 	fdrop(fp, td);
927 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
928 	if (vp->v_type != VDIR)
929 		error = ENOTDIR;
930 	else
931 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
932 	while (!error && (mp = vp->v_mountedhere) != NULL) {
933 		if (vfs_busy(mp, 0, 0, td))
934 			continue;
935 		error = VFS_ROOT(mp, &tdp);
936 		vfs_unbusy(mp, td);
937 		if (error)
938 			break;
939 		vput(vp);
940 		vp = tdp;
941 	}
942 	if (error) {
943 		vput(vp);
944 		return (error);
945 	}
946 	VOP_UNLOCK(vp, 0, td);
947 	FILEDESC_LOCK(fdp);
948 	vpold = fdp->fd_cdir;
949 	fdp->fd_cdir = vp;
950 	FILEDESC_UNLOCK(fdp);
951 	vrele(vpold);
952 	return (0);
953 }
954 
955 /*
956  * Change current working directory (``.'').
957  */
958 #ifndef _SYS_SYSPROTO_H_
959 struct chdir_args {
960 	char	*path;
961 };
962 #endif
963 /* ARGSUSED */
964 int
965 chdir(td, uap)
966 	struct thread *td;
967 	struct chdir_args /* {
968 		syscallarg(char *) path;
969 	} */ *uap;
970 {
971 	register struct filedesc *fdp = td->td_proc->p_fd;
972 	int error;
973 	struct nameidata nd;
974 	struct vnode *vp;
975 
976 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
977 	    SCARG(uap, path), td);
978 	if ((error = change_dir(&nd, td)) != 0)
979 		return (error);
980 	NDFREE(&nd, NDF_ONLY_PNBUF);
981 	FILEDESC_LOCK(fdp);
982 	vp = fdp->fd_cdir;
983 	fdp->fd_cdir = nd.ni_vp;
984 	FILEDESC_UNLOCK(fdp);
985 	vrele(vp);
986 	return (0);
987 }
988 
989 /*
990  * Helper function for raised chroot(2) security function:  Refuse if
991  * any filedescriptors are open directories.
992  */
993 static int
994 chroot_refuse_vdir_fds(fdp)
995 	struct filedesc *fdp;
996 {
997 	struct vnode *vp;
998 	struct file *fp;
999 	int fd;
1000 
1001 	FILEDESC_LOCK(fdp);
1002 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
1003 		fp = fget_locked(fdp, fd);
1004 		if (fp == NULL)
1005 			continue;
1006 		if (fp->f_type == DTYPE_VNODE) {
1007 			vp = (struct vnode *)fp->f_data;
1008 			if (vp->v_type == VDIR) {
1009 				FILEDESC_UNLOCK(fdp);
1010 				return (EPERM);
1011 			}
1012 		}
1013 	}
1014 	FILEDESC_UNLOCK(fdp);
1015 	return (0);
1016 }
1017 
1018 /*
1019  * This sysctl determines if we will allow a process to chroot(2) if it
1020  * has a directory open:
1021  *	0: disallowed for all processes.
1022  *	1: allowed for processes that were not already chroot(2)'ed.
1023  *	2: allowed for all processes.
1024  */
1025 
1026 static int chroot_allow_open_directories = 1;
1027 
1028 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
1029      &chroot_allow_open_directories, 0, "");
1030 
1031 /*
1032  * Change notion of root (``/'') directory.
1033  */
1034 #ifndef _SYS_SYSPROTO_H_
1035 struct chroot_args {
1036 	char	*path;
1037 };
1038 #endif
1039 /* ARGSUSED */
1040 int
1041 chroot(td, uap)
1042 	struct thread *td;
1043 	struct chroot_args /* {
1044 		syscallarg(char *) path;
1045 	} */ *uap;
1046 {
1047 	register struct filedesc *fdp = td->td_proc->p_fd;
1048 	int error;
1049 	struct nameidata nd;
1050 	struct vnode *vp;
1051 
1052 	error = suser_xxx(0, td->td_proc, PRISON_ROOT);
1053 	if (error)
1054 		return (error);
1055 	FILEDESC_LOCK(fdp);
1056 	if (chroot_allow_open_directories == 0 ||
1057 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
1058 		FILEDESC_UNLOCK(fdp);
1059 		error = chroot_refuse_vdir_fds(fdp);
1060 	} else
1061 		FILEDESC_UNLOCK(fdp);
1062 	if (error)
1063 		return (error);
1064 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1065 	    SCARG(uap, path), td);
1066 	if ((error = change_dir(&nd, td)) != 0)
1067 		return (error);
1068 	NDFREE(&nd, NDF_ONLY_PNBUF);
1069 	FILEDESC_LOCK(fdp);
1070 	vp = fdp->fd_rdir;
1071 	fdp->fd_rdir = nd.ni_vp;
1072 	if (!fdp->fd_jdir) {
1073 		fdp->fd_jdir = nd.ni_vp;
1074                 VREF(fdp->fd_jdir);
1075 	}
1076 	FILEDESC_UNLOCK(fdp);
1077 	vrele(vp);
1078 	return (0);
1079 }
1080 
1081 /*
1082  * Common routine for chroot and chdir.
1083  */
1084 static int
1085 change_dir(ndp, td)
1086 	register struct nameidata *ndp;
1087 	struct thread *td;
1088 {
1089 	struct vnode *vp;
1090 	int error;
1091 
1092 	error = namei(ndp);
1093 	if (error)
1094 		return (error);
1095 	vp = ndp->ni_vp;
1096 	if (vp->v_type != VDIR)
1097 		error = ENOTDIR;
1098 	else
1099 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
1100 	if (error)
1101 		vput(vp);
1102 	else
1103 		VOP_UNLOCK(vp, 0, td);
1104 	return (error);
1105 }
1106 
1107 /*
1108  * Check permissions, allocate an open file structure,
1109  * and call the device open routine if any.
1110  */
1111 #ifndef _SYS_SYSPROTO_H_
1112 struct open_args {
1113 	char	*path;
1114 	int	flags;
1115 	int	mode;
1116 };
1117 #endif
1118 int
1119 open(td, uap)
1120 	struct thread *td;
1121 	register struct open_args /* {
1122 		syscallarg(char *) path;
1123 		syscallarg(int) flags;
1124 		syscallarg(int) mode;
1125 	} */ *uap;
1126 {
1127 	struct proc *p = td->td_proc;
1128 	struct filedesc *fdp = p->p_fd;
1129 	struct file *fp;
1130 	struct vnode *vp;
1131 	struct vattr vat;
1132 	struct mount *mp;
1133 	int cmode, flags, oflags;
1134 	struct file *nfp;
1135 	int type, indx, error;
1136 	struct flock lf;
1137 	struct nameidata nd;
1138 
1139 	oflags = SCARG(uap, flags);
1140 	if ((oflags & O_ACCMODE) == O_ACCMODE)
1141 		return (EINVAL);
1142 	flags = FFLAGS(oflags);
1143 	error = falloc(td, &nfp, &indx);
1144 	if (error)
1145 		return (error);
1146 	fp = nfp;
1147 	FILEDESC_LOCK(fdp);
1148 	cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1149 	FILEDESC_UNLOCK(fdp);
1150 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1151 	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
1152 	/*
1153 	 * Bump the ref count to prevent another process from closing
1154 	 * the descriptor while we are blocked in vn_open()
1155 	 */
1156 	fhold(fp);
1157 	error = vn_open(&nd, &flags, cmode);
1158 	if (error) {
1159 		/*
1160 		 * release our own reference
1161 		 */
1162 		fdrop(fp, td);
1163 
1164 		/*
1165 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1166 		 * responsible for dropping the old contents of ofiles[indx]
1167 		 * if it succeeds.
1168 		 */
1169 		if ((error == ENODEV || error == ENXIO) &&
1170 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1171 		    (error =
1172 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1173 			td->td_retval[0] = indx;
1174 			return (0);
1175 		}
1176 		/*
1177 		 * Clean up the descriptor, but only if another thread hadn't
1178 		 * replaced or closed it.
1179 		 */
1180 		FILEDESC_LOCK(fdp);
1181 		if (fdp->fd_ofiles[indx] == fp) {
1182 			fdp->fd_ofiles[indx] = NULL;
1183 			FILEDESC_UNLOCK(fdp);
1184 			fdrop(fp, td);
1185 		} else
1186 			FILEDESC_UNLOCK(fdp);
1187 
1188 		if (error == ERESTART)
1189 			error = EINTR;
1190 		return (error);
1191 	}
1192 	td->td_dupfd = 0;
1193 	NDFREE(&nd, NDF_ONLY_PNBUF);
1194 	vp = nd.ni_vp;
1195 
1196 	/*
1197 	 * There should be 2 references on the file, one from the descriptor
1198 	 * table, and one for us.
1199 	 *
1200 	 * Handle the case where someone closed the file (via its file
1201 	 * descriptor) while we were blocked.  The end result should look
1202 	 * like opening the file succeeded but it was immediately closed.
1203 	 */
1204 	FILEDESC_LOCK(fdp);
1205 	FILE_LOCK(fp);
1206 	if (fp->f_count == 1) {
1207 		KASSERT(fdp->fd_ofiles[indx] != fp,
1208 		    ("Open file descriptor lost all refs"));
1209 		FILEDESC_UNLOCK(fdp);
1210 		FILE_UNLOCK(fp);
1211 		VOP_UNLOCK(vp, 0, td);
1212 		vn_close(vp, flags & FMASK, fp->f_cred, td);
1213 		fdrop(fp, td);
1214 		td->td_retval[0] = indx;
1215 		return 0;
1216 	}
1217 
1218 	fp->f_data = (caddr_t)vp;
1219 	fp->f_flag = flags & FMASK;
1220 	fp->f_ops = &vnops;
1221 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1222 	FILEDESC_UNLOCK(fdp);
1223 	FILE_UNLOCK(fp);
1224 	VOP_UNLOCK(vp, 0, td);
1225 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1226 		lf.l_whence = SEEK_SET;
1227 		lf.l_start = 0;
1228 		lf.l_len = 0;
1229 		if (flags & O_EXLOCK)
1230 			lf.l_type = F_WRLCK;
1231 		else
1232 			lf.l_type = F_RDLCK;
1233 		type = F_FLOCK;
1234 		if ((flags & FNONBLOCK) == 0)
1235 			type |= F_WAIT;
1236 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0)
1237 			goto bad;
1238 		fp->f_flag |= FHASLOCK;
1239 	}
1240 	if (flags & O_TRUNC) {
1241 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1242 			goto bad;
1243 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1244 		VATTR_NULL(&vat);
1245 		vat.va_size = 0;
1246 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1247 		error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1248 		VOP_UNLOCK(vp, 0, td);
1249 		vn_finished_write(mp);
1250 		if (error)
1251 			goto bad;
1252 	}
1253 	/* assert that vn_open created a backing object if one is needed */
1254 	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
1255 		("open: vmio vnode has no backing object after vn_open"));
1256 	/*
1257 	 * Release our private reference, leaving the one associated with
1258 	 * the descriptor table intact.
1259 	 */
1260 	fdrop(fp, td);
1261 	td->td_retval[0] = indx;
1262 	return (0);
1263 bad:
1264 	FILEDESC_LOCK(fdp);
1265 	if (fdp->fd_ofiles[indx] == fp) {
1266 		fdp->fd_ofiles[indx] = NULL;
1267 		FILEDESC_UNLOCK(fdp);
1268 		fdrop(fp, td);
1269 	} else
1270 		FILEDESC_UNLOCK(fdp);
1271 	return (error);
1272 }
1273 
1274 #ifdef COMPAT_43
1275 /*
1276  * Create a file.
1277  */
1278 #ifndef _SYS_SYSPROTO_H_
1279 struct ocreat_args {
1280 	char	*path;
1281 	int	mode;
1282 };
1283 #endif
1284 int
1285 ocreat(td, uap)
1286 	struct thread *td;
1287 	register struct ocreat_args /* {
1288 		syscallarg(char *) path;
1289 		syscallarg(int) mode;
1290 	} */ *uap;
1291 {
1292 	struct open_args /* {
1293 		syscallarg(char *) path;
1294 		syscallarg(int) flags;
1295 		syscallarg(int) mode;
1296 	} */ nuap;
1297 
1298 	SCARG(&nuap, path) = SCARG(uap, path);
1299 	SCARG(&nuap, mode) = SCARG(uap, mode);
1300 	SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC;
1301 	return (open(td, &nuap));
1302 }
1303 #endif /* COMPAT_43 */
1304 
1305 /*
1306  * Create a special file.
1307  */
1308 #ifndef _SYS_SYSPROTO_H_
1309 struct mknod_args {
1310 	char	*path;
1311 	int	mode;
1312 	int	dev;
1313 };
1314 #endif
1315 /* ARGSUSED */
1316 int
1317 mknod(td, uap)
1318 	struct thread *td;
1319 	register struct mknod_args /* {
1320 		syscallarg(char *) path;
1321 		syscallarg(int) mode;
1322 		syscallarg(int) dev;
1323 	} */ *uap;
1324 {
1325 	struct vnode *vp;
1326 	struct mount *mp;
1327 	struct vattr vattr;
1328 	int error;
1329 	int whiteout = 0;
1330 	struct nameidata nd;
1331 
1332 	switch (SCARG(uap, mode) & S_IFMT) {
1333 	case S_IFCHR:
1334 	case S_IFBLK:
1335 		error = suser_td(td);
1336 		break;
1337 	default:
1338 		error = suser_xxx(0, td->td_proc, PRISON_ROOT);
1339 		break;
1340 	}
1341 	if (error)
1342 		return (error);
1343 restart:
1344 	bwillwrite();
1345 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1346 	if ((error = namei(&nd)) != 0)
1347 		return (error);
1348 	vp = nd.ni_vp;
1349 	if (vp != NULL) {
1350 		vrele(vp);
1351 		error = EEXIST;
1352 	} else {
1353 		VATTR_NULL(&vattr);
1354 		FILEDESC_LOCK(td->td_proc->p_fd);
1355 		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
1356 		FILEDESC_UNLOCK(td->td_proc->p_fd);
1357 		vattr.va_rdev = SCARG(uap, dev);
1358 		whiteout = 0;
1359 
1360 		switch (SCARG(uap, mode) & S_IFMT) {
1361 		case S_IFMT:	/* used by badsect to flag bad sectors */
1362 			vattr.va_type = VBAD;
1363 			break;
1364 		case S_IFCHR:
1365 			vattr.va_type = VCHR;
1366 			break;
1367 		case S_IFBLK:
1368 			vattr.va_type = VBLK;
1369 			break;
1370 		case S_IFWHT:
1371 			whiteout = 1;
1372 			break;
1373 		default:
1374 			error = EINVAL;
1375 			break;
1376 		}
1377 	}
1378 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1379 		NDFREE(&nd, NDF_ONLY_PNBUF);
1380 		vput(nd.ni_dvp);
1381 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1382 			return (error);
1383 		goto restart;
1384 	}
1385 	if (!error) {
1386 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1387 		if (whiteout)
1388 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1389 		else {
1390 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1391 						&nd.ni_cnd, &vattr);
1392 			if (error == 0)
1393 				vput(nd.ni_vp);
1394 		}
1395 	}
1396 	NDFREE(&nd, NDF_ONLY_PNBUF);
1397 	vput(nd.ni_dvp);
1398 	vn_finished_write(mp);
1399 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1400 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1401 	return (error);
1402 }
1403 
1404 /*
1405  * Create a named pipe.
1406  */
1407 #ifndef _SYS_SYSPROTO_H_
1408 struct mkfifo_args {
1409 	char	*path;
1410 	int	mode;
1411 };
1412 #endif
1413 /* ARGSUSED */
1414 int
1415 mkfifo(td, uap)
1416 	struct thread *td;
1417 	register struct mkfifo_args /* {
1418 		syscallarg(char *) path;
1419 		syscallarg(int) mode;
1420 	} */ *uap;
1421 {
1422 	struct mount *mp;
1423 	struct vattr vattr;
1424 	int error;
1425 	struct nameidata nd;
1426 
1427 restart:
1428 	bwillwrite();
1429 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1430 	if ((error = namei(&nd)) != 0)
1431 		return (error);
1432 	if (nd.ni_vp != NULL) {
1433 		NDFREE(&nd, NDF_ONLY_PNBUF);
1434 		vrele(nd.ni_vp);
1435 		vput(nd.ni_dvp);
1436 		return (EEXIST);
1437 	}
1438 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1439 		NDFREE(&nd, NDF_ONLY_PNBUF);
1440 		vput(nd.ni_dvp);
1441 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1442 			return (error);
1443 		goto restart;
1444 	}
1445 	VATTR_NULL(&vattr);
1446 	vattr.va_type = VFIFO;
1447 	FILEDESC_LOCK(td->td_proc->p_fd);
1448 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
1449 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1450 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1451 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1452 	if (error == 0)
1453 		vput(nd.ni_vp);
1454 	NDFREE(&nd, NDF_ONLY_PNBUF);
1455 	vput(nd.ni_dvp);
1456 	vn_finished_write(mp);
1457 	return (error);
1458 }
1459 
1460 /*
1461  * Make a hard file link.
1462  */
1463 #ifndef _SYS_SYSPROTO_H_
1464 struct link_args {
1465 	char	*path;
1466 	char	*link;
1467 };
1468 #endif
1469 /* ARGSUSED */
1470 int
1471 link(td, uap)
1472 	struct thread *td;
1473 	register struct link_args /* {
1474 		syscallarg(char *) path;
1475 		syscallarg(char *) link;
1476 	} */ *uap;
1477 {
1478 	struct vnode *vp;
1479 	struct mount *mp;
1480 	struct nameidata nd;
1481 	int error;
1482 
1483 	bwillwrite();
1484 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), td);
1485 	if ((error = namei(&nd)) != 0)
1486 		return (error);
1487 	NDFREE(&nd, NDF_ONLY_PNBUF);
1488 	vp = nd.ni_vp;
1489 	if (vp->v_type == VDIR) {
1490 		vrele(vp);
1491 		return (EPERM);		/* POSIX */
1492 	}
1493 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1494 		vrele(vp);
1495 		return (error);
1496 	}
1497 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
1498 	if ((error = namei(&nd)) == 0) {
1499 		if (nd.ni_vp != NULL) {
1500 			vrele(nd.ni_vp);
1501 			error = EEXIST;
1502 		} else {
1503 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1504 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1505 			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1506 		}
1507 		NDFREE(&nd, NDF_ONLY_PNBUF);
1508 		vput(nd.ni_dvp);
1509 	}
1510 	vrele(vp);
1511 	vn_finished_write(mp);
1512 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1513 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1514 	return (error);
1515 }
1516 
1517 /*
1518  * Make a symbolic link.
1519  */
1520 #ifndef _SYS_SYSPROTO_H_
1521 struct symlink_args {
1522 	char	*path;
1523 	char	*link;
1524 };
1525 #endif
1526 /* ARGSUSED */
1527 int
1528 symlink(td, uap)
1529 	struct thread *td;
1530 	register struct symlink_args /* {
1531 		syscallarg(char *) path;
1532 		syscallarg(char *) link;
1533 	} */ *uap;
1534 {
1535 	struct mount *mp;
1536 	struct vattr vattr;
1537 	char *path;
1538 	int error;
1539 	struct nameidata nd;
1540 
1541 	path = uma_zalloc(namei_zone, M_WAITOK);
1542 	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
1543 		goto out;
1544 restart:
1545 	bwillwrite();
1546 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
1547 	if ((error = namei(&nd)) != 0)
1548 		goto out;
1549 	if (nd.ni_vp) {
1550 		NDFREE(&nd, NDF_ONLY_PNBUF);
1551 		vrele(nd.ni_vp);
1552 		vput(nd.ni_dvp);
1553 		error = EEXIST;
1554 		goto out;
1555 	}
1556 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1557 		NDFREE(&nd, NDF_ONLY_PNBUF);
1558 		vput(nd.ni_dvp);
1559 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1560 			return (error);
1561 		goto restart;
1562 	}
1563 	VATTR_NULL(&vattr);
1564 	FILEDESC_LOCK(td->td_proc->p_fd);
1565 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1566 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1567 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1568 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1569 	NDFREE(&nd, NDF_ONLY_PNBUF);
1570 	if (error == 0)
1571 		vput(nd.ni_vp);
1572 	vput(nd.ni_dvp);
1573 	vn_finished_write(mp);
1574 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1575 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1576 out:
1577 	uma_zfree(namei_zone, path);
1578 	return (error);
1579 }
1580 
1581 /*
1582  * Delete a whiteout from the filesystem.
1583  */
1584 /* ARGSUSED */
1585 int
1586 undelete(td, uap)
1587 	struct thread *td;
1588 	register struct undelete_args /* {
1589 		syscallarg(char *) path;
1590 	} */ *uap;
1591 {
1592 	int error;
1593 	struct mount *mp;
1594 	struct nameidata nd;
1595 
1596 restart:
1597 	bwillwrite();
1598 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1599 	    SCARG(uap, path), td);
1600 	error = namei(&nd);
1601 	if (error)
1602 		return (error);
1603 
1604 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1605 		NDFREE(&nd, NDF_ONLY_PNBUF);
1606 		if (nd.ni_vp)
1607 			vrele(nd.ni_vp);
1608 		vput(nd.ni_dvp);
1609 		return (EEXIST);
1610 	}
1611 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1612 		NDFREE(&nd, NDF_ONLY_PNBUF);
1613 		vput(nd.ni_dvp);
1614 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1615 			return (error);
1616 		goto restart;
1617 	}
1618 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1619 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1620 	NDFREE(&nd, NDF_ONLY_PNBUF);
1621 	vput(nd.ni_dvp);
1622 	vn_finished_write(mp);
1623 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1624 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1625 	return (error);
1626 }
1627 
1628 /*
1629  * Delete a name from the filesystem.
1630  */
1631 #ifndef _SYS_SYSPROTO_H_
1632 struct unlink_args {
1633 	char	*path;
1634 };
1635 #endif
1636 /* ARGSUSED */
1637 int
1638 unlink(td, uap)
1639 	struct thread *td;
1640 	struct unlink_args /* {
1641 		syscallarg(char *) path;
1642 	} */ *uap;
1643 {
1644 	struct mount *mp;
1645 	struct vnode *vp;
1646 	int error;
1647 	struct nameidata nd;
1648 
1649 restart:
1650 	bwillwrite();
1651 	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1652 	if ((error = namei(&nd)) != 0)
1653 		return (error);
1654 	vp = nd.ni_vp;
1655 	if (vp->v_type == VDIR)
1656 		error = EPERM;		/* POSIX */
1657 	else {
1658 		/*
1659 		 * The root of a mounted filesystem cannot be deleted.
1660 		 *
1661 		 * XXX: can this only be a VDIR case?
1662 		 */
1663 		if (vp->v_flag & VROOT)
1664 			error = EBUSY;
1665 	}
1666 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1667 		NDFREE(&nd, NDF_ONLY_PNBUF);
1668 		vrele(vp);
1669 		vput(nd.ni_dvp);
1670 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1671 			return (error);
1672 		goto restart;
1673 	}
1674 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1675 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1676 	if (!error) {
1677 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1678 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1679 	}
1680 	NDFREE(&nd, NDF_ONLY_PNBUF);
1681 	vput(nd.ni_dvp);
1682 	vput(vp);
1683 	vn_finished_write(mp);
1684 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1685 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1686 	return (error);
1687 }
1688 
1689 /*
1690  * Reposition read/write file offset.
1691  */
1692 #ifndef _SYS_SYSPROTO_H_
1693 struct lseek_args {
1694 	int	fd;
1695 	int	pad;
1696 	off_t	offset;
1697 	int	whence;
1698 };
1699 #endif
1700 int
1701 lseek(td, uap)
1702 	struct thread *td;
1703 	register struct lseek_args /* {
1704 		syscallarg(int) fd;
1705 		syscallarg(int) pad;
1706 		syscallarg(off_t) offset;
1707 		syscallarg(int) whence;
1708 	} */ *uap;
1709 {
1710 	struct ucred *cred = td->td_ucred;
1711 	struct file *fp;
1712 	struct vnode *vp;
1713 	struct vattr vattr;
1714 	off_t offset;
1715 	int error, noneg;
1716 
1717 	if ((error = fget(td, uap->fd, &fp)) != 0)
1718 		return (error);
1719 	if (fp->f_type != DTYPE_VNODE) {
1720 		fdrop(fp, td);
1721 		return (ESPIPE);
1722 	}
1723 	vp = (struct vnode *)fp->f_data;
1724 	noneg = (vp->v_type != VCHR);
1725 	offset = SCARG(uap, offset);
1726 	switch (SCARG(uap, whence)) {
1727 	case L_INCR:
1728 		if (noneg &&
1729 		    (fp->f_offset < 0 ||
1730 		     (offset > 0 && fp->f_offset > OFF_MAX - offset)))
1731 			return (EOVERFLOW);
1732 		offset += fp->f_offset;
1733 		break;
1734 	case L_XTND:
1735 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1736 		error = VOP_GETATTR(vp, &vattr, cred, td);
1737 		VOP_UNLOCK(vp, 0, td);
1738 		if (error)
1739 			return (error);
1740 		if (noneg &&
1741 		    (vattr.va_size > OFF_MAX ||
1742 		     (offset > 0 && vattr.va_size > OFF_MAX - offset)))
1743 			return (EOVERFLOW);
1744 		offset += vattr.va_size;
1745 		break;
1746 	case L_SET:
1747 		break;
1748 	default:
1749 		fdrop(fp, td);
1750 		return (EINVAL);
1751 	}
1752 	if (noneg && offset < 0)
1753 		return (EINVAL);
1754 	fp->f_offset = offset;
1755 	*(off_t *)(td->td_retval) = fp->f_offset;
1756 	fdrop(fp, td);
1757 	return (0);
1758 }
1759 
1760 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1761 /*
1762  * Reposition read/write file offset.
1763  */
1764 #ifndef _SYS_SYSPROTO_H_
1765 struct olseek_args {
1766 	int	fd;
1767 	long	offset;
1768 	int	whence;
1769 };
1770 #endif
1771 int
1772 olseek(td, uap)
1773 	struct thread *td;
1774 	register struct olseek_args /* {
1775 		syscallarg(int) fd;
1776 		syscallarg(long) offset;
1777 		syscallarg(int) whence;
1778 	} */ *uap;
1779 {
1780 	struct lseek_args /* {
1781 		syscallarg(int) fd;
1782 		syscallarg(int) pad;
1783 		syscallarg(off_t) offset;
1784 		syscallarg(int) whence;
1785 	} */ nuap;
1786 	int error;
1787 
1788 	SCARG(&nuap, fd) = SCARG(uap, fd);
1789 	SCARG(&nuap, offset) = SCARG(uap, offset);
1790 	SCARG(&nuap, whence) = SCARG(uap, whence);
1791 	error = lseek(td, &nuap);
1792 	return (error);
1793 }
1794 #endif /* COMPAT_43 */
1795 
1796 /*
1797  * Check access permissions using passed credentials.
1798  */
1799 static int
1800 vn_access(vp, user_flags, cred, td)
1801 	struct vnode	*vp;
1802 	int		user_flags;
1803 	struct ucred	*cred;
1804 	struct thread	*td;
1805 {
1806 	int error, flags;
1807 
1808 	/* Flags == 0 means only check for existence. */
1809 	error = 0;
1810 	if (user_flags) {
1811 		flags = 0;
1812 		if (user_flags & R_OK)
1813 			flags |= VREAD;
1814 		if (user_flags & W_OK)
1815 			flags |= VWRITE;
1816 		if (user_flags & X_OK)
1817 			flags |= VEXEC;
1818 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1819 			error = VOP_ACCESS(vp, flags, cred, td);
1820 	}
1821 	return (error);
1822 }
1823 
1824 /*
1825  * Check access permissions using "real" credentials.
1826  */
1827 #ifndef _SYS_SYSPROTO_H_
1828 struct access_args {
1829 	char	*path;
1830 	int	flags;
1831 };
1832 #endif
1833 int
1834 access(td, uap)
1835 	struct thread *td;
1836 	register struct access_args /* {
1837 		syscallarg(char *) path;
1838 		syscallarg(int) flags;
1839 	} */ *uap;
1840 {
1841 	struct ucred *cred, *tmpcred;
1842 	register struct vnode *vp;
1843 	int error;
1844 	struct nameidata nd;
1845 
1846 	/*
1847 	 * Create and modify a temporary credential instead of one that
1848 	 * is potentially shared.  This could also mess up socket
1849 	 * buffer accounting which can run in an interrupt context.
1850 	 *
1851 	 * XXX - Depending on how "threads" are finally implemented, it
1852 	 * may be better to explicitly pass the credential to namei()
1853 	 * rather than to modify the potentially shared process structure.
1854 	 */
1855 	cred = td->td_ucred;
1856 	tmpcred = crdup(cred);
1857 	tmpcred->cr_uid = cred->cr_ruid;
1858 	tmpcred->cr_groups[0] = cred->cr_rgid;
1859 	td->td_ucred = tmpcred;
1860 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1861 	    SCARG(uap, path), td);
1862 	if ((error = namei(&nd)) != 0)
1863 		goto out1;
1864 	vp = nd.ni_vp;
1865 
1866 	error = vn_access(vp, SCARG(uap, flags), tmpcred, td);
1867 	NDFREE(&nd, NDF_ONLY_PNBUF);
1868 	vput(vp);
1869 out1:
1870 	td->td_ucred = cred;
1871 	crfree(tmpcred);
1872 	return (error);
1873 }
1874 
1875 /*
1876  * Check access permissions using "effective" credentials.
1877  */
1878 #ifndef _SYS_SYSPROTO_H_
1879 struct eaccess_args {
1880 	char	*path;
1881 	int	flags;
1882 };
1883 #endif
1884 int
1885 eaccess(td, uap)
1886 	struct thread *td;
1887 	register struct eaccess_args /* {
1888 		syscallarg(char *) path;
1889 		syscallarg(int) flags;
1890 	} */ *uap;
1891 {
1892 	struct nameidata nd;
1893 	struct vnode *vp;
1894 	int error;
1895 
1896 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1897 	    SCARG(uap, path), td);
1898 	if ((error = namei(&nd)) != 0)
1899 		return (error);
1900 	vp = nd.ni_vp;
1901 
1902 	error = vn_access(vp, SCARG(uap, flags), td->td_ucred, td);
1903 	NDFREE(&nd, NDF_ONLY_PNBUF);
1904 	vput(vp);
1905 	return (error);
1906 }
1907 
1908 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1909 /*
1910  * Get file status; this version follows links.
1911  */
1912 #ifndef _SYS_SYSPROTO_H_
1913 struct ostat_args {
1914 	char	*path;
1915 	struct ostat *ub;
1916 };
1917 #endif
1918 /* ARGSUSED */
1919 int
1920 ostat(td, uap)
1921 	struct thread *td;
1922 	register struct ostat_args /* {
1923 		syscallarg(char *) path;
1924 		syscallarg(struct ostat *) ub;
1925 	} */ *uap;
1926 {
1927 	struct stat sb;
1928 	struct ostat osb;
1929 	int error;
1930 	struct nameidata nd;
1931 
1932 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1933 	    SCARG(uap, path), td);
1934 	if ((error = namei(&nd)) != 0)
1935 		return (error);
1936 	NDFREE(&nd, NDF_ONLY_PNBUF);
1937 	error = vn_stat(nd.ni_vp, &sb, td);
1938 	vput(nd.ni_vp);
1939 	if (error)
1940 		return (error);
1941 	cvtstat(&sb, &osb);
1942 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1943 	return (error);
1944 }
1945 
1946 /*
1947  * Get file status; this version does not follow links.
1948  */
1949 #ifndef _SYS_SYSPROTO_H_
1950 struct olstat_args {
1951 	char	*path;
1952 	struct ostat *ub;
1953 };
1954 #endif
1955 /* ARGSUSED */
1956 int
1957 olstat(td, uap)
1958 	struct thread *td;
1959 	register struct olstat_args /* {
1960 		syscallarg(char *) path;
1961 		syscallarg(struct ostat *) ub;
1962 	} */ *uap;
1963 {
1964 	struct vnode *vp;
1965 	struct stat sb;
1966 	struct ostat osb;
1967 	int error;
1968 	struct nameidata nd;
1969 
1970 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1971 	    SCARG(uap, path), td);
1972 	if ((error = namei(&nd)) != 0)
1973 		return (error);
1974 	vp = nd.ni_vp;
1975 	error = vn_stat(vp, &sb, td);
1976 	NDFREE(&nd, NDF_ONLY_PNBUF);
1977 	vput(vp);
1978 	if (error)
1979 		return (error);
1980 	cvtstat(&sb, &osb);
1981 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1982 	return (error);
1983 }
1984 
1985 /*
1986  * Convert from an old to a new stat structure.
1987  */
1988 void
1989 cvtstat(st, ost)
1990 	struct stat *st;
1991 	struct ostat *ost;
1992 {
1993 
1994 	ost->st_dev = st->st_dev;
1995 	ost->st_ino = st->st_ino;
1996 	ost->st_mode = st->st_mode;
1997 	ost->st_nlink = st->st_nlink;
1998 	ost->st_uid = st->st_uid;
1999 	ost->st_gid = st->st_gid;
2000 	ost->st_rdev = st->st_rdev;
2001 	if (st->st_size < (quad_t)1 << 32)
2002 		ost->st_size = st->st_size;
2003 	else
2004 		ost->st_size = -2;
2005 	ost->st_atime = st->st_atime;
2006 	ost->st_mtime = st->st_mtime;
2007 	ost->st_ctime = st->st_ctime;
2008 	ost->st_blksize = st->st_blksize;
2009 	ost->st_blocks = st->st_blocks;
2010 	ost->st_flags = st->st_flags;
2011 	ost->st_gen = st->st_gen;
2012 }
2013 #endif /* COMPAT_43 || COMPAT_SUNOS */
2014 
2015 /*
2016  * Get file status; this version follows links.
2017  */
2018 #ifndef _SYS_SYSPROTO_H_
2019 struct stat_args {
2020 	char	*path;
2021 	struct stat *ub;
2022 };
2023 #endif
2024 /* ARGSUSED */
2025 int
2026 stat(td, uap)
2027 	struct thread *td;
2028 	register struct stat_args /* {
2029 		syscallarg(char *) path;
2030 		syscallarg(struct stat *) ub;
2031 	} */ *uap;
2032 {
2033 	struct stat sb;
2034 	int error;
2035 	struct nameidata nd;
2036 
2037 #ifdef LOOKUP_SHARED
2038 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
2039 	    UIO_USERSPACE, SCARG(uap, path), td);
2040 #else
2041 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2042 	    SCARG(uap, path), td);
2043 #endif
2044 	if ((error = namei(&nd)) != 0)
2045 		return (error);
2046 	error = vn_stat(nd.ni_vp, &sb, td);
2047 	NDFREE(&nd, NDF_ONLY_PNBUF);
2048 	vput(nd.ni_vp);
2049 	if (error)
2050 		return (error);
2051 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
2052 	return (error);
2053 }
2054 
2055 /*
2056  * Get file status; this version does not follow links.
2057  */
2058 #ifndef _SYS_SYSPROTO_H_
2059 struct lstat_args {
2060 	char	*path;
2061 	struct stat *ub;
2062 };
2063 #endif
2064 /* ARGSUSED */
2065 int
2066 lstat(td, uap)
2067 	struct thread *td;
2068 	register struct lstat_args /* {
2069 		syscallarg(char *) path;
2070 		syscallarg(struct stat *) ub;
2071 	} */ *uap;
2072 {
2073 	int error;
2074 	struct vnode *vp;
2075 	struct stat sb;
2076 	struct nameidata nd;
2077 
2078 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2079 	    SCARG(uap, path), td);
2080 	if ((error = namei(&nd)) != 0)
2081 		return (error);
2082 	vp = nd.ni_vp;
2083 	error = vn_stat(vp, &sb, td);
2084 	NDFREE(&nd, NDF_ONLY_PNBUF);
2085 	vput(vp);
2086 	if (error)
2087 		return (error);
2088 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
2089 	return (error);
2090 }
2091 
2092 /*
2093  * Implementation of the NetBSD stat() function.
2094  * XXX This should probably be collapsed with the FreeBSD version,
2095  * as the differences are only due to vn_stat() clearing spares at
2096  * the end of the structures.  vn_stat could be split to avoid this,
2097  * and thus collapse the following to close to zero code.
2098  */
2099 void
2100 cvtnstat(sb, nsb)
2101 	struct stat *sb;
2102 	struct nstat *nsb;
2103 {
2104 	nsb->st_dev = sb->st_dev;
2105 	nsb->st_ino = sb->st_ino;
2106 	nsb->st_mode = sb->st_mode;
2107 	nsb->st_nlink = sb->st_nlink;
2108 	nsb->st_uid = sb->st_uid;
2109 	nsb->st_gid = sb->st_gid;
2110 	nsb->st_rdev = sb->st_rdev;
2111 	nsb->st_atimespec = sb->st_atimespec;
2112 	nsb->st_mtimespec = sb->st_mtimespec;
2113 	nsb->st_ctimespec = sb->st_ctimespec;
2114 	nsb->st_size = sb->st_size;
2115 	nsb->st_blocks = sb->st_blocks;
2116 	nsb->st_blksize = sb->st_blksize;
2117 	nsb->st_flags = sb->st_flags;
2118 	nsb->st_gen = sb->st_gen;
2119 	nsb->st_qspare[0] = sb->st_qspare[0];
2120 	nsb->st_qspare[1] = sb->st_qspare[1];
2121 }
2122 
2123 #ifndef _SYS_SYSPROTO_H_
2124 struct nstat_args {
2125 	char	*path;
2126 	struct nstat *ub;
2127 };
2128 #endif
2129 /* ARGSUSED */
2130 int
2131 nstat(td, uap)
2132 	struct thread *td;
2133 	register struct nstat_args /* {
2134 		syscallarg(char *) path;
2135 		syscallarg(struct nstat *) ub;
2136 	} */ *uap;
2137 {
2138 	struct stat sb;
2139 	struct nstat nsb;
2140 	int error;
2141 	struct nameidata nd;
2142 
2143 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2144 	    SCARG(uap, path), td);
2145 	if ((error = namei(&nd)) != 0)
2146 		return (error);
2147 	NDFREE(&nd, NDF_ONLY_PNBUF);
2148 	error = vn_stat(nd.ni_vp, &sb, td);
2149 	vput(nd.ni_vp);
2150 	if (error)
2151 		return (error);
2152 	cvtnstat(&sb, &nsb);
2153 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2154 	return (error);
2155 }
2156 
2157 /*
2158  * NetBSD lstat.  Get file status; this version does not follow links.
2159  */
2160 #ifndef _SYS_SYSPROTO_H_
2161 struct lstat_args {
2162 	char	*path;
2163 	struct stat *ub;
2164 };
2165 #endif
2166 /* ARGSUSED */
2167 int
2168 nlstat(td, uap)
2169 	struct thread *td;
2170 	register struct nlstat_args /* {
2171 		syscallarg(char *) path;
2172 		syscallarg(struct nstat *) ub;
2173 	} */ *uap;
2174 {
2175 	int error;
2176 	struct vnode *vp;
2177 	struct stat sb;
2178 	struct nstat nsb;
2179 	struct nameidata nd;
2180 
2181 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2182 	    SCARG(uap, path), td);
2183 	if ((error = namei(&nd)) != 0)
2184 		return (error);
2185 	vp = nd.ni_vp;
2186 	NDFREE(&nd, NDF_ONLY_PNBUF);
2187 	error = vn_stat(vp, &sb, td);
2188 	vput(vp);
2189 	if (error)
2190 		return (error);
2191 	cvtnstat(&sb, &nsb);
2192 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2193 	return (error);
2194 }
2195 
2196 /*
2197  * Get configurable pathname variables.
2198  */
2199 #ifndef _SYS_SYSPROTO_H_
2200 struct pathconf_args {
2201 	char	*path;
2202 	int	name;
2203 };
2204 #endif
2205 /* ARGSUSED */
2206 int
2207 pathconf(td, uap)
2208 	struct thread *td;
2209 	register struct pathconf_args /* {
2210 		syscallarg(char *) path;
2211 		syscallarg(int) name;
2212 	} */ *uap;
2213 {
2214 	int error;
2215 	struct nameidata nd;
2216 
2217 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2218 	    SCARG(uap, path), td);
2219 	if ((error = namei(&nd)) != 0)
2220 		return (error);
2221 	NDFREE(&nd, NDF_ONLY_PNBUF);
2222 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), td->td_retval);
2223 	vput(nd.ni_vp);
2224 	return (error);
2225 }
2226 
2227 /*
2228  * Return target name of a symbolic link.
2229  */
2230 #ifndef _SYS_SYSPROTO_H_
2231 struct readlink_args {
2232 	char	*path;
2233 	char	*buf;
2234 	int	count;
2235 };
2236 #endif
2237 /* ARGSUSED */
2238 int
2239 readlink(td, uap)
2240 	struct thread *td;
2241 	register struct readlink_args /* {
2242 		syscallarg(char *) path;
2243 		syscallarg(char *) buf;
2244 		syscallarg(int) count;
2245 	} */ *uap;
2246 {
2247 	register struct vnode *vp;
2248 	struct iovec aiov;
2249 	struct uio auio;
2250 	int error;
2251 	struct nameidata nd;
2252 
2253 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2254 	    SCARG(uap, path), td);
2255 	if ((error = namei(&nd)) != 0)
2256 		return (error);
2257 	NDFREE(&nd, NDF_ONLY_PNBUF);
2258 	vp = nd.ni_vp;
2259 	if (vp->v_type != VLNK)
2260 		error = EINVAL;
2261 	else {
2262 		aiov.iov_base = SCARG(uap, buf);
2263 		aiov.iov_len = SCARG(uap, count);
2264 		auio.uio_iov = &aiov;
2265 		auio.uio_iovcnt = 1;
2266 		auio.uio_offset = 0;
2267 		auio.uio_rw = UIO_READ;
2268 		auio.uio_segflg = UIO_USERSPACE;
2269 		auio.uio_td = td;
2270 		auio.uio_resid = SCARG(uap, count);
2271 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2272 	}
2273 	vput(vp);
2274 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
2275 	return (error);
2276 }
2277 
2278 /*
2279  * Common implementation code for chflags() and fchflags().
2280  */
2281 static int
2282 setfflags(td, vp, flags)
2283 	struct thread *td;
2284 	struct vnode *vp;
2285 	int flags;
2286 {
2287 	int error;
2288 	struct mount *mp;
2289 	struct vattr vattr;
2290 
2291 	/*
2292 	 * Prevent non-root users from setting flags on devices.  When
2293 	 * a device is reused, users can retain ownership of the device
2294 	 * if they are allowed to set flags and programs assume that
2295 	 * chown can't fail when done as root.
2296 	 */
2297 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2298 		error = suser_xxx(td->td_ucred, td->td_proc, PRISON_ROOT);
2299 		if (error)
2300 			return (error);
2301 	}
2302 
2303 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2304 		return (error);
2305 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2306 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2307 	VATTR_NULL(&vattr);
2308 	vattr.va_flags = flags;
2309 	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2310 	VOP_UNLOCK(vp, 0, td);
2311 	vn_finished_write(mp);
2312 	return (error);
2313 }
2314 
2315 /*
2316  * Change flags of a file given a path name.
2317  */
2318 #ifndef _SYS_SYSPROTO_H_
2319 struct chflags_args {
2320 	char	*path;
2321 	int	flags;
2322 };
2323 #endif
2324 /* ARGSUSED */
2325 int
2326 chflags(td, uap)
2327 	struct thread *td;
2328 	register struct chflags_args /* {
2329 		syscallarg(char *) path;
2330 		syscallarg(int) flags;
2331 	} */ *uap;
2332 {
2333 	int error;
2334 	struct nameidata nd;
2335 
2336 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2337 	if ((error = namei(&nd)) != 0)
2338 		return (error);
2339 	NDFREE(&nd, NDF_ONLY_PNBUF);
2340 	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
2341 	vrele(nd.ni_vp);
2342 	return error;
2343 }
2344 
2345 /*
2346  * Change flags of a file given a file descriptor.
2347  */
2348 #ifndef _SYS_SYSPROTO_H_
2349 struct fchflags_args {
2350 	int	fd;
2351 	int	flags;
2352 };
2353 #endif
2354 /* ARGSUSED */
2355 int
2356 fchflags(td, uap)
2357 	struct thread *td;
2358 	register struct fchflags_args /* {
2359 		syscallarg(int) fd;
2360 		syscallarg(int) flags;
2361 	} */ *uap;
2362 {
2363 	struct file *fp;
2364 	int error;
2365 
2366 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2367 		return (error);
2368 	error = setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags));
2369 	fdrop(fp, td);
2370 	return (error);
2371 }
2372 
2373 /*
2374  * Common implementation code for chmod(), lchmod() and fchmod().
2375  */
2376 static int
2377 setfmode(td, vp, mode)
2378 	struct thread *td;
2379 	struct vnode *vp;
2380 	int mode;
2381 {
2382 	int error;
2383 	struct mount *mp;
2384 	struct vattr vattr;
2385 
2386 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2387 		return (error);
2388 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2389 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2390 	VATTR_NULL(&vattr);
2391 	vattr.va_mode = mode & ALLPERMS;
2392 	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2393 	VOP_UNLOCK(vp, 0, td);
2394 	vn_finished_write(mp);
2395 	return error;
2396 }
2397 
2398 /*
2399  * Change mode of a file given path name.
2400  */
2401 #ifndef _SYS_SYSPROTO_H_
2402 struct chmod_args {
2403 	char	*path;
2404 	int	mode;
2405 };
2406 #endif
2407 /* ARGSUSED */
2408 int
2409 chmod(td, uap)
2410 	struct thread *td;
2411 	register struct chmod_args /* {
2412 		syscallarg(char *) path;
2413 		syscallarg(int) mode;
2414 	} */ *uap;
2415 {
2416 	int error;
2417 	struct nameidata nd;
2418 
2419 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2420 	if ((error = namei(&nd)) != 0)
2421 		return (error);
2422 	NDFREE(&nd, NDF_ONLY_PNBUF);
2423 	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
2424 	vrele(nd.ni_vp);
2425 	return error;
2426 }
2427 
2428 /*
2429  * Change mode of a file given path name (don't follow links.)
2430  */
2431 #ifndef _SYS_SYSPROTO_H_
2432 struct lchmod_args {
2433 	char	*path;
2434 	int	mode;
2435 };
2436 #endif
2437 /* ARGSUSED */
2438 int
2439 lchmod(td, uap)
2440 	struct thread *td;
2441 	register struct lchmod_args /* {
2442 		syscallarg(char *) path;
2443 		syscallarg(int) mode;
2444 	} */ *uap;
2445 {
2446 	int error;
2447 	struct nameidata nd;
2448 
2449 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2450 	if ((error = namei(&nd)) != 0)
2451 		return (error);
2452 	NDFREE(&nd, NDF_ONLY_PNBUF);
2453 	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
2454 	vrele(nd.ni_vp);
2455 	return error;
2456 }
2457 
2458 /*
2459  * Change mode of a file given a file descriptor.
2460  */
2461 #ifndef _SYS_SYSPROTO_H_
2462 struct fchmod_args {
2463 	int	fd;
2464 	int	mode;
2465 };
2466 #endif
2467 /* ARGSUSED */
2468 int
2469 fchmod(td, uap)
2470 	struct thread *td;
2471 	register struct fchmod_args /* {
2472 		syscallarg(int) fd;
2473 		syscallarg(int) mode;
2474 	} */ *uap;
2475 {
2476 	struct file *fp;
2477 	struct vnode *vp;
2478 	int error;
2479 
2480 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2481 		return (error);
2482 	vp = (struct vnode *)fp->f_data;
2483 	error = setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode));
2484 	fdrop(fp, td);
2485 	return (error);
2486 }
2487 
2488 /*
2489  * Common implementation for chown(), lchown(), and fchown()
2490  */
2491 static int
2492 setfown(td, vp, uid, gid)
2493 	struct thread *td;
2494 	struct vnode *vp;
2495 	uid_t uid;
2496 	gid_t gid;
2497 {
2498 	int error;
2499 	struct mount *mp;
2500 	struct vattr vattr;
2501 
2502 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2503 		return (error);
2504 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2505 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2506 	VATTR_NULL(&vattr);
2507 	vattr.va_uid = uid;
2508 	vattr.va_gid = gid;
2509 	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2510 	VOP_UNLOCK(vp, 0, td);
2511 	vn_finished_write(mp);
2512 	return error;
2513 }
2514 
2515 /*
2516  * Set ownership given a path name.
2517  */
2518 #ifndef _SYS_SYSPROTO_H_
2519 struct chown_args {
2520 	char	*path;
2521 	int	uid;
2522 	int	gid;
2523 };
2524 #endif
2525 /* ARGSUSED */
2526 int
2527 chown(td, uap)
2528 	struct thread *td;
2529 	register struct chown_args /* {
2530 		syscallarg(char *) path;
2531 		syscallarg(int) uid;
2532 		syscallarg(int) gid;
2533 	} */ *uap;
2534 {
2535 	int error;
2536 	struct nameidata nd;
2537 
2538 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2539 	if ((error = namei(&nd)) != 0)
2540 		return (error);
2541 	NDFREE(&nd, NDF_ONLY_PNBUF);
2542 	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2543 	vrele(nd.ni_vp);
2544 	return (error);
2545 }
2546 
2547 /*
2548  * Set ownership given a path name, do not cross symlinks.
2549  */
2550 #ifndef _SYS_SYSPROTO_H_
2551 struct lchown_args {
2552 	char	*path;
2553 	int	uid;
2554 	int	gid;
2555 };
2556 #endif
2557 /* ARGSUSED */
2558 int
2559 lchown(td, uap)
2560 	struct thread *td;
2561 	register struct lchown_args /* {
2562 		syscallarg(char *) path;
2563 		syscallarg(int) uid;
2564 		syscallarg(int) gid;
2565 	} */ *uap;
2566 {
2567 	int error;
2568 	struct nameidata nd;
2569 
2570 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2571 	if ((error = namei(&nd)) != 0)
2572 		return (error);
2573 	NDFREE(&nd, NDF_ONLY_PNBUF);
2574 	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2575 	vrele(nd.ni_vp);
2576 	return (error);
2577 }
2578 
2579 /*
2580  * Set ownership given a file descriptor.
2581  */
2582 #ifndef _SYS_SYSPROTO_H_
2583 struct fchown_args {
2584 	int	fd;
2585 	int	uid;
2586 	int	gid;
2587 };
2588 #endif
2589 /* ARGSUSED */
2590 int
2591 fchown(td, uap)
2592 	struct thread *td;
2593 	register struct fchown_args /* {
2594 		syscallarg(int) fd;
2595 		syscallarg(int) uid;
2596 		syscallarg(int) gid;
2597 	} */ *uap;
2598 {
2599 	struct file *fp;
2600 	struct vnode *vp;
2601 	int error;
2602 
2603 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2604 		return (error);
2605 	vp = (struct vnode *)fp->f_data;
2606 	error = setfown(td, (struct vnode *)fp->f_data,
2607 		SCARG(uap, uid), SCARG(uap, gid));
2608 	fdrop(fp, td);
2609 	return (error);
2610 }
2611 
2612 /*
2613  * Common implementation code for utimes(), lutimes(), and futimes().
2614  */
2615 static int
2616 getutimes(usrtvp, tsp)
2617 	const struct timeval *usrtvp;
2618 	struct timespec *tsp;
2619 {
2620 	struct timeval tv[2];
2621 	int error;
2622 
2623 	if (usrtvp == NULL) {
2624 		microtime(&tv[0]);
2625 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2626 		tsp[1] = tsp[0];
2627 	} else {
2628 		if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
2629 			return (error);
2630 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2631 		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
2632 	}
2633 	return 0;
2634 }
2635 
2636 /*
2637  * Common implementation code for utimes(), lutimes(), and futimes().
2638  */
2639 static int
2640 setutimes(td, vp, ts, nullflag)
2641 	struct thread *td;
2642 	struct vnode *vp;
2643 	const struct timespec *ts;
2644 	int nullflag;
2645 {
2646 	int error;
2647 	struct mount *mp;
2648 	struct vattr vattr;
2649 
2650 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2651 		return (error);
2652 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2653 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2654 	VATTR_NULL(&vattr);
2655 	vattr.va_atime = ts[0];
2656 	vattr.va_mtime = ts[1];
2657 	if (nullflag)
2658 		vattr.va_vaflags |= VA_UTIMES_NULL;
2659 	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2660 	VOP_UNLOCK(vp, 0, td);
2661 	vn_finished_write(mp);
2662 	return error;
2663 }
2664 
2665 /*
2666  * Set the access and modification times of a file.
2667  */
2668 #ifndef _SYS_SYSPROTO_H_
2669 struct utimes_args {
2670 	char	*path;
2671 	struct	timeval *tptr;
2672 };
2673 #endif
2674 /* ARGSUSED */
2675 int
2676 utimes(td, uap)
2677 	struct thread *td;
2678 	register struct utimes_args /* {
2679 		syscallarg(char *) path;
2680 		syscallarg(struct timeval *) tptr;
2681 	} */ *uap;
2682 {
2683 	struct timespec ts[2];
2684 	struct timeval *usrtvp;
2685 	int error;
2686 	struct nameidata nd;
2687 
2688 	usrtvp = SCARG(uap, tptr);
2689 	if ((error = getutimes(usrtvp, ts)) != 0)
2690 		return (error);
2691 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2692 	if ((error = namei(&nd)) != 0)
2693 		return (error);
2694 	NDFREE(&nd, NDF_ONLY_PNBUF);
2695 	error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
2696 	vrele(nd.ni_vp);
2697 	return (error);
2698 }
2699 
2700 /*
2701  * Set the access and modification times of a file.
2702  */
2703 #ifndef _SYS_SYSPROTO_H_
2704 struct lutimes_args {
2705 	char	*path;
2706 	struct	timeval *tptr;
2707 };
2708 #endif
2709 /* ARGSUSED */
2710 int
2711 lutimes(td, uap)
2712 	struct thread *td;
2713 	register struct lutimes_args /* {
2714 		syscallarg(char *) path;
2715 		syscallarg(struct timeval *) tptr;
2716 	} */ *uap;
2717 {
2718 	struct timespec ts[2];
2719 	struct timeval *usrtvp;
2720 	int error;
2721 	struct nameidata nd;
2722 
2723 	usrtvp = SCARG(uap, tptr);
2724 	if ((error = getutimes(usrtvp, ts)) != 0)
2725 		return (error);
2726 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2727 	if ((error = namei(&nd)) != 0)
2728 		return (error);
2729 	NDFREE(&nd, NDF_ONLY_PNBUF);
2730 	error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
2731 	vrele(nd.ni_vp);
2732 	return (error);
2733 }
2734 
2735 /*
2736  * Set the access and modification times of a file.
2737  */
2738 #ifndef _SYS_SYSPROTO_H_
2739 struct futimes_args {
2740 	int	fd;
2741 	struct	timeval *tptr;
2742 };
2743 #endif
2744 /* ARGSUSED */
2745 int
2746 futimes(td, uap)
2747 	struct thread *td;
2748 	register struct futimes_args /* {
2749 		syscallarg(int ) fd;
2750 		syscallarg(struct timeval *) tptr;
2751 	} */ *uap;
2752 {
2753 	struct timespec ts[2];
2754 	struct file *fp;
2755 	struct timeval *usrtvp;
2756 	int error;
2757 
2758 	usrtvp = SCARG(uap, tptr);
2759 	if ((error = getutimes(usrtvp, ts)) != 0)
2760 		return (error);
2761 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2762 		return (error);
2763 	error = setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL);
2764 	fdrop(fp, td);
2765 	return (error);
2766 }
2767 
2768 /*
2769  * Truncate a file given its path name.
2770  */
2771 #ifndef _SYS_SYSPROTO_H_
2772 struct truncate_args {
2773 	char	*path;
2774 	int	pad;
2775 	off_t	length;
2776 };
2777 #endif
2778 /* ARGSUSED */
2779 int
2780 truncate(td, uap)
2781 	struct thread *td;
2782 	register struct truncate_args /* {
2783 		syscallarg(char *) path;
2784 		syscallarg(int) pad;
2785 		syscallarg(off_t) length;
2786 	} */ *uap;
2787 {
2788 	struct mount *mp;
2789 	struct vnode *vp;
2790 	struct vattr vattr;
2791 	int error;
2792 	struct nameidata nd;
2793 
2794 	if (uap->length < 0)
2795 		return(EINVAL);
2796 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2797 	if ((error = namei(&nd)) != 0)
2798 		return (error);
2799 	vp = nd.ni_vp;
2800 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2801 		vrele(vp);
2802 		return (error);
2803 	}
2804 	NDFREE(&nd, NDF_ONLY_PNBUF);
2805 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2806 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2807 	if (vp->v_type == VDIR)
2808 		error = EISDIR;
2809 	else if ((error = vn_writechk(vp)) == 0 &&
2810 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2811 		VATTR_NULL(&vattr);
2812 		vattr.va_size = SCARG(uap, length);
2813 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2814 	}
2815 	vput(vp);
2816 	vn_finished_write(mp);
2817 	return (error);
2818 }
2819 
2820 /*
2821  * Truncate a file given a file descriptor.
2822  */
2823 #ifndef _SYS_SYSPROTO_H_
2824 struct ftruncate_args {
2825 	int	fd;
2826 	int	pad;
2827 	off_t	length;
2828 };
2829 #endif
2830 /* ARGSUSED */
2831 int
2832 ftruncate(td, uap)
2833 	struct thread *td;
2834 	register struct ftruncate_args /* {
2835 		syscallarg(int) fd;
2836 		syscallarg(int) pad;
2837 		syscallarg(off_t) length;
2838 	} */ *uap;
2839 {
2840 	struct mount *mp;
2841 	struct vattr vattr;
2842 	struct vnode *vp;
2843 	struct file *fp;
2844 	int error;
2845 
2846 	if (uap->length < 0)
2847 		return(EINVAL);
2848 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2849 		return (error);
2850 	if ((fp->f_flag & FWRITE) == 0) {
2851 		fdrop(fp, td);
2852 		return (EINVAL);
2853 	}
2854 	vp = (struct vnode *)fp->f_data;
2855 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2856 		fdrop(fp, td);
2857 		return (error);
2858 	}
2859 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2860 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2861 	if (vp->v_type == VDIR)
2862 		error = EISDIR;
2863 	else if ((error = vn_writechk(vp)) == 0) {
2864 		VATTR_NULL(&vattr);
2865 		vattr.va_size = SCARG(uap, length);
2866 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2867 	}
2868 	VOP_UNLOCK(vp, 0, td);
2869 	vn_finished_write(mp);
2870 	fdrop(fp, td);
2871 	return (error);
2872 }
2873 
2874 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2875 /*
2876  * Truncate a file given its path name.
2877  */
2878 #ifndef _SYS_SYSPROTO_H_
2879 struct otruncate_args {
2880 	char	*path;
2881 	long	length;
2882 };
2883 #endif
2884 /* ARGSUSED */
2885 int
2886 otruncate(td, uap)
2887 	struct thread *td;
2888 	register struct otruncate_args /* {
2889 		syscallarg(char *) path;
2890 		syscallarg(long) length;
2891 	} */ *uap;
2892 {
2893 	struct truncate_args /* {
2894 		syscallarg(char *) path;
2895 		syscallarg(int) pad;
2896 		syscallarg(off_t) length;
2897 	} */ nuap;
2898 
2899 	SCARG(&nuap, path) = SCARG(uap, path);
2900 	SCARG(&nuap, length) = SCARG(uap, length);
2901 	return (truncate(td, &nuap));
2902 }
2903 
2904 /*
2905  * Truncate a file given a file descriptor.
2906  */
2907 #ifndef _SYS_SYSPROTO_H_
2908 struct oftruncate_args {
2909 	int	fd;
2910 	long	length;
2911 };
2912 #endif
2913 /* ARGSUSED */
2914 int
2915 oftruncate(td, uap)
2916 	struct thread *td;
2917 	register struct oftruncate_args /* {
2918 		syscallarg(int) fd;
2919 		syscallarg(long) length;
2920 	} */ *uap;
2921 {
2922 	struct ftruncate_args /* {
2923 		syscallarg(int) fd;
2924 		syscallarg(int) pad;
2925 		syscallarg(off_t) length;
2926 	} */ nuap;
2927 
2928 	SCARG(&nuap, fd) = SCARG(uap, fd);
2929 	SCARG(&nuap, length) = SCARG(uap, length);
2930 	return (ftruncate(td, &nuap));
2931 }
2932 #endif /* COMPAT_43 || COMPAT_SUNOS */
2933 
2934 /*
2935  * Sync an open file.
2936  */
2937 #ifndef _SYS_SYSPROTO_H_
2938 struct fsync_args {
2939 	int	fd;
2940 };
2941 #endif
2942 /* ARGSUSED */
2943 int
2944 fsync(td, uap)
2945 	struct thread *td;
2946 	struct fsync_args /* {
2947 		syscallarg(int) fd;
2948 	} */ *uap;
2949 {
2950 	struct vnode *vp;
2951 	struct mount *mp;
2952 	struct file *fp;
2953 	vm_object_t obj;
2954 	int error;
2955 
2956 	GIANT_REQUIRED;
2957 
2958 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2959 		return (error);
2960 	vp = (struct vnode *)fp->f_data;
2961 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2962 		fdrop(fp, td);
2963 		return (error);
2964 	}
2965 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2966 	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2967 		vm_object_page_clean(obj, 0, 0, 0);
2968 	}
2969 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2970 #ifdef SOFTUPDATES
2971 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2972 	    error = softdep_fsync(vp);
2973 #endif
2974 
2975 	VOP_UNLOCK(vp, 0, td);
2976 	vn_finished_write(mp);
2977 	fdrop(fp, td);
2978 	return (error);
2979 }
2980 
2981 /*
2982  * Rename files.  Source and destination must either both be directories,
2983  * or both not be directories.  If target is a directory, it must be empty.
 *
 * Outline of the code below:
 *  - look up "from" with DELETE and WANTPARENT | SAVESTART, then start
 *    a write on the source vnode's mount;
 *  - look up "to" with RENAME and LOCKPARENT | LOCKLEAF | NOCACHE |
 *    SAVESTART | NOOBJ (plus WILLBEDIR when the source is a directory);
 *  - reject a directory/non-directory mismatch (ENOTDIR or EISDIR) and
 *    a source that is itself the target's parent directory (EINVAL);
 *  - call VOP_RENAME(), or release every vnode by hand on error.
 *
 * Internally error == -1 means "source and destination are the same
 * directory entry, nothing to do"; it is turned into 0 before return.
2984  */
2985 #ifndef _SYS_SYSPROTO_H_
2986 struct rename_args {
2987 	char	*from;
2988 	char	*to;
2989 };
2990 #endif
2991 /* ARGSUSED */
2992 int
2993 rename(td, uap)
2994 	struct thread *td;
2995 	register struct rename_args /* {
2996 		syscallarg(char *) from;
2997 		syscallarg(char *) to;
2998 	} */ *uap;
2999 {
3000 	struct mount *mp;
3001 	struct vnode *tvp, *fvp, *tdvp;
3002 	struct nameidata fromnd, tond;
3003 	int error;
3004 
3005 	bwillwrite();
3006 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
3007 	    SCARG(uap, from), td);
3008 	if ((error = namei(&fromnd)) != 0)
3009 		return (error);
3010 	fvp = fromnd.ni_vp;
3011 	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
		/* Undo the "from" lookup only; tond is not initialized yet. */
3012 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3013 		vrele(fromnd.ni_dvp);
3014 		vrele(fvp);
3015 		goto out1;
3016 	}
3017 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
3018 	    UIO_USERSPACE, SCARG(uap, to), td);
3019 	if (fromnd.ni_vp->v_type == VDIR)
3020 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3021 	if ((error = namei(&tond)) != 0) {
3022 		/* Translate error code for rename("dir1", "dir2/."). */
3023 		if (error == EISDIR && fvp->v_type == VDIR)
3024 			error = EINVAL;
		/*
		 * NOTE(review): vn_finished_write(mp) is not called on this
		 * path although vn_start_write() succeeded above - confirm.
		 */
3025 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3026 		vrele(fromnd.ni_dvp);
3027 		vrele(fvp);
3028 		goto out1;
3029 	}
3030 	tdvp = tond.ni_dvp;
3031 	tvp = tond.ni_vp;
	/* tvp is NULL when the target name does not exist yet. */
3032 	if (tvp != NULL) {
3033 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3034 			error = ENOTDIR;
3035 			goto out;
3036 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3037 			error = EISDIR;
3038 			goto out;
3039 		}
3040 	}
	/*
	 * NOTE(review): this EINVAL is not followed by a goto, so the
	 * same-entry test below can still replace it with -1 - confirm
	 * that both conditions cannot hold at once.
	 */
3041 	if (fvp == tdvp)
3042 		error = EINVAL;
3043 	/*
3044 	 * If source is the same as the destination (that is the
3045 	 * same inode number with the same name in the same directory),
3046 	 * then there is nothing to do.
3047 	 */
3048 	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
3049 	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3050 	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
3051 	      fromnd.ni_cnd.cn_namelen))
3052 		error = -1;	/* internal "no-op" marker, mapped to 0 at out1 */
3053 out:
3054 	if (!error) {
		/*
		 * NOTE(review): none of the four vnodes is unlocked or
		 * released here after VOP_RENAME(); presumably the VOP
		 * does that itself - confirm against the VOP contract.
		 */
3055 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3056 		if (fromnd.ni_dvp != tdvp) {
3057 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3058 		}
3059 		if (tvp) {
3060 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3061 		}
3062 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3063 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3064 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3065 		NDFREE(&tond, NDF_ONLY_PNBUF);
3066 	} else {
		/*
		 * VOP_RENAME() is not called: drop the target directory and
		 * target vnode (vput, or vrele when both are the same vnode)
		 * and the source references by hand.
		 */
3067 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3068 		NDFREE(&tond, NDF_ONLY_PNBUF);
3069 		if (tdvp == tvp)
3070 			vrele(tdvp);
3071 		else
3072 			vput(tdvp);
3073 		if (tvp)
3074 			vput(tvp);
3075 		vrele(fromnd.ni_dvp);
3076 		vrele(fvp);
3077 	}
3078 	vrele(tond.ni_startdir);
3079 	vn_finished_write(mp);
3080 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
3081 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
3082 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
3083 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
3084 out1:
	/* Also reached directly from the two early failure paths above. */
3085 	if (fromnd.ni_startdir)
3086 		vrele(fromnd.ni_startdir);
3087 	if (error == -1)
3088 		return (0);
3089 	return (error);
3090 }
3091 
3092 /*
3093  * Make a directory file.
3094  */
3095 #ifndef _SYS_SYSPROTO_H_
3096 struct mkdir_args {
3097 	char	*path;
3098 	int	mode;
3099 };
3100 #endif
3101 /* ARGSUSED */
3102 int
3103 mkdir(td, uap)
3104 	struct thread *td;
3105 	register struct mkdir_args /* {
3106 		syscallarg(char *) path;
3107 		syscallarg(int) mode;
3108 	} */ *uap;
3109 {
3110 
3111 	return vn_mkdir(uap->path, uap->mode, UIO_USERSPACE, td);
3112 }
3113 
3114 int
3115 vn_mkdir(path, mode, segflg, td)
3116 	char *path;
3117 	int mode;
3118 	enum uio_seg segflg;
3119 	struct thread *td;
3120 {
3121 	struct mount *mp;
3122 	struct vnode *vp;
3123 	struct vattr vattr;
3124 	int error;
3125 	struct nameidata nd;
3126 
3127 restart:
3128 	bwillwrite();
3129 	NDINIT(&nd, CREATE, LOCKPARENT, segflg, path, td);
3130 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3131 	if ((error = namei(&nd)) != 0)
3132 		return (error);
3133 	vp = nd.ni_vp;
3134 	if (vp != NULL) {
3135 		NDFREE(&nd, NDF_ONLY_PNBUF);
3136 		vrele(vp);
3137 		vput(nd.ni_dvp);
3138 		return (EEXIST);
3139 	}
3140 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3141 		NDFREE(&nd, NDF_ONLY_PNBUF);
3142 		vput(nd.ni_dvp);
3143 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3144 			return (error);
3145 		goto restart;
3146 	}
3147 	VATTR_NULL(&vattr);
3148 	vattr.va_type = VDIR;
3149 	FILEDESC_LOCK(td->td_proc->p_fd);
3150 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3151 	FILEDESC_UNLOCK(td->td_proc->p_fd);
3152 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3153 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3154 	NDFREE(&nd, NDF_ONLY_PNBUF);
3155 	vput(nd.ni_dvp);
3156 	if (!error)
3157 		vput(nd.ni_vp);
3158 	vn_finished_write(mp);
3159 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
3160 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
3161 	return (error);
3162 }
3163 
3164 /*
3165  * Remove a directory file.
3166  */
3167 #ifndef _SYS_SYSPROTO_H_
3168 struct rmdir_args {
3169 	char	*path;
3170 };
3171 #endif
3172 /* ARGSUSED */
3173 int
3174 rmdir(td, uap)
3175 	struct thread *td;
3176 	struct rmdir_args /* {
3177 		syscallarg(char *) path;
3178 	} */ *uap;
3179 {
3180 	struct mount *mp;
3181 	struct vnode *vp;
3182 	int error;
3183 	struct nameidata nd;
3184 
3185 restart:
3186 	bwillwrite();
3187 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3188 	    SCARG(uap, path), td);
3189 	if ((error = namei(&nd)) != 0)
3190 		return (error);
3191 	vp = nd.ni_vp;
3192 	if (vp->v_type != VDIR) {
3193 		error = ENOTDIR;
3194 		goto out;
3195 	}
3196 	/*
3197 	 * No rmdir "." please.
3198 	 */
3199 	if (nd.ni_dvp == vp) {
3200 		error = EINVAL;
3201 		goto out;
3202 	}
3203 	/*
3204 	 * The root of a mounted filesystem cannot be deleted.
3205 	 */
3206 	if (vp->v_flag & VROOT) {
3207 		error = EBUSY;
3208 		goto out;
3209 	}
3210 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3211 		NDFREE(&nd, NDF_ONLY_PNBUF);
3212 		if (nd.ni_dvp == vp)
3213 			vrele(nd.ni_dvp);
3214 		else
3215 			vput(nd.ni_dvp);
3216 		vput(vp);
3217 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3218 			return (error);
3219 		goto restart;
3220 	}
3221 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3222 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3223 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3224 	vn_finished_write(mp);
3225 out:
3226 	NDFREE(&nd, NDF_ONLY_PNBUF);
3227 	if (nd.ni_dvp == vp)
3228 		vrele(nd.ni_dvp);
3229 	else
3230 		vput(nd.ni_dvp);
3231 	vput(vp);
3232 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3233 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3234 	return (error);
3235 }
3236 
3237 #ifdef COMPAT_43
3238 /*
3239  * Read a block of directory entries in a file system independent format.
 *
 * Compatibility variant: unless the direct-read path below is taken,
 * the entries are read into a kernel buffer, the d_type / d_namlen
 * bytes of every entry are rewritten to the older layout, and the
 * result is then copied out to the caller's buffer.
 *
 * Errors produced here: EINVAL for count > 64K or a non-directory,
 * EBADF if the descriptor is not open for reading, EIO if an entry
 * with a zero d_reclen is found.  On success td_retval[0] is the
 * number of bytes transferred and *basep receives the file offset
 * sampled before the read.
3240  */
3241 #ifndef _SYS_SYSPROTO_H_
3242 struct ogetdirentries_args {
3243 	int	fd;
3244 	char	*buf;
3245 	u_int	count;
3246 	long	*basep;
3247 };
3248 #endif
3249 int
3250 ogetdirentries(td, uap)
3251 	struct thread *td;
3252 	register struct ogetdirentries_args /* {
3253 		syscallarg(int) fd;
3254 		syscallarg(char *) buf;
3255 		syscallarg(u_int) count;
3256 		syscallarg(long *) basep;
3257 	} */ *uap;
3258 {
3259 	struct vnode *vp;
3260 	struct file *fp;
3261 	struct uio auio, kuio;
3262 	struct iovec aiov, kiov;
3263 	struct dirent *dp, *edp;
3264 	caddr_t dirbuf;
3265 	int error, eofflag, readcnt;
3266 	long loff;
3267 
3268 	/* XXX arbitrary sanity limit on `count'. */
3269 	if (SCARG(uap, count) > 64 * 1024)
3270 		return (EINVAL);
3271 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3272 		return (error);
3273 	if ((fp->f_flag & FREAD) == 0) {
3274 		fdrop(fp, td);
3275 		return (EBADF);
3276 	}
3277 	vp = (struct vnode *)fp->f_data;
	/* Re-entered from below when a union mount switches vp. */
3278 unionread:
3279 	if (vp->v_type != VDIR) {
3280 		fdrop(fp, td);
3281 		return (EINVAL);
3282 	}
	/* auio describes the caller's buffer in user space. */
3283 	aiov.iov_base = SCARG(uap, buf);
3284 	aiov.iov_len = SCARG(uap, count);
3285 	auio.uio_iov = &aiov;
3286 	auio.uio_iovcnt = 1;
3287 	auio.uio_rw = UIO_READ;
3288 	auio.uio_segflg = UIO_USERSPACE;
3289 	auio.uio_td = td;
3290 	auio.uio_resid = SCARG(uap, count);
3291 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember the starting offset; it is what *basep receives. */
3292 	loff = auio.uio_offset = fp->f_offset;
	/*
	 * On non-little-endian builds a mount with mnt_maxsymlinklen <= 0
	 * is read straight into the user buffer with no conversion.
	 */
3293 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3294 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3295 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3296 			    NULL, NULL);
3297 			fp->f_offset = auio.uio_offset;
3298 		} else
3299 #	endif
3300 	{
		/* kuio is a copy of auio redirected at the kernel buffer. */
3301 		kuio = auio;
3302 		kuio.uio_iov = &kiov;
3303 		kuio.uio_segflg = UIO_SYSSPACE;
3304 		kiov.iov_len = SCARG(uap, count);
3305 		MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
3306 		kiov.iov_base = dirbuf;
3307 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3308 			    NULL, NULL);
3309 		fp->f_offset = kuio.uio_offset;
3310 		if (error == 0) {
			/* Walk the readcnt bytes that were actually filled in. */
3311 			readcnt = SCARG(uap, count) - kuio.uio_resid;
3312 			edp = (struct dirent *)&dirbuf[readcnt];
3313 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3314 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3315 					/*
3316 					 * The expected low byte of
3317 					 * dp->d_namlen is our dp->d_type.
3318 					 * The high MBZ byte of dp->d_namlen
3319 					 * is our dp->d_namlen.
3320 					 */
3321 					dp->d_type = dp->d_namlen;
3322 					dp->d_namlen = 0;
3323 #				else
3324 					/*
3325 					 * The dp->d_type is the high byte
3326 					 * of the expected dp->d_namlen,
3327 					 * so must be zero'ed.
3328 					 */
3329 					dp->d_type = 0;
3330 #				endif
				/* A zero record length would loop forever. */
3331 				if (dp->d_reclen > 0) {
3332 					dp = (struct dirent *)
3333 					    ((char *)dp + dp->d_reclen);
3334 				} else {
3335 					error = EIO;
3336 					break;
3337 				}
3338 			}
			/* Skipped when the walk stopped early with EIO. */
3339 			if (dp >= edp)
3340 				error = uiomove(dirbuf, readcnt, &auio);
3341 		}
3342 		FREE(dirbuf, M_TEMP);
3343 	}
3344 	VOP_UNLOCK(vp, 0, td);
3345 	if (error) {
3346 		fdrop(fp, td);
3347 		return (error);
3348 	}
	/* Nothing reached the caller's buffer: try the union handling. */
3349 	if (SCARG(uap, count) == auio.uio_resid) {
3350 		if (union_dircheckp) {
			/* A return of -1 means "retry with the updated vp". */
3351 			error = union_dircheckp(td, &vp, fp);
3352 			if (error == -1)
3353 				goto unionread;
3354 			if (error) {
3355 				fdrop(fp, td);
3356 				return (error);
3357 			}
3358 		}
3359 		if ((vp->v_flag & VROOT) &&
3360 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			/*
			 * Root of a MNT_UNION mount: point the open file at
			 * the covered vnode, rewind it, and read again.
			 */
3361 			struct vnode *tvp = vp;
3362 			vp = vp->v_mount->mnt_vnodecovered;
3363 			VREF(vp);
3364 			fp->f_data = (caddr_t) vp;
3365 			fp->f_offset = 0;
3366 			vrele(tvp);
3367 			goto unionread;
3368 		}
3369 	}
	/* Unlike getdirentries(), basep is not checked for NULL here. */
3370 	error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3371 	    sizeof(long));
3372 	fdrop(fp, td);
3373 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
3374 	return (error);
3375 }
3376 #endif /* COMPAT_43 */
3377 
3378 /*
3379  * Read a block of directory entries in a file system independent format.
3380  */
3381 #ifndef _SYS_SYSPROTO_H_
3382 struct getdirentries_args {
3383 	int	fd;
3384 	char	*buf;
3385 	u_int	count;
3386 	long	*basep;
3387 };
3388 #endif
3389 int
3390 getdirentries(td, uap)
3391 	struct thread *td;
3392 	register struct getdirentries_args /* {
3393 		syscallarg(int) fd;
3394 		syscallarg(char *) buf;
3395 		syscallarg(u_int) count;
3396 		syscallarg(long *) basep;
3397 	} */ *uap;
3398 {
3399 	struct vnode *vp;
3400 	struct file *fp;
3401 	struct uio auio;
3402 	struct iovec aiov;
3403 	long loff;
3404 	int error, eofflag;
3405 
3406 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3407 		return (error);
3408 	if ((fp->f_flag & FREAD) == 0) {
3409 		fdrop(fp, td);
3410 		return (EBADF);
3411 	}
3412 	vp = (struct vnode *)fp->f_data;
3413 unionread:
3414 	if (vp->v_type != VDIR) {
3415 		fdrop(fp, td);
3416 		return (EINVAL);
3417 	}
3418 	aiov.iov_base = SCARG(uap, buf);
3419 	aiov.iov_len = SCARG(uap, count);
3420 	auio.uio_iov = &aiov;
3421 	auio.uio_iovcnt = 1;
3422 	auio.uio_rw = UIO_READ;
3423 	auio.uio_segflg = UIO_USERSPACE;
3424 	auio.uio_td = td;
3425 	auio.uio_resid = SCARG(uap, count);
3426 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3427 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3428 	loff = auio.uio_offset = fp->f_offset;
3429 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
3430 	fp->f_offset = auio.uio_offset;
3431 	VOP_UNLOCK(vp, 0, td);
3432 	if (error) {
3433 		fdrop(fp, td);
3434 		return (error);
3435 	}
3436 	if (SCARG(uap, count) == auio.uio_resid) {
3437 		if (union_dircheckp) {
3438 			error = union_dircheckp(td, &vp, fp);
3439 			if (error == -1)
3440 				goto unionread;
3441 			if (error) {
3442 				fdrop(fp, td);
3443 				return (error);
3444 			}
3445 		}
3446 		if ((vp->v_flag & VROOT) &&
3447 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3448 			struct vnode *tvp = vp;
3449 			vp = vp->v_mount->mnt_vnodecovered;
3450 			VREF(vp);
3451 			fp->f_data = (caddr_t) vp;
3452 			fp->f_offset = 0;
3453 			vrele(tvp);
3454 			goto unionread;
3455 		}
3456 	}
3457 	if (SCARG(uap, basep) != NULL) {
3458 		error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3459 		    sizeof(long));
3460 	}
3461 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
3462 	fdrop(fp, td);
3463 	return (error);
3464 }
3465 #ifndef _SYS_SYSPROTO_H_
3466 struct getdents_args {
3467 	int fd;
3468 	char *buf;
3469 	size_t count;
3470 };
3471 #endif
3472 int
3473 getdents(td, uap)
3474 	struct thread *td;
3475 	register struct getdents_args /* {
3476 		syscallarg(int) fd;
3477 		syscallarg(char *) buf;
3478 		syscallarg(u_int) count;
3479 	} */ *uap;
3480 {
3481 	struct getdirentries_args ap;
3482 	ap.fd = uap->fd;
3483 	ap.buf = uap->buf;
3484 	ap.count = uap->count;
3485 	ap.basep = NULL;
3486 	return getdirentries(td, &ap);
3487 }
3488 
3489 /*
3490  * Set the mode mask for creation of filesystem nodes.
3491  *
3492  * MP SAFE
3493  */
3494 #ifndef _SYS_SYSPROTO_H_
3495 struct umask_args {
3496 	int	newmask;
3497 };
3498 #endif
3499 int
3500 umask(td, uap)
3501 	struct thread *td;
3502 	struct umask_args /* {
3503 		syscallarg(int) newmask;
3504 	} */ *uap;
3505 {
3506 	register struct filedesc *fdp;
3507 
3508 	FILEDESC_LOCK(td->td_proc->p_fd);
3509 	fdp = td->td_proc->p_fd;
3510 	td->td_retval[0] = fdp->fd_cmask;
3511 	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
3512 	FILEDESC_UNLOCK(td->td_proc->p_fd);
3513 	return (0);
3514 }
3515 
3516 /*
3517  * Void all references to file by ripping underlying filesystem
3518  * away from vnode.
3519  */
3520 #ifndef _SYS_SYSPROTO_H_
3521 struct revoke_args {
3522 	char	*path;
3523 };
3524 #endif
3525 /* ARGSUSED */
3526 int
3527 revoke(td, uap)
3528 	struct thread *td;
3529 	register struct revoke_args /* {
3530 		syscallarg(char *) path;
3531 	} */ *uap;
3532 {
3533 	struct mount *mp;
3534 	struct vnode *vp;
3535 	struct vattr vattr;
3536 	int error;
3537 	struct nameidata nd;
3538 
3539 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path),
3540 	    td);
3541 	if ((error = namei(&nd)) != 0)
3542 		return (error);
3543 	vp = nd.ni_vp;
3544 	NDFREE(&nd, NDF_ONLY_PNBUF);
3545 	if (vp->v_type != VCHR) {
3546 		vput(vp);
3547 		return (EINVAL);
3548 	}
3549 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3550 	if (error) {
3551 		vput(vp);
3552 		return (error);
3553 	}
3554 	VOP_UNLOCK(vp, 0, td);
3555 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3556 		error = suser_xxx(0, td->td_proc, PRISON_ROOT);
3557 		if (error)
3558 			goto out;
3559 	}
3560 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3561 		goto out;
3562 	if (vcount(vp) > 1)
3563 		VOP_REVOKE(vp, REVOKEALL);
3564 	vn_finished_write(mp);
3565 out:
3566 	vrele(vp);
3567 	return (error);
3568 }
3569 
3570 /*
3571  * Convert a user file descriptor to a kernel file entry.
3572  * The file entry is locked upon returning.
3573  */
3574 int
3575 getvnode(fdp, fd, fpp)
3576 	struct filedesc *fdp;
3577 	int fd;
3578 	struct file **fpp;
3579 {
3580 	int error;
3581 	struct file *fp;
3582 
3583 	fp = NULL;
3584 	if (fdp == NULL)
3585 		error = EBADF;
3586 	else {
3587 		FILEDESC_LOCK(fdp);
3588 		if ((u_int)fd >= fdp->fd_nfiles ||
3589 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3590 			error = EBADF;
3591 		else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
3592 			fp = NULL;
3593 			error = EINVAL;
3594 		} else {
3595 			fhold(fp);
3596 			error = 0;
3597 		}
3598 		FILEDESC_UNLOCK(fdp);
3599 	}
3600 	*fpp = fp;
3601 	return (error);
3602 }
3603 /*
3604  * Get (NFS) file handle
3605  */
3606 #ifndef _SYS_SYSPROTO_H_
3607 struct getfh_args {
3608 	char	*fname;
3609 	fhandle_t *fhp;
3610 };
3611 #endif
3612 int
3613 getfh(td, uap)
3614 	struct thread *td;
3615 	register struct getfh_args *uap;
3616 {
3617 	struct nameidata nd;
3618 	fhandle_t fh;
3619 	register struct vnode *vp;
3620 	int error;
3621 
3622 	/*
3623 	 * Must be super user
3624 	 */
3625 	error = suser_td(td);
3626 	if (error)
3627 		return (error);
3628 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3629 	error = namei(&nd);
3630 	if (error)
3631 		return (error);
3632 	NDFREE(&nd, NDF_ONLY_PNBUF);
3633 	vp = nd.ni_vp;
3634 	bzero(&fh, sizeof(fh));
3635 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3636 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3637 	vput(vp);
3638 	if (error)
3639 		return (error);
3640 	error = copyout(&fh, uap->fhp, sizeof (fh));
3641 	return (error);
3642 }
3643 
3644 /*
3645  * syscall for the rpc.lockd to use to translate a NFS file handle into
3646  * an open descriptor.
3647  *
3648  * warning: do not remove the suser() call or this becomes one giant
3649  * security hole.
3650  */
3651 #ifndef _SYS_SYSPROTO_H_
3652 struct fhopen_args {
3653 	const struct fhandle *u_fhp;
3654 	int flags;
3655 };
3656 #endif
3657 int
3658 fhopen(td, uap)
3659 	struct thread *td;
3660 	struct fhopen_args /* {
3661 		syscallarg(const struct fhandle *) u_fhp;
3662 		syscallarg(int) flags;
3663 	} */ *uap;
3664 {
3665 	struct proc *p = td->td_proc;
3666 	struct mount *mp;
3667 	struct vnode *vp;
3668 	struct fhandle fhp;
3669 	struct vattr vat;
3670 	struct vattr *vap = &vat;
3671 	struct flock lf;
3672 	struct file *fp;
3673 	register struct filedesc *fdp = p->p_fd;
3674 	int fmode, mode, error, type;
3675 	struct file *nfp;
3676 	int indx;
3677 
3678 	/*
3679 	 * Must be super user
3680 	 */
3681 	error = suser_td(td);
3682 	if (error)
3683 		return (error);
3684 
3685 	fmode = FFLAGS(SCARG(uap, flags));
3686 	/* why not allow a non-read/write open for our lockd? */
3687 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3688 		return (EINVAL);
3689 	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
3690 	if (error)
3691 		return(error);
3692 	/* find the mount point */
3693 	mp = vfs_getvfs(&fhp.fh_fsid);
3694 	if (mp == NULL)
3695 		return (ESTALE);
3696 	/* now give me my vnode, it gets returned to me locked */
3697 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3698 	if (error)
3699 		return (error);
3700  	/*
3701 	 * from now on we have to make sure not
3702 	 * to forget about the vnode
3703 	 * any error that causes an abort must vput(vp)
3704 	 * just set error = err and 'goto bad;'.
3705 	 */
3706 
3707 	/*
3708 	 * from vn_open
3709 	 */
3710 	if (vp->v_type == VLNK) {
3711 		error = EMLINK;
3712 		goto bad;
3713 	}
3714 	if (vp->v_type == VSOCK) {
3715 		error = EOPNOTSUPP;
3716 		goto bad;
3717 	}
3718 	mode = 0;
3719 	if (fmode & (FWRITE | O_TRUNC)) {
3720 		if (vp->v_type == VDIR) {
3721 			error = EISDIR;
3722 			goto bad;
3723 		}
3724 		error = vn_writechk(vp);
3725 		if (error)
3726 			goto bad;
3727 		mode |= VWRITE;
3728 	}
3729 	if (fmode & FREAD)
3730 		mode |= VREAD;
3731 	if (mode) {
3732 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
3733 		if (error)
3734 			goto bad;
3735 	}
3736 	if (fmode & O_TRUNC) {
3737 		VOP_UNLOCK(vp, 0, td);				/* XXX */
3738 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3739 			vrele(vp);
3740 			return (error);
3741 		}
3742 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3743 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
3744 		VATTR_NULL(vap);
3745 		vap->va_size = 0;
3746 		error = VOP_SETATTR(vp, vap, td->td_ucred, td);
3747 		vn_finished_write(mp);
3748 		if (error)
3749 			goto bad;
3750 	}
3751 	error = VOP_OPEN(vp, fmode, td->td_ucred, td);
3752 	if (error)
3753 		goto bad;
3754 	/*
3755 	 * Make sure that a VM object is created for VMIO support.
3756 	 */
3757 	if (vn_canvmio(vp) == TRUE) {
3758 		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
3759 			goto bad;
3760 	}
3761 	if (fmode & FWRITE)
3762 		vp->v_writecount++;
3763 
3764 	/*
3765 	 * end of vn_open code
3766 	 */
3767 
3768 	if ((error = falloc(td, &nfp, &indx)) != 0) {
3769 		if (fmode & FWRITE)
3770 			vp->v_writecount--;
3771 		goto bad;
3772 	}
3773 	fp = nfp;
3774 
3775 	/*
3776 	 * Hold an extra reference to avoid having fp ripped out
3777 	 * from under us while we block in the lock op
3778 	 */
3779 	fhold(fp);
3780 	nfp->f_data = (caddr_t)vp;
3781 	nfp->f_flag = fmode & FMASK;
3782 	nfp->f_ops = &vnops;
3783 	nfp->f_type = DTYPE_VNODE;
3784 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
3785 		lf.l_whence = SEEK_SET;
3786 		lf.l_start = 0;
3787 		lf.l_len = 0;
3788 		if (fmode & O_EXLOCK)
3789 			lf.l_type = F_WRLCK;
3790 		else
3791 			lf.l_type = F_RDLCK;
3792 		type = F_FLOCK;
3793 		if ((fmode & FNONBLOCK) == 0)
3794 			type |= F_WAIT;
3795 		VOP_UNLOCK(vp, 0, td);
3796 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
3797 			/*
3798 			 * The lock request failed.  Normally close the
3799 			 * descriptor but handle the case where someone might
3800 			 * have dup()d or close()d it when we weren't looking.
3801 			 */
3802 			FILEDESC_LOCK(fdp);
3803 			if (fdp->fd_ofiles[indx] == fp) {
3804 				fdp->fd_ofiles[indx] = NULL;
3805 				FILEDESC_UNLOCK(fdp);
3806 				fdrop(fp, td);
3807 			} else
3808 				FILEDESC_UNLOCK(fdp);
3809 			/*
3810 			 * release our private reference
3811 			 */
3812 			fdrop(fp, td);
3813 			return(error);
3814 		}
3815 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3816 		fp->f_flag |= FHASLOCK;
3817 	}
3818 	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
3819 		vfs_object_create(vp, td, td->td_ucred);
3820 
3821 	VOP_UNLOCK(vp, 0, td);
3822 	fdrop(fp, td);
3823 	td->td_retval[0] = indx;
3824 	return (0);
3825 
3826 bad:
3827 	vput(vp);
3828 	return (error);
3829 }
3830 
3831 /*
3832  * Stat an (NFS) file handle.
3833  */
3834 #ifndef _SYS_SYSPROTO_H_
3835 struct fhstat_args {
3836 	struct fhandle *u_fhp;
3837 	struct stat *sb;
3838 };
3839 #endif
3840 int
3841 fhstat(td, uap)
3842 	struct thread *td;
3843 	register struct fhstat_args /* {
3844 		syscallarg(struct fhandle *) u_fhp;
3845 		syscallarg(struct stat *) sb;
3846 	} */ *uap;
3847 {
3848 	struct stat sb;
3849 	fhandle_t fh;
3850 	struct mount *mp;
3851 	struct vnode *vp;
3852 	int error;
3853 
3854 	/*
3855 	 * Must be super user
3856 	 */
3857 	error = suser_td(td);
3858 	if (error)
3859 		return (error);
3860 
3861 	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
3862 	if (error)
3863 		return (error);
3864 
3865 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3866 		return (ESTALE);
3867 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3868 		return (error);
3869 	error = vn_stat(vp, &sb, td);
3870 	vput(vp);
3871 	if (error)
3872 		return (error);
3873 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
3874 	return (error);
3875 }
3876 
3877 /*
3878  * Implement fstatfs() for (NFS) file handles.
3879  */
3880 #ifndef _SYS_SYSPROTO_H_
3881 struct fhstatfs_args {
3882 	struct fhandle *u_fhp;
3883 	struct statfs *buf;
3884 };
3885 #endif
3886 int
3887 fhstatfs(td, uap)
3888 	struct thread *td;
3889 	struct fhstatfs_args /* {
3890 		syscallarg(struct fhandle) *u_fhp;
3891 		syscallarg(struct statfs) *buf;
3892 	} */ *uap;
3893 {
3894 	struct statfs *sp;
3895 	struct mount *mp;
3896 	struct vnode *vp;
3897 	struct statfs sb;
3898 	fhandle_t fh;
3899 	int error;
3900 
3901 	/*
3902 	 * Must be super user
3903 	 */
3904 	error = suser_td(td);
3905 	if (error)
3906 		return (error);
3907 
3908 	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
3909 		return (error);
3910 
3911 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3912 		return (ESTALE);
3913 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3914 		return (error);
3915 	mp = vp->v_mount;
3916 	sp = &mp->mnt_stat;
3917 	vput(vp);
3918 	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3919 		return (error);
3920 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3921 	if (suser_xxx(td->td_ucred, 0, 0)) {
3922 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
3923 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3924 		sp = &sb;
3925 	}
3926 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
3927 }
3928 
3929 /*
3930  * Syscall to push extended attribute configuration information into the
3931  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3932  * a command (int cmd), and attribute name and misc data.  For now, the
3933  * attribute name is left in userspace for consumption by the VFS_op.
3934  * It will probably be changed to be copied into sysspace by the
3935  * syscall in the future, once issues with various consumers of the
3936  * attribute code have raised their hands.
3937  *
3938  * Currently this is used only by UFS Extended Attributes.
3939  */
3940 int
3941 extattrctl(td, uap)
3942 	struct thread *td;
3943 	struct extattrctl_args *uap;
3944 {
3945 	struct vnode *filename_vp;
3946 	struct nameidata nd;
3947 	struct mount *mp, *mp_writable;
3948 	char attrname[EXTATTR_MAXNAMELEN];
3949 	int error;
3950 
3951 	/*
3952 	 * SCARG(uap, attrname) not always defined.  We check again later
3953 	 * when we invoke the VFS call so as to pass in NULL there if needed.
3954 	 */
3955 	if (SCARG(uap, attrname) != NULL) {
3956 		error = copyinstr(SCARG(uap, attrname), attrname,
3957 		    EXTATTR_MAXNAMELEN, NULL);
3958 		if (error)
3959 			return (error);
3960 	}
3961 
3962 	/*
3963 	 * SCARG(uap, filename) not always defined.  If it is, grab
3964 	 * a vnode lock, which VFS_EXTATTRCTL() will later release.
3965 	 */
3966 	filename_vp = NULL;
3967 	if (SCARG(uap, filename) != NULL) {
3968 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3969 		    SCARG(uap, filename), td);
3970 		if ((error = namei(&nd)) != 0)
3971 			return (error);
3972 		filename_vp = nd.ni_vp;
3973 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3974 	}
3975 
3976 	/* SCARG(uap, path) always defined. */
3977 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3978 	if ((error = namei(&nd)) != 0) {
3979 		if (filename_vp != NULL)
3980 			vput(filename_vp);
3981 		return (error);
3982 	}
3983 	mp = nd.ni_vp->v_mount;
3984 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
3985 	NDFREE(&nd, 0);
3986 	if (error) {
3987 		if (filename_vp != NULL)
3988 			vput(filename_vp);
3989 		return (error);
3990 	}
3991 
3992 	if (SCARG(uap, attrname) != NULL) {
3993 		error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
3994 		    SCARG(uap, attrnamespace), attrname, td);
3995 	} else {
3996 		error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
3997 		    SCARG(uap, attrnamespace), NULL, td);
3998 	}
3999 
4000 	vn_finished_write(mp_writable);
4001 	/*
4002 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4003 	 * filename_vp, so vrele it if it is defined.
4004 	 */
4005 	if (filename_vp != NULL)
4006 		vrele(filename_vp);
4007 
4008 	return (error);
4009 }
4010 
4011 /*-
4012  * Set a named extended attribute on a file or directory
4013  *
4014  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4015  *            kernelspace string pointer "attrname", userspace buffer
4016  *            pointer "data", buffer length "nbytes", thread "td".
4017  * Returns: 0 on success, an error number otherwise
4018  * Locks: none
4019  * References: vp must be a valid reference for the duration of the call
4020  */
4021 static int
4022 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4023     void *data, size_t nbytes, struct thread *td)
4024 {
4025 	struct mount *mp;
4026 	struct uio auio;
4027 	struct iovec aiov;
4028 	ssize_t cnt;
4029 	int error;
4030 
4031 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4032 		return (error);
4033 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4034 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4035 
4036 	aiov.iov_base = data;
4037 	aiov.iov_len = nbytes;
4038 	auio.uio_iov = &aiov;
4039 	auio.uio_iovcnt = 1;
4040 	auio.uio_offset = 0;
4041 	if (nbytes > INT_MAX) {
4042 		error = EINVAL;
4043 		goto done;
4044 	}
4045 	auio.uio_resid = nbytes;
4046 	auio.uio_rw = UIO_WRITE;
4047 	auio.uio_segflg = UIO_USERSPACE;
4048 	auio.uio_td = td;
4049 	cnt = nbytes;
4050 
4051 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4052 	    td->td_ucred, td);
4053 	cnt -= auio.uio_resid;
4054 	td->td_retval[0] = cnt;
4055 
4056 done:
4057 	VOP_UNLOCK(vp, 0, td);
4058 	vn_finished_write(mp);
4059 	return (error);
4060 }
4061 
4062 int
4063 extattr_set_file(td, uap)
4064 	struct thread *td;
4065 	struct extattr_set_file_args *uap;
4066 {
4067 	struct nameidata nd;
4068 	char attrname[EXTATTR_MAXNAMELEN];
4069 	int error;
4070 
4071 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4072 	    NULL);
4073 	if (error)
4074 		return (error);
4075 
4076 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
4077 	if ((error = namei(&nd)) != 0)
4078 		return (error);
4079 	NDFREE(&nd, NDF_ONLY_PNBUF);
4080 
4081 	error = extattr_set_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
4082 	    SCARG(uap, data), SCARG(uap, nbytes), td);
4083 
4084 	vrele(nd.ni_vp);
4085 	return (error);
4086 }
4087 
4088 int
4089 extattr_set_fd(td, uap)
4090 	struct thread *td;
4091 	struct extattr_set_fd_args *uap;
4092 {
4093 	struct file *fp;
4094 	char attrname[EXTATTR_MAXNAMELEN];
4095 	int error;
4096 
4097 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4098 	    NULL);
4099 	if (error)
4100 		return (error);
4101 
4102 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4103 		return (error);
4104 
4105 	error = extattr_set_vp((struct vnode *)fp->f_data,
4106 	    SCARG(uap, attrnamespace), attrname, SCARG(uap, data),
4107 	    SCARG(uap, nbytes), td);
4108 	fdrop(fp, td);
4109 
4110 	return (error);
4111 }
4112 
4113 /*-
4114  * Get a named extended attribute on a file or directory
4115  *
4116  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4117  *            kernelspace string pointer "attrname", userspace buffer
4118  *            pointer "data", buffer length "nbytes", thread "td".
4119  * Returns: 0 on success, an error number otherwise
4120  * Locks: none
4121  * References: vp must be a valid reference for the duration of the call
4122  */
4123 static int
4124 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4125     void *data, size_t nbytes, struct thread *td)
4126 {
4127 	struct uio auio;
4128 	struct iovec aiov;
4129 	ssize_t cnt;
4130 	size_t size;
4131 	int error;
4132 
4133 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4134 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4135 
4136 	/*
4137 	 * Slightly unusual semantics: if the user provides a NULL data
4138 	 * pointer, they don't want to receive the data, just the
4139 	 * maximum read length.
4140 	 */
4141 	if (data != NULL) {
4142 		aiov.iov_base = data;
4143 		aiov.iov_len = nbytes;
4144 		auio.uio_iov = &aiov;
4145 		auio.uio_offset = 0;
4146 		if (nbytes > INT_MAX) {
4147 			error = EINVAL;
4148 			goto done;
4149 		}
4150 		auio.uio_resid = nbytes;
4151 		auio.uio_rw = UIO_READ;
4152 		auio.uio_segflg = UIO_USERSPACE;
4153 		auio.uio_td = td;
4154 		cnt = nbytes;
4155 		error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio,
4156 		    NULL, td->td_ucred, td);
4157 		cnt -= auio.uio_resid;
4158 		td->td_retval[0] = cnt;
4159 	} else {
4160 		error = VOP_GETEXTATTR(vp, attrnamespace, attrname, NULL,
4161 		    &size, td->td_ucred, td);
4162 		td->td_retval[0] = size;
4163 	}
4164 done:
4165 	VOP_UNLOCK(vp, 0, td);
4166 	return (error);
4167 }
4168 
4169 int
4170 extattr_get_file(td, uap)
4171 	struct thread *td;
4172 	struct extattr_get_file_args *uap;
4173 {
4174 	struct nameidata nd;
4175 	char attrname[EXTATTR_MAXNAMELEN];
4176 	int error;
4177 
4178 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4179 	    NULL);
4180 	if (error)
4181 		return (error);
4182 
4183 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
4184 	if ((error = namei(&nd)) != 0)
4185 		return (error);
4186 	NDFREE(&nd, NDF_ONLY_PNBUF);
4187 
4188 	error = extattr_get_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
4189 	    SCARG(uap, data), SCARG(uap, nbytes), td);
4190 
4191 	vrele(nd.ni_vp);
4192 	return (error);
4193 }
4194 
4195 int
4196 extattr_get_fd(td, uap)
4197 	struct thread *td;
4198 	struct extattr_get_fd_args *uap;
4199 {
4200 	struct file *fp;
4201 	char attrname[EXTATTR_MAXNAMELEN];
4202 	int error;
4203 
4204 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4205 	    NULL);
4206 	if (error)
4207 		return (error);
4208 
4209 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4210 		return (error);
4211 
4212 	error = extattr_get_vp((struct vnode *)fp->f_data,
4213 	    SCARG(uap, attrnamespace), attrname, SCARG(uap, data),
4214 	    SCARG(uap, nbytes), td);
4215 
4216 	fdrop(fp, td);
4217 	return (error);
4218 }
4219 
4220 /*
4221  * extattr_delete_vp(): Delete a named extended attribute on a file or
4222  *                      directory
4223  *
4224  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4225  *            kernelspace string pointer "attrname", proc "p"
4226  * Returns: 0 on success, an error number otherwise
4227  * Locks: none
4228  * References: vp must be a valid reference for the duration of the call
4229  */
4230 static int
4231 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4232     struct thread *td)
4233 {
4234 	struct mount *mp;
4235 	int error;
4236 
4237 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4238 		return (error);
4239 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4240 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4241 
4242 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred,
4243 	    td);
4244 
4245 	VOP_UNLOCK(vp, 0, td);
4246 	vn_finished_write(mp);
4247 	return (error);
4248 }
4249 
4250 int
4251 extattr_delete_file(td, uap)
4252 	struct thread *td;
4253 	struct extattr_delete_file_args *uap;
4254 {
4255 	struct nameidata nd;
4256 	char attrname[EXTATTR_MAXNAMELEN];
4257 	int error;
4258 
4259 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4260 	     NULL);
4261 	if (error)
4262 		return(error);
4263 
4264 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
4265 	if ((error = namei(&nd)) != 0)
4266 		return(error);
4267 	NDFREE(&nd, NDF_ONLY_PNBUF);
4268 
4269 	error = extattr_delete_vp(nd.ni_vp, SCARG(uap, attrnamespace),
4270 	    attrname, td);
4271 
4272 	vrele(nd.ni_vp);
4273 	return(error);
4274 }
4275 
4276 int
4277 extattr_delete_fd(td, uap)
4278 	struct thread *td;
4279 	struct extattr_delete_fd_args *uap;
4280 {
4281 	struct file *fp;
4282 	struct vnode *vp;
4283 	char attrname[EXTATTR_MAXNAMELEN];
4284 	int error;
4285 
4286 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4287 	    NULL);
4288 	if (error)
4289 		return (error);
4290 
4291 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4292 		return (error);
4293 	vp = (struct vnode *)fp->f_data;
4294 
4295 	error = extattr_delete_vp((struct vnode *)fp->f_data,
4296 	    SCARG(uap, attrnamespace), attrname, td);
4297 
4298 	fdrop(fp, td);
4299 	return (error);
4300 }
4301