xref: /freebsd/sys/kern/vfs_extattr.c (revision 6990ffd8a95caaba6858ad44ff1b3157d1efba8f)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  * $FreeBSD$
40  */
41 
42 /* For 4.3 integer FS ID compatibility */
43 #include "opt_compat.h"
44 #include "opt_ffs.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/sysent.h>
51 #include <sys/malloc.h>
52 #include <sys/mount.h>
53 #include <sys/mutex.h>
54 #include <sys/sysproto.h>
55 #include <sys/namei.h>
56 #include <sys/filedesc.h>
57 #include <sys/kernel.h>
58 #include <sys/fcntl.h>
59 #include <sys/file.h>
60 #include <sys/linker.h>
61 #include <sys/stat.h>
62 #include <sys/sx.h>
63 #include <sys/unistd.h>
64 #include <sys/vnode.h>
65 #include <sys/proc.h>
66 #include <sys/dirent.h>
67 #include <sys/extattr.h>
68 #include <sys/jail.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/limits.h>
72 
73 #include <vm/vm.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_zone.h>
76 #include <vm/vm_page.h>
77 
78 static int change_dir __P((struct nameidata *ndp, struct thread *td));
79 static void checkdirs __P((struct vnode *olddp, struct vnode *newdp));
80 static int chroot_refuse_vdir_fds __P((struct filedesc *fdp));
81 static int getutimes __P((const struct timeval *, struct timespec *));
82 static int setfown __P((struct thread *td, struct vnode *, uid_t, gid_t));
83 static int setfmode __P((struct thread *td, struct vnode *, int));
84 static int setfflags __P((struct thread *td, struct vnode *, int));
85 static int setutimes __P((struct thread *td, struct vnode *,
86     const struct timespec *, int));
87 static int	usermount = 0;	/* if 1, non-root can mount fs. */
88 
89 int (*union_dircheckp) __P((struct thread *td, struct vnode **, struct file *));
90 
91 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
92 
93 /*
94  * Virtual File System System Calls
95  */
96 
97 /*
98  * Mount a file system.
99  */
100 #ifndef _SYS_SYSPROTO_H_
101 struct mount_args {
102 	char	*type;
103 	char	*path;
104 	int	flags;
105 	caddr_t	data;
106 };
107 #endif
108 /* ARGSUSED */
109 int
110 mount(td, uap)
111 	struct thread *td;
112 	struct mount_args /* {
113 		syscallarg(char *) type;
114 		syscallarg(char *) path;
115 		syscallarg(int) flags;
116 		syscallarg(caddr_t) data;
117 	} */ *uap;
118 {
119 	char *fstype;
120 	char *fspath;
121 	int error;
122 
123 	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK | M_ZERO);
124 	fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK | M_ZERO);
125 
126 	/*
127 	 * vfs_mount() actually takes a kernel string for `type' and
128 	 * `path' now, so extract them.
129 	 */
130 	error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL);
131 	if (error)
132 		goto finish;
133 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
134 	if (error)
135 		goto finish;
136 	error = vfs_mount(td, fstype, fspath, SCARG(uap, flags),
137 	    SCARG(uap, data));
138 finish:
139 	free(fstype, M_TEMP);
140 	free(fspath, M_TEMP);
141 	return (error);
142 }
143 
144 /*
145  * vfs_mount(): actually attempt a filesystem mount.
146  *
147  * This routine is designed to be a "generic" entry point for routines
148  * that wish to mount a filesystem. All parameters except `fsdata' are
149  * pointers into kernel space. `fsdata' is currently still a pointer
150  * into userspace.
151  */
152 int
153 vfs_mount(td, fstype, fspath, fsflags, fsdata)
154 	struct thread *td;
155 	const char *fstype;
156 	char *fspath;
157 	int fsflags;
158 	void *fsdata;
159 {
160 	struct vnode *vp;
161 	struct mount *mp;
162 	struct vfsconf *vfsp;
163 	int error, flag = 0, flag2 = 0;
164 	struct vattr va;
165 	struct nameidata nd;
166 	struct proc *p = td->td_proc;
167 
168 	/*
169 	 * Be ultra-paranoid about making sure the type and fspath
170 	 * variables will fit in our mp buffers, including the
171 	 * terminating NUL.
172 	 */
173 	if ((strlen(fstype) >= MFSNAMELEN - 1) ||
174 	    (strlen(fspath) >= MNAMELEN - 1))
175 		return (ENAMETOOLONG);
176 
177 	if (usermount == 0 && (error = suser_td(td)))
178 		return (error);
179 	/*
180 	 * Do not allow NFS export by non-root users.
181 	 */
182 	if (fsflags & MNT_EXPORTED) {
183 		error = suser_td(td);
184 		if (error)
185 			return (error);
186 	}
187 	/*
188 	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
189 	 */
190 	if (suser_xxx(p->p_ucred, 0, 0))
191 		fsflags |= MNT_NOSUID | MNT_NODEV;
192 	/*
193 	 * Get vnode to be covered
194 	 */
195 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
196 	if ((error = namei(&nd)) != 0)
197 		return (error);
198 	NDFREE(&nd, NDF_ONLY_PNBUF);
199 	vp = nd.ni_vp;
200 	if (fsflags & MNT_UPDATE) {
201 		if ((vp->v_flag & VROOT) == 0) {
202 			vput(vp);
203 			return (EINVAL);
204 		}
205 		mp = vp->v_mount;
206 		flag = mp->mnt_flag;
207 		flag2 = mp->mnt_kern_flag;
208 		/*
209 		 * We only allow the filesystem to be reloaded if it
210 		 * is currently mounted read-only.
211 		 */
212 		if ((fsflags & MNT_RELOAD) &&
213 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
214 			vput(vp);
215 			return (EOPNOTSUPP);	/* Needs translation */
216 		}
217 		/*
218 		 * Only root, or the user that did the original mount is
219 		 * permitted to update it.
220 		 */
221 		if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid &&
222 		    (error = suser_td(td))) {
223 			vput(vp);
224 			return (error);
225 		}
226 		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
227 			vput(vp);
228 			return (EBUSY);
229 		}
230 		mtx_lock(&vp->v_interlock);
231 		if ((vp->v_flag & VMOUNT) != 0 ||
232 		    vp->v_mountedhere != NULL) {
233 			mtx_unlock(&vp->v_interlock);
234 			vfs_unbusy(mp, td);
235 			vput(vp);
236 			return (EBUSY);
237 		}
238 		vp->v_flag |= VMOUNT;
239 		mtx_unlock(&vp->v_interlock);
240 		mp->mnt_flag |= fsflags &
241 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
242 		VOP_UNLOCK(vp, 0, td);
243 		goto update;
244 	}
245 	/*
246 	 * If the user is not root, ensure that they own the directory
247 	 * onto which we are attempting to mount.
248 	 */
249 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, td)) ||
250 	    (va.va_uid != p->p_ucred->cr_uid &&
251 	     (error = suser_td(td)))) {
252 		vput(vp);
253 		return (error);
254 	}
255 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, td, 0, 0)) != 0) {
256 		vput(vp);
257 		return (error);
258 	}
259 	if (vp->v_type != VDIR) {
260 		vput(vp);
261 		return (ENOTDIR);
262 	}
263 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
264 		if (!strcmp(vfsp->vfc_name, fstype))
265 			break;
266 	if (vfsp == NULL) {
267 		linker_file_t lf;
268 
269 		/* Only load modules for root (very important!) */
270 		if ((error = suser_td(td)) != 0) {
271 			vput(vp);
272 			return error;
273 		}
274 		error = linker_load_file(fstype, &lf);
275 		if (error || lf == NULL) {
276 			vput(vp);
277 			if (lf == NULL)
278 				error = ENODEV;
279 			return error;
280 		}
281 		lf->userrefs++;
282 		/* lookup again, see if the VFS was loaded */
283 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
284 			if (!strcmp(vfsp->vfc_name, fstype))
285 				break;
286 		if (vfsp == NULL) {
287 			lf->userrefs--;
288 			linker_file_unload(lf);
289 			vput(vp);
290 			return (ENODEV);
291 		}
292 	}
293 	mtx_lock(&vp->v_interlock);
294 	if ((vp->v_flag & VMOUNT) != 0 ||
295 	    vp->v_mountedhere != NULL) {
296 		mtx_unlock(&vp->v_interlock);
297 		vput(vp);
298 		return (EBUSY);
299 	}
300 	vp->v_flag |= VMOUNT;
301 	mtx_unlock(&vp->v_interlock);
302 
303 	/*
304 	 * Allocate and initialize the filesystem.
305 	 */
306 	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
307 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
308 	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
309 	mp->mnt_op = vfsp->vfc_vfsops;
310 	mp->mnt_vfc = vfsp;
311 	vfsp->vfc_refcount++;
312 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
313 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
314 	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
315 	mp->mnt_stat.f_fstypename[MFSNAMELEN - 1] = '\0';
316 	mp->mnt_vnodecovered = vp;
317 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
318 	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
319 	mp->mnt_stat.f_mntonname[MNAMELEN - 1] = '\0';
320 	mp->mnt_iosize_max = DFLTPHYS;
321 	VOP_UNLOCK(vp, 0, td);
322 update:
323 	/*
324 	 * Set the mount level flags.
325 	 */
326 	if (fsflags & MNT_RDONLY)
327 		mp->mnt_flag |= MNT_RDONLY;
328 	else if (mp->mnt_flag & MNT_RDONLY)
329 		mp->mnt_kern_flag |= MNTK_WANTRDWR;
330 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
331 	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
332 	    MNT_NOSYMFOLLOW | MNT_IGNORE |
333 	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
334 	mp->mnt_flag |= fsflags & (MNT_NOSUID | MNT_NOEXEC |
335 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
336 	    MNT_NOSYMFOLLOW | MNT_IGNORE |
337 	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
338 	/*
339 	 * Mount the filesystem.
340 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
341 	 * get.  No freeing of cn_pnbuf.
342 	 */
343 	error = VFS_MOUNT(mp, fspath, fsdata, &nd, td);
344 	if (mp->mnt_flag & MNT_UPDATE) {
345 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
346 			mp->mnt_flag &= ~MNT_RDONLY;
347 		mp->mnt_flag &=~
348 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
349 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
350 		if (error) {
351 			mp->mnt_flag = flag;
352 			mp->mnt_kern_flag = flag2;
353 		}
354 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
355 			if (mp->mnt_syncer == NULL)
356 				error = vfs_allocate_syncvnode(mp);
357 		} else {
358 			if (mp->mnt_syncer != NULL)
359 				vrele(mp->mnt_syncer);
360 			mp->mnt_syncer = NULL;
361 		}
362 		vfs_unbusy(mp, td);
363 		mtx_lock(&vp->v_interlock);
364 		vp->v_flag &= ~VMOUNT;
365 		mtx_unlock(&vp->v_interlock);
366 		vrele(vp);
367 		return (error);
368 	}
369 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
370 	/*
371 	 * Put the new filesystem on the mount list after root.
372 	 */
373 	cache_purge(vp);
374 	if (!error) {
375 		struct vnode *newdp;
376 
377 		mtx_lock(&vp->v_interlock);
378 		vp->v_flag &= ~VMOUNT;
379 		vp->v_mountedhere = mp;
380 		mtx_unlock(&vp->v_interlock);
381 		mtx_lock(&mountlist_mtx);
382 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
383 		mtx_unlock(&mountlist_mtx);
384 		if (VFS_ROOT(mp, &newdp))
385 			panic("mount: lost mount");
386 		checkdirs(vp, newdp);
387 		vput(newdp);
388 		VOP_UNLOCK(vp, 0, td);
389 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
390 			error = vfs_allocate_syncvnode(mp);
391 		vfs_unbusy(mp, td);
392 		if ((error = VFS_START(mp, 0, td)) != 0)
393 			vrele(vp);
394 	} else {
395 		mtx_lock(&vp->v_interlock);
396 		vp->v_flag &= ~VMOUNT;
397 		mtx_unlock(&vp->v_interlock);
398 		mp->mnt_vfc->vfc_refcount--;
399 		vfs_unbusy(mp, td);
400 		free((caddr_t)mp, M_MOUNT);
401 		vput(vp);
402 	}
403 	return (error);
404 }
405 
406 /*
407  * Scan all active processes to see if any of them have a current
408  * or root directory of `olddp'. If so, replace them with the new
409  * mount point.
410  */
411 static void
412 checkdirs(olddp, newdp)
413 	struct vnode *olddp, *newdp;
414 {
415 	struct filedesc *fdp;
416 	struct proc *p;
417 
418 	if (olddp->v_usecount == 1)
419 		return;
420 	sx_slock(&allproc_lock);
421 	LIST_FOREACH(p, &allproc, p_list) {
422 		fdp = p->p_fd;
423 		if (fdp == NULL)
424 			continue;
425 		if (fdp->fd_cdir == olddp) {
426 			vrele(fdp->fd_cdir);
427 			VREF(newdp);
428 			fdp->fd_cdir = newdp;
429 		}
430 		if (fdp->fd_rdir == olddp) {
431 			vrele(fdp->fd_rdir);
432 			VREF(newdp);
433 			fdp->fd_rdir = newdp;
434 		}
435 	}
436 	sx_sunlock(&allproc_lock);
437 	if (rootvnode == olddp) {
438 		vrele(rootvnode);
439 		VREF(newdp);
440 		rootvnode = newdp;
441 	}
442 }
443 
444 /*
445  * Unmount a file system.
446  *
447  * Note: unmount takes a path to the vnode mounted on as argument,
448  * not special file (as before).
449  */
450 #ifndef _SYS_SYSPROTO_H_
451 struct unmount_args {
452 	char	*path;
453 	int	flags;
454 };
455 #endif
456 /* ARGSUSED */
457 int
458 unmount(td, uap)
459 	struct thread *td;
460 	register struct unmount_args /* {
461 		syscallarg(char *) path;
462 		syscallarg(int) flags;
463 	} */ *uap;
464 {
465 	register struct vnode *vp;
466 	struct mount *mp;
467 	int error;
468 	struct nameidata nd;
469 
470 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
471 	    SCARG(uap, path), td);
472 	if ((error = namei(&nd)) != 0)
473 		return (error);
474 	vp = nd.ni_vp;
475 	NDFREE(&nd, NDF_ONLY_PNBUF);
476 	mp = vp->v_mount;
477 
478 	/*
479 	 * Only root, or the user that did the original mount is
480 	 * permitted to unmount this filesystem.
481 	 */
482 	if ((mp->mnt_stat.f_owner != td->td_proc->p_ucred->cr_uid) &&
483 	    (error = suser_td(td))) {
484 		vput(vp);
485 		return (error);
486 	}
487 
488 	/*
489 	 * Don't allow unmounting the root file system.
490 	 */
491 	if (mp->mnt_flag & MNT_ROOTFS) {
492 		vput(vp);
493 		return (EINVAL);
494 	}
495 
496 	/*
497 	 * Must be the root of the filesystem
498 	 */
499 	if ((vp->v_flag & VROOT) == 0) {
500 		vput(vp);
501 		return (EINVAL);
502 	}
503 	vput(vp);
504 	return (dounmount(mp, SCARG(uap, flags), td));
505 }
506 
507 /*
508  * Do the actual file system unmount.
509  */
510 int
511 dounmount(mp, flags, td)
512 	struct mount *mp;
513 	int flags;
514 	struct thread *td;
515 {
516 	struct vnode *coveredvp, *fsrootvp;
517 	int error;
518 	int async_flag;
519 
520 	mtx_lock(&mountlist_mtx);
521 	mp->mnt_kern_flag |= MNTK_UNMOUNT;
522 	lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_mtx, td);
523 	vn_start_write(NULL, &mp, V_WAIT);
524 
525 	if (mp->mnt_flag & MNT_EXPUBLIC)
526 		vfs_setpublicfs(NULL, NULL, NULL);
527 
528 	vfs_msync(mp, MNT_WAIT);
529 	async_flag = mp->mnt_flag & MNT_ASYNC;
530 	mp->mnt_flag &=~ MNT_ASYNC;
531 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
532 	if (mp->mnt_syncer != NULL)
533 		vrele(mp->mnt_syncer);
534 	/* Move process cdir/rdir refs on fs root to underlying vnode. */
535 	if (VFS_ROOT(mp, &fsrootvp) == 0) {
536 		if (mp->mnt_vnodecovered != NULL)
537 			checkdirs(fsrootvp, mp->mnt_vnodecovered);
538 		if (fsrootvp == rootvnode) {
539 			vrele(rootvnode);
540 			rootvnode = NULL;
541 		}
542 		vput(fsrootvp);
543 	}
544 	if (((mp->mnt_flag & MNT_RDONLY) ||
545 	     (error = VFS_SYNC(mp, MNT_WAIT, td->td_proc->p_ucred, td)) == 0) ||
546 	    (flags & MNT_FORCE)) {
547 		error = VFS_UNMOUNT(mp, flags, td);
548 	}
549 	vn_finished_write(mp);
550 	if (error) {
551 		/* Undo cdir/rdir and rootvnode changes made above. */
552 		if (VFS_ROOT(mp, &fsrootvp) == 0) {
553 			if (mp->mnt_vnodecovered != NULL)
554 				checkdirs(mp->mnt_vnodecovered, fsrootvp);
555 			if (rootvnode == NULL) {
556 				rootvnode = fsrootvp;
557 				vref(rootvnode);
558 			}
559 			vput(fsrootvp);
560 		}
561 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
562 			(void) vfs_allocate_syncvnode(mp);
563 		mtx_lock(&mountlist_mtx);
564 		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
565 		mp->mnt_flag |= async_flag;
566 		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
567 		    &mountlist_mtx, td);
568 		if (mp->mnt_kern_flag & MNTK_MWAIT)
569 			wakeup((caddr_t)mp);
570 		return (error);
571 	}
572 	mtx_lock(&mountlist_mtx);
573 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
574 	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
575 		coveredvp->v_mountedhere = NULL;
576 	mp->mnt_vfc->vfc_refcount--;
577 	if (!LIST_EMPTY(&mp->mnt_vnodelist))
578 		panic("unmount: dangling vnode");
579 	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td);
580 	lockdestroy(&mp->mnt_lock);
581 	if (coveredvp != NULL)
582 		vrele(coveredvp);
583 	if (mp->mnt_kern_flag & MNTK_MWAIT)
584 		wakeup((caddr_t)mp);
585 	free((caddr_t)mp, M_MOUNT);
586 	return (0);
587 }
588 
589 /*
590  * Sync each mounted filesystem.
591  */
592 #ifndef _SYS_SYSPROTO_H_
593 struct sync_args {
594         int     dummy;
595 };
596 #endif
597 
598 #ifdef DEBUG
599 static int syncprt = 0;
600 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
601 #endif
602 
603 /* ARGSUSED */
604 int
605 sync(td, uap)
606 	struct thread *td;
607 	struct sync_args *uap;
608 {
609 	struct mount *mp, *nmp;
610 	int asyncflag;
611 
612 	mtx_lock(&mountlist_mtx);
613 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
614 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
615 			nmp = TAILQ_NEXT(mp, mnt_list);
616 			continue;
617 		}
618 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
619 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
620 			asyncflag = mp->mnt_flag & MNT_ASYNC;
621 			mp->mnt_flag &= ~MNT_ASYNC;
622 			vfs_msync(mp, MNT_NOWAIT);
623 			VFS_SYNC(mp, MNT_NOWAIT,
624 			    ((td != NULL) ? td->td_proc->p_ucred : NOCRED), td);
625 			mp->mnt_flag |= asyncflag;
626 			vn_finished_write(mp);
627 		}
628 		mtx_lock(&mountlist_mtx);
629 		nmp = TAILQ_NEXT(mp, mnt_list);
630 		vfs_unbusy(mp, td);
631 	}
632 	mtx_unlock(&mountlist_mtx);
633 #if 0
634 /*
635  * XXX don't call vfs_bufstats() yet because that routine
636  * was not imported in the Lite2 merge.
637  */
638 #ifdef DIAGNOSTIC
639 	if (syncprt)
640 		vfs_bufstats();
641 #endif /* DIAGNOSTIC */
642 #endif
643 	return (0);
644 }
645 
646 /* XXX PRISON: could be per prison flag */
647 static int prison_quotas;
648 #if 0
649 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
650 #endif
651 
652 /*
653  * Change filesystem quotas.
654  */
655 #ifndef _SYS_SYSPROTO_H_
656 struct quotactl_args {
657 	char *path;
658 	int cmd;
659 	int uid;
660 	caddr_t arg;
661 };
662 #endif
663 /* ARGSUSED */
664 int
665 quotactl(td, uap)
666 	struct thread *td;
667 	register struct quotactl_args /* {
668 		syscallarg(char *) path;
669 		syscallarg(int) cmd;
670 		syscallarg(int) uid;
671 		syscallarg(caddr_t) arg;
672 	} */ *uap;
673 {
674 	struct mount *mp;
675 	int error;
676 	struct nameidata nd;
677 
678 	if (jailed(td->td_proc->p_ucred) && !prison_quotas)
679 		return (EPERM);
680 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
681 	if ((error = namei(&nd)) != 0)
682 		return (error);
683 	NDFREE(&nd, NDF_ONLY_PNBUF);
684 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
685 	vrele(nd.ni_vp);
686 	if (error)
687 		return (error);
688 	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
689 	    SCARG(uap, arg), td);
690 	vn_finished_write(mp);
691 	return (error);
692 }
693 
694 /*
695  * Get filesystem statistics.
696  */
697 #ifndef _SYS_SYSPROTO_H_
698 struct statfs_args {
699 	char *path;
700 	struct statfs *buf;
701 };
702 #endif
703 /* ARGSUSED */
704 int
705 statfs(td, uap)
706 	struct thread *td;
707 	register struct statfs_args /* {
708 		syscallarg(char *) path;
709 		syscallarg(struct statfs *) buf;
710 	} */ *uap;
711 {
712 	register struct mount *mp;
713 	register struct statfs *sp;
714 	int error;
715 	struct nameidata nd;
716 	struct statfs sb;
717 
718 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
719 	if ((error = namei(&nd)) != 0)
720 		return (error);
721 	mp = nd.ni_vp->v_mount;
722 	sp = &mp->mnt_stat;
723 	NDFREE(&nd, NDF_ONLY_PNBUF);
724 	vrele(nd.ni_vp);
725 	error = VFS_STATFS(mp, sp, td);
726 	if (error)
727 		return (error);
728 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
729 	if (suser_xxx(td->td_proc->p_ucred, 0, 0)) {
730 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
731 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
732 		sp = &sb;
733 	}
734 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
735 }
736 
737 /*
738  * Get filesystem statistics.
739  */
740 #ifndef _SYS_SYSPROTO_H_
741 struct fstatfs_args {
742 	int fd;
743 	struct statfs *buf;
744 };
745 #endif
746 /* ARGSUSED */
747 int
748 fstatfs(td, uap)
749 	struct thread *td;
750 	register struct fstatfs_args /* {
751 		syscallarg(int) fd;
752 		syscallarg(struct statfs *) buf;
753 	} */ *uap;
754 {
755 	struct file *fp;
756 	struct mount *mp;
757 	register struct statfs *sp;
758 	int error;
759 	struct statfs sb;
760 
761 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
762 		return (error);
763 	mp = ((struct vnode *)fp->f_data)->v_mount;
764 	sp = &mp->mnt_stat;
765 	error = VFS_STATFS(mp, sp, td);
766 	if (error)
767 		return (error);
768 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
769 	if (suser_xxx(td->td_proc->p_ucred, 0, 0)) {
770 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
771 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
772 		sp = &sb;
773 	}
774 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
775 }
776 
777 /*
778  * Get statistics on all filesystems.
779  */
780 #ifndef _SYS_SYSPROTO_H_
781 struct getfsstat_args {
782 	struct statfs *buf;
783 	long bufsize;
784 	int flags;
785 };
786 #endif
787 int
788 getfsstat(td, uap)
789 	struct thread *td;
790 	register struct getfsstat_args /* {
791 		syscallarg(struct statfs *) buf;
792 		syscallarg(long) bufsize;
793 		syscallarg(int) flags;
794 	} */ *uap;
795 {
796 	register struct mount *mp, *nmp;
797 	register struct statfs *sp;
798 	caddr_t sfsp;
799 	long count, maxcount, error;
800 
801 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
802 	sfsp = (caddr_t)SCARG(uap, buf);
803 	count = 0;
804 	mtx_lock(&mountlist_mtx);
805 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
806 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
807 			nmp = TAILQ_NEXT(mp, mnt_list);
808 			continue;
809 		}
810 		if (sfsp && count < maxcount) {
811 			sp = &mp->mnt_stat;
812 			/*
813 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
814 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
815 			 * overrides MNT_WAIT.
816 			 */
817 			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
818 			    (SCARG(uap, flags) & MNT_WAIT)) &&
819 			    (error = VFS_STATFS(mp, sp, td))) {
820 				mtx_lock(&mountlist_mtx);
821 				nmp = TAILQ_NEXT(mp, mnt_list);
822 				vfs_unbusy(mp, td);
823 				continue;
824 			}
825 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
826 			error = copyout((caddr_t)sp, sfsp, sizeof(*sp));
827 			if (error) {
828 				vfs_unbusy(mp, td);
829 				return (error);
830 			}
831 			sfsp += sizeof(*sp);
832 		}
833 		count++;
834 		mtx_lock(&mountlist_mtx);
835 		nmp = TAILQ_NEXT(mp, mnt_list);
836 		vfs_unbusy(mp, td);
837 	}
838 	mtx_unlock(&mountlist_mtx);
839 	if (sfsp && count > maxcount)
840 		td->td_retval[0] = maxcount;
841 	else
842 		td->td_retval[0] = count;
843 	return (0);
844 }
845 
846 /*
847  * Change current working directory to a given file descriptor.
848  */
849 #ifndef _SYS_SYSPROTO_H_
850 struct fchdir_args {
851 	int	fd;
852 };
853 #endif
854 /* ARGSUSED */
855 int
856 fchdir(td, uap)
857 	struct thread *td;
858 	struct fchdir_args /* {
859 		syscallarg(int) fd;
860 	} */ *uap;
861 {
862 	register struct filedesc *fdp = td->td_proc->p_fd;
863 	struct vnode *vp, *tdp;
864 	struct mount *mp;
865 	struct file *fp;
866 	int error;
867 
868 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
869 		return (error);
870 	vp = (struct vnode *)fp->f_data;
871 	VREF(vp);
872 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
873 	if (vp->v_type != VDIR)
874 		error = ENOTDIR;
875 	else
876 		error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred, td);
877 	while (!error && (mp = vp->v_mountedhere) != NULL) {
878 		if (vfs_busy(mp, 0, 0, td))
879 			continue;
880 		error = VFS_ROOT(mp, &tdp);
881 		vfs_unbusy(mp, td);
882 		if (error)
883 			break;
884 		vput(vp);
885 		vp = tdp;
886 	}
887 	if (error) {
888 		vput(vp);
889 		return (error);
890 	}
891 	VOP_UNLOCK(vp, 0, td);
892 	vrele(fdp->fd_cdir);
893 	fdp->fd_cdir = vp;
894 	return (0);
895 }
896 
897 /*
898  * Change current working directory (``.'').
899  */
900 #ifndef _SYS_SYSPROTO_H_
901 struct chdir_args {
902 	char	*path;
903 };
904 #endif
905 /* ARGSUSED */
906 int
907 chdir(td, uap)
908 	struct thread *td;
909 	struct chdir_args /* {
910 		syscallarg(char *) path;
911 	} */ *uap;
912 {
913 	register struct filedesc *fdp = td->td_proc->p_fd;
914 	int error;
915 	struct nameidata nd;
916 
917 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
918 	    SCARG(uap, path), td);
919 	if ((error = change_dir(&nd, td)) != 0)
920 		return (error);
921 	NDFREE(&nd, NDF_ONLY_PNBUF);
922 	vrele(fdp->fd_cdir);
923 	fdp->fd_cdir = nd.ni_vp;
924 	return (0);
925 }
926 
927 /*
928  * Helper function for raised chroot(2) security function:  Refuse if
929  * any filedescriptors are open directories.
930  */
931 static int
932 chroot_refuse_vdir_fds(fdp)
933 	struct filedesc *fdp;
934 {
935 	struct vnode *vp;
936 	struct file *fp;
937 	int error;
938 	int fd;
939 
940 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
941 		error = getvnode(fdp, fd, &fp);
942 		if (error)
943 			continue;
944 		vp = (struct vnode *)fp->f_data;
945 		if (vp->v_type != VDIR)
946 			continue;
947 		return(EPERM);
948 	}
949 	return (0);
950 }
951 
952 /*
953  * This sysctl determines if we will allow a process to chroot(2) if it
954  * has a directory open:
955  *	0: disallowed for all processes.
956  *	1: allowed for processes that were not already chroot(2)'ed.
957  *	2: allowed for all processes.
958  */
959 
960 static int chroot_allow_open_directories = 1;
961 
962 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
963      &chroot_allow_open_directories, 0, "");
964 
965 /*
966  * Change notion of root (``/'') directory.
967  */
968 #ifndef _SYS_SYSPROTO_H_
969 struct chroot_args {
970 	char	*path;
971 };
972 #endif
973 /* ARGSUSED */
974 int
975 chroot(td, uap)
976 	struct thread *td;
977 	struct chroot_args /* {
978 		syscallarg(char *) path;
979 	} */ *uap;
980 {
981 	register struct filedesc *fdp = td->td_proc->p_fd;
982 	int error;
983 	struct nameidata nd;
984 
985 	error = suser_xxx(0, td->td_proc, PRISON_ROOT);
986 	if (error)
987 		return (error);
988 	if (chroot_allow_open_directories == 0 ||
989 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode))
990 		error = chroot_refuse_vdir_fds(fdp);
991 	if (error)
992 		return (error);
993 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
994 	    SCARG(uap, path), td);
995 	if ((error = change_dir(&nd, td)) != 0)
996 		return (error);
997 	NDFREE(&nd, NDF_ONLY_PNBUF);
998 	vrele(fdp->fd_rdir);
999 	fdp->fd_rdir = nd.ni_vp;
1000 	if (!fdp->fd_jdir) {
1001 		fdp->fd_jdir = nd.ni_vp;
1002                 VREF(fdp->fd_jdir);
1003 	}
1004 	return (0);
1005 }
1006 
1007 /*
1008  * Common routine for chroot and chdir.
1009  */
1010 static int
1011 change_dir(ndp, td)
1012 	register struct nameidata *ndp;
1013 	struct thread *td;
1014 {
1015 	struct vnode *vp;
1016 	int error;
1017 
1018 	error = namei(ndp);
1019 	if (error)
1020 		return (error);
1021 	vp = ndp->ni_vp;
1022 	if (vp->v_type != VDIR)
1023 		error = ENOTDIR;
1024 	else
1025 		error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred, td);
1026 	if (error)
1027 		vput(vp);
1028 	else
1029 		VOP_UNLOCK(vp, 0, td);
1030 	return (error);
1031 }
1032 
1033 /*
1034  * Check permissions, allocate an open file structure,
1035  * and call the device open routine if any.
1036  */
1037 #ifndef _SYS_SYSPROTO_H_
1038 struct open_args {
1039 	char	*path;
1040 	int	flags;
1041 	int	mode;
1042 };
1043 #endif
1044 int
1045 open(td, uap)
1046 	struct thread *td;
1047 	register struct open_args /* {
1048 		syscallarg(char *) path;
1049 		syscallarg(int) flags;
1050 		syscallarg(int) mode;
1051 	} */ *uap;
1052 {
1053 	struct proc *p = td->td_proc;
1054 	struct filedesc *fdp = p->p_fd;
1055 	struct file *fp;
1056 	struct vnode *vp;
1057 	struct vattr vat;
1058 	struct mount *mp;
1059 	int cmode, flags, oflags;
1060 	struct file *nfp;
1061 	int type, indx, error;
1062 	struct flock lf;
1063 	struct nameidata nd;
1064 
1065 	oflags = SCARG(uap, flags);
1066 	if ((oflags & O_ACCMODE) == O_ACCMODE)
1067 		return (EINVAL);
1068 	flags = FFLAGS(oflags);
1069 	error = falloc(td, &nfp, &indx);
1070 	if (error)
1071 		return (error);
1072 	fp = nfp;
1073 	cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1074 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1075 	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
1076 	/*
1077 	 * Bump the ref count to prevent another process from closing
1078 	 * the descriptor while we are blocked in vn_open()
1079 	 */
1080 	fhold(fp);
1081 	error = vn_open(&nd, &flags, cmode);
1082 	if (error) {
1083 		/*
1084 		 * release our own reference
1085 		 */
1086 		fdrop(fp, td);
1087 
1088 		/*
1089 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1090 		 * responsible for dropping the old contents of ofiles[indx]
1091 		 * if it succeeds.
1092 		 */
1093 		if ((error == ENODEV || error == ENXIO) &&
1094 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1095 		    (error =
1096 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1097 			td->td_retval[0] = indx;
1098 			return (0);
1099 		}
1100 		/*
1101 		 * Clean up the descriptor, but only if another thread hadn't
1102 		 * replaced or closed it.
1103 		 */
1104 		if (fdp->fd_ofiles[indx] == fp) {
1105 			fdp->fd_ofiles[indx] = NULL;
1106 			fdrop(fp, td);
1107 		}
1108 
1109 		if (error == ERESTART)
1110 			error = EINTR;
1111 		return (error);
1112 	}
1113 	td->td_dupfd = 0;
1114 	NDFREE(&nd, NDF_ONLY_PNBUF);
1115 	vp = nd.ni_vp;
1116 
1117 	/*
1118 	 * There should be 2 references on the file, one from the descriptor
1119 	 * table, and one for us.
1120 	 *
1121 	 * Handle the case where someone closed the file (via its file
1122 	 * descriptor) while we were blocked.  The end result should look
1123 	 * like opening the file succeeded but it was immediately closed.
1124 	 */
1125 	if (fp->f_count == 1) {
1126 		KASSERT(fdp->fd_ofiles[indx] != fp,
1127 		    ("Open file descriptor lost all refs"));
1128 		VOP_UNLOCK(vp, 0, td);
1129 		vn_close(vp, flags & FMASK, fp->f_cred, td);
1130 		fdrop(fp, td);
1131 		td->td_retval[0] = indx;
1132 		return 0;
1133 	}
1134 
1135 	fp->f_data = (caddr_t)vp;
1136 	fp->f_flag = flags & FMASK;
1137 	fp->f_ops = &vnops;
1138 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1139 	VOP_UNLOCK(vp, 0, td);
1140 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1141 		lf.l_whence = SEEK_SET;
1142 		lf.l_start = 0;
1143 		lf.l_len = 0;
1144 		if (flags & O_EXLOCK)
1145 			lf.l_type = F_WRLCK;
1146 		else
1147 			lf.l_type = F_RDLCK;
1148 		type = F_FLOCK;
1149 		if ((flags & FNONBLOCK) == 0)
1150 			type |= F_WAIT;
1151 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0)
1152 			goto bad;
1153 		fp->f_flag |= FHASLOCK;
1154 	}
1155 	if (flags & O_TRUNC) {
1156 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1157 			goto bad;
1158 		VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE);
1159 		VATTR_NULL(&vat);
1160 		vat.va_size = 0;
1161 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1162 		error = VOP_SETATTR(vp, &vat, p->p_ucred, td);
1163 		VOP_UNLOCK(vp, 0, td);
1164 		vn_finished_write(mp);
1165 		if (error)
1166 			goto bad;
1167 	}
1168 	/* assert that vn_open created a backing object if one is needed */
1169 	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
1170 		("open: vmio vnode has no backing object after vn_open"));
1171 	/*
1172 	 * Release our private reference, leaving the one associated with
1173 	 * the descriptor table intact.
1174 	 */
1175 	fdrop(fp, td);
1176 	td->td_retval[0] = indx;
1177 	return (0);
1178 bad:
1179 	if (fdp->fd_ofiles[indx] == fp) {
1180 		fdp->fd_ofiles[indx] = NULL;
1181 		fdrop(fp, td);
1182 	}
1183 	fdrop(fp, td);
1184 	return (error);
1185 }
1186 
1187 #ifdef COMPAT_43
1188 /*
1189  * Create a file.
1190  */
1191 #ifndef _SYS_SYSPROTO_H_
1192 struct ocreat_args {
1193 	char	*path;
1194 	int	mode;
1195 };
1196 #endif
1197 int
1198 ocreat(td, uap)
1199 	struct thread *td;
1200 	register struct ocreat_args /* {
1201 		syscallarg(char *) path;
1202 		syscallarg(int) mode;
1203 	} */ *uap;
1204 {
1205 	struct open_args /* {
1206 		syscallarg(char *) path;
1207 		syscallarg(int) flags;
1208 		syscallarg(int) mode;
1209 	} */ nuap;
1210 
1211 	SCARG(&nuap, path) = SCARG(uap, path);
1212 	SCARG(&nuap, mode) = SCARG(uap, mode);
1213 	SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC;
1214 	return (open(td, &nuap));
1215 }
1216 #endif /* COMPAT_43 */
1217 
1218 /*
1219  * Create a special file.
1220  */
1221 #ifndef _SYS_SYSPROTO_H_
1222 struct mknod_args {
1223 	char	*path;
1224 	int	mode;
1225 	int	dev;
1226 };
1227 #endif
1228 /* ARGSUSED */
1229 int
1230 mknod(td, uap)
1231 	struct thread *td;
1232 	register struct mknod_args /* {
1233 		syscallarg(char *) path;
1234 		syscallarg(int) mode;
1235 		syscallarg(int) dev;
1236 	} */ *uap;
1237 {
1238 	struct vnode *vp;
1239 	struct mount *mp;
1240 	struct vattr vattr;
1241 	int error;
1242 	int whiteout = 0;
1243 	struct nameidata nd;
1244 
1245 	switch (SCARG(uap, mode) & S_IFMT) {
1246 	case S_IFCHR:
1247 	case S_IFBLK:
1248 		error = suser_td(td);
1249 		break;
1250 	default:
1251 		error = suser_xxx(0, td->td_proc, PRISON_ROOT);
1252 		break;
1253 	}
1254 	if (error)
1255 		return (error);
1256 restart:
1257 	bwillwrite();
1258 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1259 	if ((error = namei(&nd)) != 0)
1260 		return (error);
1261 	vp = nd.ni_vp;
1262 	if (vp != NULL) {
1263 		vrele(vp);
1264 		error = EEXIST;
1265 	} else {
1266 		VATTR_NULL(&vattr);
1267 		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
1268 		vattr.va_rdev = SCARG(uap, dev);
1269 		whiteout = 0;
1270 
1271 		switch (SCARG(uap, mode) & S_IFMT) {
1272 		case S_IFMT:	/* used by badsect to flag bad sectors */
1273 			vattr.va_type = VBAD;
1274 			break;
1275 		case S_IFCHR:
1276 			vattr.va_type = VCHR;
1277 			break;
1278 		case S_IFBLK:
1279 			vattr.va_type = VBLK;
1280 			break;
1281 		case S_IFWHT:
1282 			whiteout = 1;
1283 			break;
1284 		default:
1285 			error = EINVAL;
1286 			break;
1287 		}
1288 	}
1289 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1290 		NDFREE(&nd, NDF_ONLY_PNBUF);
1291 		vput(nd.ni_dvp);
1292 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1293 			return (error);
1294 		goto restart;
1295 	}
1296 	if (!error) {
1297 		VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE);
1298 		if (whiteout)
1299 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1300 		else {
1301 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1302 						&nd.ni_cnd, &vattr);
1303 			if (error == 0)
1304 				vput(nd.ni_vp);
1305 		}
1306 	}
1307 	NDFREE(&nd, NDF_ONLY_PNBUF);
1308 	vput(nd.ni_dvp);
1309 	vn_finished_write(mp);
1310 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1311 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1312 	return (error);
1313 }
1314 
1315 /*
1316  * Create a named pipe.
1317  */
1318 #ifndef _SYS_SYSPROTO_H_
1319 struct mkfifo_args {
1320 	char	*path;
1321 	int	mode;
1322 };
1323 #endif
1324 /* ARGSUSED */
1325 int
1326 mkfifo(td, uap)
1327 	struct thread *td;
1328 	register struct mkfifo_args /* {
1329 		syscallarg(char *) path;
1330 		syscallarg(int) mode;
1331 	} */ *uap;
1332 {
1333 	struct mount *mp;
1334 	struct vattr vattr;
1335 	int error;
1336 	struct nameidata nd;
1337 
1338 restart:
1339 	bwillwrite();
1340 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1341 	if ((error = namei(&nd)) != 0)
1342 		return (error);
1343 	if (nd.ni_vp != NULL) {
1344 		NDFREE(&nd, NDF_ONLY_PNBUF);
1345 		vrele(nd.ni_vp);
1346 		vput(nd.ni_dvp);
1347 		return (EEXIST);
1348 	}
1349 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1350 		NDFREE(&nd, NDF_ONLY_PNBUF);
1351 		vput(nd.ni_dvp);
1352 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1353 			return (error);
1354 		goto restart;
1355 	}
1356 	VATTR_NULL(&vattr);
1357 	vattr.va_type = VFIFO;
1358 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
1359 	VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE);
1360 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1361 	if (error == 0)
1362 		vput(nd.ni_vp);
1363 	NDFREE(&nd, NDF_ONLY_PNBUF);
1364 	vput(nd.ni_dvp);
1365 	vn_finished_write(mp);
1366 	return (error);
1367 }
1368 
1369 /*
1370  * Make a hard file link.
1371  */
1372 #ifndef _SYS_SYSPROTO_H_
1373 struct link_args {
1374 	char	*path;
1375 	char	*link;
1376 };
1377 #endif
1378 /* ARGSUSED */
1379 int
1380 link(td, uap)
1381 	struct thread *td;
1382 	register struct link_args /* {
1383 		syscallarg(char *) path;
1384 		syscallarg(char *) link;
1385 	} */ *uap;
1386 {
1387 	struct vnode *vp;
1388 	struct mount *mp;
1389 	struct nameidata nd;
1390 	int error;
1391 
1392 	bwillwrite();
1393 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), td);
1394 	if ((error = namei(&nd)) != 0)
1395 		return (error);
1396 	NDFREE(&nd, NDF_ONLY_PNBUF);
1397 	vp = nd.ni_vp;
1398 	if (vp->v_type == VDIR) {
1399 		vrele(vp);
1400 		return (EPERM);		/* POSIX */
1401 	}
1402 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1403 		vrele(vp);
1404 		return (error);
1405 	}
1406 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
1407 	if ((error = namei(&nd)) == 0) {
1408 		if (nd.ni_vp != NULL) {
1409 			vrele(nd.ni_vp);
1410 			error = EEXIST;
1411 		} else {
1412 			VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE);
1413 			VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
1414 			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1415 		}
1416 		NDFREE(&nd, NDF_ONLY_PNBUF);
1417 		vput(nd.ni_dvp);
1418 	}
1419 	vrele(vp);
1420 	vn_finished_write(mp);
1421 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1422 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1423 	return (error);
1424 }
1425 
1426 /*
1427  * Make a symbolic link.
1428  */
1429 #ifndef _SYS_SYSPROTO_H_
1430 struct symlink_args {
1431 	char	*path;
1432 	char	*link;
1433 };
1434 #endif
1435 /* ARGSUSED */
1436 int
1437 symlink(td, uap)
1438 	struct thread *td;
1439 	register struct symlink_args /* {
1440 		syscallarg(char *) path;
1441 		syscallarg(char *) link;
1442 	} */ *uap;
1443 {
1444 	struct mount *mp;
1445 	struct vattr vattr;
1446 	char *path;
1447 	int error;
1448 	struct nameidata nd;
1449 
1450 	path = zalloc(namei_zone);
1451 	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
1452 		goto out;
1453 restart:
1454 	bwillwrite();
1455 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
1456 	if ((error = namei(&nd)) != 0)
1457 		goto out;
1458 	if (nd.ni_vp) {
1459 		NDFREE(&nd, NDF_ONLY_PNBUF);
1460 		vrele(nd.ni_vp);
1461 		vput(nd.ni_dvp);
1462 		error = EEXIST;
1463 		goto out;
1464 	}
1465 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1466 		NDFREE(&nd, NDF_ONLY_PNBUF);
1467 		vput(nd.ni_dvp);
1468 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1469 			return (error);
1470 		goto restart;
1471 	}
1472 	VATTR_NULL(&vattr);
1473 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1474 	VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE);
1475 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1476 	NDFREE(&nd, NDF_ONLY_PNBUF);
1477 	if (error == 0)
1478 		vput(nd.ni_vp);
1479 	vput(nd.ni_dvp);
1480 	vn_finished_write(mp);
1481 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1482 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1483 out:
1484 	zfree(namei_zone, path);
1485 	return (error);
1486 }
1487 
1488 /*
1489  * Delete a whiteout from the filesystem.
1490  */
1491 /* ARGSUSED */
1492 int
1493 undelete(td, uap)
1494 	struct thread *td;
1495 	register struct undelete_args /* {
1496 		syscallarg(char *) path;
1497 	} */ *uap;
1498 {
1499 	int error;
1500 	struct mount *mp;
1501 	struct nameidata nd;
1502 
1503 restart:
1504 	bwillwrite();
1505 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1506 	    SCARG(uap, path), td);
1507 	error = namei(&nd);
1508 	if (error)
1509 		return (error);
1510 
1511 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1512 		NDFREE(&nd, NDF_ONLY_PNBUF);
1513 		if (nd.ni_vp)
1514 			vrele(nd.ni_vp);
1515 		vput(nd.ni_dvp);
1516 		return (EEXIST);
1517 	}
1518 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1519 		NDFREE(&nd, NDF_ONLY_PNBUF);
1520 		vput(nd.ni_dvp);
1521 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1522 			return (error);
1523 		goto restart;
1524 	}
1525 	VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE);
1526 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1527 	NDFREE(&nd, NDF_ONLY_PNBUF);
1528 	vput(nd.ni_dvp);
1529 	vn_finished_write(mp);
1530 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1531 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1532 	return (error);
1533 }
1534 
1535 /*
1536  * Delete a name from the filesystem.
1537  */
1538 #ifndef _SYS_SYSPROTO_H_
1539 struct unlink_args {
1540 	char	*path;
1541 };
1542 #endif
1543 /* ARGSUSED */
1544 int
1545 unlink(td, uap)
1546 	struct thread *td;
1547 	struct unlink_args /* {
1548 		syscallarg(char *) path;
1549 	} */ *uap;
1550 {
1551 	struct mount *mp;
1552 	struct vnode *vp;
1553 	int error;
1554 	struct nameidata nd;
1555 
1556 restart:
1557 	bwillwrite();
1558 	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1559 	if ((error = namei(&nd)) != 0)
1560 		return (error);
1561 	vp = nd.ni_vp;
1562 	if (vp->v_type == VDIR)
1563 		error = EPERM;		/* POSIX */
1564 	else {
1565 		/*
1566 		 * The root of a mounted filesystem cannot be deleted.
1567 		 *
1568 		 * XXX: can this only be a VDIR case?
1569 		 */
1570 		if (vp->v_flag & VROOT)
1571 			error = EBUSY;
1572 	}
1573 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1574 		NDFREE(&nd, NDF_ONLY_PNBUF);
1575 		vrele(vp);
1576 		vput(nd.ni_dvp);
1577 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1578 			return (error);
1579 		goto restart;
1580 	}
1581 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
1582 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1583 	if (!error) {
1584 		VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE);
1585 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1586 	}
1587 	NDFREE(&nd, NDF_ONLY_PNBUF);
1588 	vput(nd.ni_dvp);
1589 	vput(vp);
1590 	vn_finished_write(mp);
1591 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1592 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1593 	return (error);
1594 }
1595 
1596 /*
1597  * Reposition read/write file offset.
1598  */
1599 #ifndef _SYS_SYSPROTO_H_
1600 struct lseek_args {
1601 	int	fd;
1602 	int	pad;
1603 	off_t	offset;
1604 	int	whence;
1605 };
1606 #endif
1607 int
1608 lseek(td, uap)
1609 	struct thread *td;
1610 	register struct lseek_args /* {
1611 		syscallarg(int) fd;
1612 		syscallarg(int) pad;
1613 		syscallarg(off_t) offset;
1614 		syscallarg(int) whence;
1615 	} */ *uap;
1616 {
1617 	struct ucred *cred = td->td_proc->p_ucred;
1618 	register struct filedesc *fdp = td->td_proc->p_fd;
1619 	register struct file *fp;
1620 	struct vattr vattr;
1621 	struct vnode *vp;
1622 	off_t offset;
1623 	int error, noneg;
1624 
1625 	if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
1626 	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
1627 		return (EBADF);
1628 	if (fp->f_type != DTYPE_VNODE)
1629 		return (ESPIPE);
1630 	vp = (struct vnode *)fp->f_data;
1631 	noneg = (vp->v_type != VCHR);
1632 	offset = SCARG(uap, offset);
1633 	switch (SCARG(uap, whence)) {
1634 	case L_INCR:
1635 		if (noneg &&
1636 		    (fp->f_offset < 0 ||
1637 		     (offset > 0 && fp->f_offset > OFF_MAX - offset)))
1638 			return (EOVERFLOW);
1639 		offset += fp->f_offset;
1640 		break;
1641 	case L_XTND:
1642 		error = VOP_GETATTR(vp, &vattr, cred, td);
1643 		if (error)
1644 			return (error);
1645 		if (noneg &&
1646 		    (vattr.va_size > OFF_MAX ||
1647 		     (offset > 0 && vattr.va_size > OFF_MAX - offset)))
1648 			return (EOVERFLOW);
1649 		offset += vattr.va_size;
1650 		break;
1651 	case L_SET:
1652 		break;
1653 	default:
1654 		return (EINVAL);
1655 	}
1656 	if (noneg && offset < 0)
1657 		return (EINVAL);
1658 	fp->f_offset = offset;
1659 	*(off_t *)(td->td_retval) = fp->f_offset;
1660 	return (0);
1661 }
1662 
1663 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1664 /*
1665  * Reposition read/write file offset.
1666  */
1667 #ifndef _SYS_SYSPROTO_H_
1668 struct olseek_args {
1669 	int	fd;
1670 	long	offset;
1671 	int	whence;
1672 };
1673 #endif
1674 int
1675 olseek(td, uap)
1676 	struct thread *td;
1677 	register struct olseek_args /* {
1678 		syscallarg(int) fd;
1679 		syscallarg(long) offset;
1680 		syscallarg(int) whence;
1681 	} */ *uap;
1682 {
1683 	struct lseek_args /* {
1684 		syscallarg(int) fd;
1685 		syscallarg(int) pad;
1686 		syscallarg(off_t) offset;
1687 		syscallarg(int) whence;
1688 	} */ nuap;
1689 	int error;
1690 
1691 	SCARG(&nuap, fd) = SCARG(uap, fd);
1692 	SCARG(&nuap, offset) = SCARG(uap, offset);
1693 	SCARG(&nuap, whence) = SCARG(uap, whence);
1694 	error = lseek(td, &nuap);
1695 	return (error);
1696 }
1697 #endif /* COMPAT_43 */
1698 
1699 /*
1700  * Check access permissions.
1701  */
1702 #ifndef _SYS_SYSPROTO_H_
1703 struct access_args {
1704 	char	*path;
1705 	int	flags;
1706 };
1707 #endif
1708 int
1709 access(td, uap)
1710 	struct thread *td;
1711 	register struct access_args /* {
1712 		syscallarg(char *) path;
1713 		syscallarg(int) flags;
1714 	} */ *uap;
1715 {
1716 	struct ucred *cred, *tmpcred;
1717 	register struct vnode *vp;
1718 	int error, flags;
1719 	struct nameidata nd;
1720 
1721 	cred = td->td_proc->p_ucred;
1722 	/*
1723 	 * Create and modify a temporary credential instead of one that
1724 	 * is potentially shared.  This could also mess up socket
1725 	 * buffer accounting which can run in an interrupt context.
1726 	 *
1727 	 * XXX - Depending on how "threads" are finally implemented, it
1728 	 * may be better to explicitly pass the credential to namei()
1729 	 * rather than to modify the potentially shared process structure.
1730 	 */
1731 	tmpcred = crdup(cred);
1732 	tmpcred->cr_uid = cred->cr_ruid;
1733 	tmpcred->cr_groups[0] = cred->cr_rgid;
1734 	td->td_proc->p_ucred = tmpcred;
1735 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1736 	    SCARG(uap, path), td);
1737 	if ((error = namei(&nd)) != 0)
1738 		goto out1;
1739 	vp = nd.ni_vp;
1740 
1741 	/* Flags == 0 means only check for existence. */
1742 	if (SCARG(uap, flags)) {
1743 		flags = 0;
1744 		if (SCARG(uap, flags) & R_OK)
1745 			flags |= VREAD;
1746 		if (SCARG(uap, flags) & W_OK)
1747 			flags |= VWRITE;
1748 		if (SCARG(uap, flags) & X_OK)
1749 			flags |= VEXEC;
1750 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1751 			error = VOP_ACCESS(vp, flags, tmpcred, td);
1752 	}
1753 	NDFREE(&nd, NDF_ONLY_PNBUF);
1754 	vput(vp);
1755 out1:
1756 	td->td_proc->p_ucred = cred;
1757 	crfree(tmpcred);
1758 	return (error);
1759 }
1760 
1761 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1762 /*
1763  * Get file status; this version follows links.
1764  */
1765 #ifndef _SYS_SYSPROTO_H_
1766 struct ostat_args {
1767 	char	*path;
1768 	struct ostat *ub;
1769 };
1770 #endif
1771 /* ARGSUSED */
1772 int
1773 ostat(td, uap)
1774 	struct thread *td;
1775 	register struct ostat_args /* {
1776 		syscallarg(char *) path;
1777 		syscallarg(struct ostat *) ub;
1778 	} */ *uap;
1779 {
1780 	struct stat sb;
1781 	struct ostat osb;
1782 	int error;
1783 	struct nameidata nd;
1784 
1785 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1786 	    SCARG(uap, path), td);
1787 	if ((error = namei(&nd)) != 0)
1788 		return (error);
1789 	NDFREE(&nd, NDF_ONLY_PNBUF);
1790 	error = vn_stat(nd.ni_vp, &sb, td);
1791 	vput(nd.ni_vp);
1792 	if (error)
1793 		return (error);
1794 	cvtstat(&sb, &osb);
1795 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1796 	return (error);
1797 }
1798 
1799 /*
1800  * Get file status; this version does not follow links.
1801  */
1802 #ifndef _SYS_SYSPROTO_H_
1803 struct olstat_args {
1804 	char	*path;
1805 	struct ostat *ub;
1806 };
1807 #endif
1808 /* ARGSUSED */
1809 int
1810 olstat(td, uap)
1811 	struct thread *td;
1812 	register struct olstat_args /* {
1813 		syscallarg(char *) path;
1814 		syscallarg(struct ostat *) ub;
1815 	} */ *uap;
1816 {
1817 	struct vnode *vp;
1818 	struct stat sb;
1819 	struct ostat osb;
1820 	int error;
1821 	struct nameidata nd;
1822 
1823 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1824 	    SCARG(uap, path), td);
1825 	if ((error = namei(&nd)) != 0)
1826 		return (error);
1827 	vp = nd.ni_vp;
1828 	error = vn_stat(vp, &sb, td);
1829 	NDFREE(&nd, NDF_ONLY_PNBUF);
1830 	vput(vp);
1831 	if (error)
1832 		return (error);
1833 	cvtstat(&sb, &osb);
1834 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1835 	return (error);
1836 }
1837 
1838 /*
1839  * Convert from an old to a new stat structure.
1840  */
1841 void
1842 cvtstat(st, ost)
1843 	struct stat *st;
1844 	struct ostat *ost;
1845 {
1846 
1847 	ost->st_dev = st->st_dev;
1848 	ost->st_ino = st->st_ino;
1849 	ost->st_mode = st->st_mode;
1850 	ost->st_nlink = st->st_nlink;
1851 	ost->st_uid = st->st_uid;
1852 	ost->st_gid = st->st_gid;
1853 	ost->st_rdev = st->st_rdev;
1854 	if (st->st_size < (quad_t)1 << 32)
1855 		ost->st_size = st->st_size;
1856 	else
1857 		ost->st_size = -2;
1858 	ost->st_atime = st->st_atime;
1859 	ost->st_mtime = st->st_mtime;
1860 	ost->st_ctime = st->st_ctime;
1861 	ost->st_blksize = st->st_blksize;
1862 	ost->st_blocks = st->st_blocks;
1863 	ost->st_flags = st->st_flags;
1864 	ost->st_gen = st->st_gen;
1865 }
1866 #endif /* COMPAT_43 || COMPAT_SUNOS */
1867 
1868 /*
1869  * Get file status; this version follows links.
1870  */
1871 #ifndef _SYS_SYSPROTO_H_
1872 struct stat_args {
1873 	char	*path;
1874 	struct stat *ub;
1875 };
1876 #endif
1877 /* ARGSUSED */
1878 int
1879 stat(td, uap)
1880 	struct thread *td;
1881 	register struct stat_args /* {
1882 		syscallarg(char *) path;
1883 		syscallarg(struct stat *) ub;
1884 	} */ *uap;
1885 {
1886 	struct stat sb;
1887 	int error;
1888 	struct nameidata nd;
1889 
1890 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1891 	    SCARG(uap, path), td);
1892 	if ((error = namei(&nd)) != 0)
1893 		return (error);
1894 	error = vn_stat(nd.ni_vp, &sb, td);
1895 	NDFREE(&nd, NDF_ONLY_PNBUF);
1896 	vput(nd.ni_vp);
1897 	if (error)
1898 		return (error);
1899 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
1900 	return (error);
1901 }
1902 
1903 /*
1904  * Get file status; this version does not follow links.
1905  */
1906 #ifndef _SYS_SYSPROTO_H_
1907 struct lstat_args {
1908 	char	*path;
1909 	struct stat *ub;
1910 };
1911 #endif
1912 /* ARGSUSED */
1913 int
1914 lstat(td, uap)
1915 	struct thread *td;
1916 	register struct lstat_args /* {
1917 		syscallarg(char *) path;
1918 		syscallarg(struct stat *) ub;
1919 	} */ *uap;
1920 {
1921 	int error;
1922 	struct vnode *vp;
1923 	struct stat sb;
1924 	struct nameidata nd;
1925 
1926 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1927 	    SCARG(uap, path), td);
1928 	if ((error = namei(&nd)) != 0)
1929 		return (error);
1930 	vp = nd.ni_vp;
1931 	error = vn_stat(vp, &sb, td);
1932 	NDFREE(&nd, NDF_ONLY_PNBUF);
1933 	vput(vp);
1934 	if (error)
1935 		return (error);
1936 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
1937 	return (error);
1938 }
1939 
1940 /*
1941  * Implementation of the NetBSD stat() function.
1942  * XXX This should probably be collapsed with the FreeBSD version,
1943  * as the differences are only due to vn_stat() clearing spares at
1944  * the end of the structures.  vn_stat could be split to avoid this,
1945  * and thus collapse the following to close to zero code.
1946  */
1947 void
1948 cvtnstat(sb, nsb)
1949 	struct stat *sb;
1950 	struct nstat *nsb;
1951 {
1952 	nsb->st_dev = sb->st_dev;
1953 	nsb->st_ino = sb->st_ino;
1954 	nsb->st_mode = sb->st_mode;
1955 	nsb->st_nlink = sb->st_nlink;
1956 	nsb->st_uid = sb->st_uid;
1957 	nsb->st_gid = sb->st_gid;
1958 	nsb->st_rdev = sb->st_rdev;
1959 	nsb->st_atimespec = sb->st_atimespec;
1960 	nsb->st_mtimespec = sb->st_mtimespec;
1961 	nsb->st_ctimespec = sb->st_ctimespec;
1962 	nsb->st_size = sb->st_size;
1963 	nsb->st_blocks = sb->st_blocks;
1964 	nsb->st_blksize = sb->st_blksize;
1965 	nsb->st_flags = sb->st_flags;
1966 	nsb->st_gen = sb->st_gen;
1967 	nsb->st_qspare[0] = sb->st_qspare[0];
1968 	nsb->st_qspare[1] = sb->st_qspare[1];
1969 }
1970 
1971 #ifndef _SYS_SYSPROTO_H_
1972 struct nstat_args {
1973 	char	*path;
1974 	struct nstat *ub;
1975 };
1976 #endif
1977 /* ARGSUSED */
1978 int
1979 nstat(td, uap)
1980 	struct thread *td;
1981 	register struct nstat_args /* {
1982 		syscallarg(char *) path;
1983 		syscallarg(struct nstat *) ub;
1984 	} */ *uap;
1985 {
1986 	struct stat sb;
1987 	struct nstat nsb;
1988 	int error;
1989 	struct nameidata nd;
1990 
1991 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1992 	    SCARG(uap, path), td);
1993 	if ((error = namei(&nd)) != 0)
1994 		return (error);
1995 	NDFREE(&nd, NDF_ONLY_PNBUF);
1996 	error = vn_stat(nd.ni_vp, &sb, td);
1997 	vput(nd.ni_vp);
1998 	if (error)
1999 		return (error);
2000 	cvtnstat(&sb, &nsb);
2001 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2002 	return (error);
2003 }
2004 
2005 /*
2006  * NetBSD lstat.  Get file status; this version does not follow links.
2007  */
2008 #ifndef _SYS_SYSPROTO_H_
2009 struct lstat_args {
2010 	char	*path;
2011 	struct stat *ub;
2012 };
2013 #endif
2014 /* ARGSUSED */
2015 int
2016 nlstat(td, uap)
2017 	struct thread *td;
2018 	register struct nlstat_args /* {
2019 		syscallarg(char *) path;
2020 		syscallarg(struct nstat *) ub;
2021 	} */ *uap;
2022 {
2023 	int error;
2024 	struct vnode *vp;
2025 	struct stat sb;
2026 	struct nstat nsb;
2027 	struct nameidata nd;
2028 
2029 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2030 	    SCARG(uap, path), td);
2031 	if ((error = namei(&nd)) != 0)
2032 		return (error);
2033 	vp = nd.ni_vp;
2034 	NDFREE(&nd, NDF_ONLY_PNBUF);
2035 	error = vn_stat(vp, &sb, td);
2036 	vput(vp);
2037 	if (error)
2038 		return (error);
2039 	cvtnstat(&sb, &nsb);
2040 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2041 	return (error);
2042 }
2043 
2044 /*
2045  * Get configurable pathname variables.
2046  */
2047 #ifndef _SYS_SYSPROTO_H_
2048 struct pathconf_args {
2049 	char	*path;
2050 	int	name;
2051 };
2052 #endif
2053 /* ARGSUSED */
2054 int
2055 pathconf(td, uap)
2056 	struct thread *td;
2057 	register struct pathconf_args /* {
2058 		syscallarg(char *) path;
2059 		syscallarg(int) name;
2060 	} */ *uap;
2061 {
2062 	int error;
2063 	struct nameidata nd;
2064 
2065 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2066 	    SCARG(uap, path), td);
2067 	if ((error = namei(&nd)) != 0)
2068 		return (error);
2069 	NDFREE(&nd, NDF_ONLY_PNBUF);
2070 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), td->td_retval);
2071 	vput(nd.ni_vp);
2072 	return (error);
2073 }
2074 
2075 /*
2076  * Return target name of a symbolic link.
2077  */
2078 #ifndef _SYS_SYSPROTO_H_
2079 struct readlink_args {
2080 	char	*path;
2081 	char	*buf;
2082 	int	count;
2083 };
2084 #endif
2085 /* ARGSUSED */
2086 int
2087 readlink(td, uap)
2088 	struct thread *td;
2089 	register struct readlink_args /* {
2090 		syscallarg(char *) path;
2091 		syscallarg(char *) buf;
2092 		syscallarg(int) count;
2093 	} */ *uap;
2094 {
2095 	register struct vnode *vp;
2096 	struct iovec aiov;
2097 	struct uio auio;
2098 	int error;
2099 	struct nameidata nd;
2100 
2101 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2102 	    SCARG(uap, path), td);
2103 	if ((error = namei(&nd)) != 0)
2104 		return (error);
2105 	NDFREE(&nd, NDF_ONLY_PNBUF);
2106 	vp = nd.ni_vp;
2107 	if (vp->v_type != VLNK)
2108 		error = EINVAL;
2109 	else {
2110 		aiov.iov_base = SCARG(uap, buf);
2111 		aiov.iov_len = SCARG(uap, count);
2112 		auio.uio_iov = &aiov;
2113 		auio.uio_iovcnt = 1;
2114 		auio.uio_offset = 0;
2115 		auio.uio_rw = UIO_READ;
2116 		auio.uio_segflg = UIO_USERSPACE;
2117 		auio.uio_td = td;
2118 		auio.uio_resid = SCARG(uap, count);
2119 		error = VOP_READLINK(vp, &auio, td->td_proc->p_ucred);
2120 	}
2121 	vput(vp);
2122 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
2123 	return (error);
2124 }
2125 
2126 /*
2127  * Common implementation code for chflags() and fchflags().
2128  */
2129 static int
2130 setfflags(td, vp, flags)
2131 	struct thread *td;
2132 	struct vnode *vp;
2133 	int flags;
2134 {
2135 	int error;
2136 	struct mount *mp;
2137 	struct vattr vattr;
2138 
2139 	/*
2140 	 * Prevent non-root users from setting flags on devices.  When
2141 	 * a device is reused, users can retain ownership of the device
2142 	 * if they are allowed to set flags and programs assume that
2143 	 * chown can't fail when done as root.
2144 	 */
2145 	if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
2146 	    ((error = suser_xxx(td->td_proc->p_ucred, td->td_proc, PRISON_ROOT)) != 0))
2147 		return (error);
2148 
2149 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2150 		return (error);
2151 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
2152 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2153 	VATTR_NULL(&vattr);
2154 	vattr.va_flags = flags;
2155 	error = VOP_SETATTR(vp, &vattr, td->td_proc->p_ucred, td);
2156 	VOP_UNLOCK(vp, 0, td);
2157 	vn_finished_write(mp);
2158 	return (error);
2159 }
2160 
2161 /*
2162  * Change flags of a file given a path name.
2163  */
2164 #ifndef _SYS_SYSPROTO_H_
2165 struct chflags_args {
2166 	char	*path;
2167 	int	flags;
2168 };
2169 #endif
2170 /* ARGSUSED */
2171 int
2172 chflags(td, uap)
2173 	struct thread *td;
2174 	register struct chflags_args /* {
2175 		syscallarg(char *) path;
2176 		syscallarg(int) flags;
2177 	} */ *uap;
2178 {
2179 	int error;
2180 	struct nameidata nd;
2181 
2182 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2183 	if ((error = namei(&nd)) != 0)
2184 		return (error);
2185 	NDFREE(&nd, NDF_ONLY_PNBUF);
2186 	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
2187 	vrele(nd.ni_vp);
2188 	return error;
2189 }
2190 
2191 /*
2192  * Change flags of a file given a file descriptor.
2193  */
2194 #ifndef _SYS_SYSPROTO_H_
2195 struct fchflags_args {
2196 	int	fd;
2197 	int	flags;
2198 };
2199 #endif
2200 /* ARGSUSED */
2201 int
2202 fchflags(td, uap)
2203 	struct thread *td;
2204 	register struct fchflags_args /* {
2205 		syscallarg(int) fd;
2206 		syscallarg(int) flags;
2207 	} */ *uap;
2208 {
2209 	struct file *fp;
2210 	int error;
2211 
2212 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2213 		return (error);
2214 	return setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags));
2215 }
2216 
2217 /*
2218  * Common implementation code for chmod(), lchmod() and fchmod().
2219  */
2220 static int
2221 setfmode(td, vp, mode)
2222 	struct thread *td;
2223 	struct vnode *vp;
2224 	int mode;
2225 {
2226 	int error;
2227 	struct mount *mp;
2228 	struct vattr vattr;
2229 
2230 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2231 		return (error);
2232 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
2233 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2234 	VATTR_NULL(&vattr);
2235 	vattr.va_mode = mode & ALLPERMS;
2236 	error = VOP_SETATTR(vp, &vattr, td->td_proc->p_ucred, td);
2237 	VOP_UNLOCK(vp, 0, td);
2238 	vn_finished_write(mp);
2239 	return error;
2240 }
2241 
2242 /*
2243  * Change mode of a file given path name.
2244  */
2245 #ifndef _SYS_SYSPROTO_H_
2246 struct chmod_args {
2247 	char	*path;
2248 	int	mode;
2249 };
2250 #endif
2251 /* ARGSUSED */
2252 int
2253 chmod(td, uap)
2254 	struct thread *td;
2255 	register struct chmod_args /* {
2256 		syscallarg(char *) path;
2257 		syscallarg(int) mode;
2258 	} */ *uap;
2259 {
2260 	int error;
2261 	struct nameidata nd;
2262 
2263 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2264 	if ((error = namei(&nd)) != 0)
2265 		return (error);
2266 	NDFREE(&nd, NDF_ONLY_PNBUF);
2267 	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
2268 	vrele(nd.ni_vp);
2269 	return error;
2270 }
2271 
2272 /*
2273  * Change mode of a file given path name (don't follow links.)
2274  */
2275 #ifndef _SYS_SYSPROTO_H_
2276 struct lchmod_args {
2277 	char	*path;
2278 	int	mode;
2279 };
2280 #endif
2281 /* ARGSUSED */
2282 int
2283 lchmod(td, uap)
2284 	struct thread *td;
2285 	register struct lchmod_args /* {
2286 		syscallarg(char *) path;
2287 		syscallarg(int) mode;
2288 	} */ *uap;
2289 {
2290 	int error;
2291 	struct nameidata nd;
2292 
2293 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2294 	if ((error = namei(&nd)) != 0)
2295 		return (error);
2296 	NDFREE(&nd, NDF_ONLY_PNBUF);
2297 	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
2298 	vrele(nd.ni_vp);
2299 	return error;
2300 }
2301 
2302 /*
2303  * Change mode of a file given a file descriptor.
2304  */
2305 #ifndef _SYS_SYSPROTO_H_
2306 struct fchmod_args {
2307 	int	fd;
2308 	int	mode;
2309 };
2310 #endif
2311 /* ARGSUSED */
2312 int
2313 fchmod(td, uap)
2314 	struct thread *td;
2315 	register struct fchmod_args /* {
2316 		syscallarg(int) fd;
2317 		syscallarg(int) mode;
2318 	} */ *uap;
2319 {
2320 	struct file *fp;
2321 	int error;
2322 
2323 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2324 		return (error);
2325 	return setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode));
2326 }
2327 
2328 /*
2329  * Common implementation for chown(), lchown(), and fchown()
2330  */
2331 static int
2332 setfown(td, vp, uid, gid)
2333 	struct thread *td;
2334 	struct vnode *vp;
2335 	uid_t uid;
2336 	gid_t gid;
2337 {
2338 	int error;
2339 	struct mount *mp;
2340 	struct vattr vattr;
2341 
2342 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2343 		return (error);
2344 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
2345 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2346 	VATTR_NULL(&vattr);
2347 	vattr.va_uid = uid;
2348 	vattr.va_gid = gid;
2349 	error = VOP_SETATTR(vp, &vattr, td->td_proc->p_ucred, td);
2350 	VOP_UNLOCK(vp, 0, td);
2351 	vn_finished_write(mp);
2352 	return error;
2353 }
2354 
2355 /*
2356  * Set ownership given a path name.
2357  */
2358 #ifndef _SYS_SYSPROTO_H_
2359 struct chown_args {
2360 	char	*path;
2361 	int	uid;
2362 	int	gid;
2363 };
2364 #endif
2365 /* ARGSUSED */
2366 int
2367 chown(td, uap)
2368 	struct thread *td;
2369 	register struct chown_args /* {
2370 		syscallarg(char *) path;
2371 		syscallarg(int) uid;
2372 		syscallarg(int) gid;
2373 	} */ *uap;
2374 {
2375 	int error;
2376 	struct nameidata nd;
2377 
2378 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2379 	if ((error = namei(&nd)) != 0)
2380 		return (error);
2381 	NDFREE(&nd, NDF_ONLY_PNBUF);
2382 	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2383 	vrele(nd.ni_vp);
2384 	return (error);
2385 }
2386 
2387 /*
2388  * Set ownership given a path name, do not cross symlinks.
2389  */
2390 #ifndef _SYS_SYSPROTO_H_
2391 struct lchown_args {
2392 	char	*path;
2393 	int	uid;
2394 	int	gid;
2395 };
2396 #endif
2397 /* ARGSUSED */
2398 int
2399 lchown(td, uap)
2400 	struct thread *td;
2401 	register struct lchown_args /* {
2402 		syscallarg(char *) path;
2403 		syscallarg(int) uid;
2404 		syscallarg(int) gid;
2405 	} */ *uap;
2406 {
2407 	int error;
2408 	struct nameidata nd;
2409 
2410 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2411 	if ((error = namei(&nd)) != 0)
2412 		return (error);
2413 	NDFREE(&nd, NDF_ONLY_PNBUF);
2414 	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2415 	vrele(nd.ni_vp);
2416 	return (error);
2417 }
2418 
2419 /*
2420  * Set ownership given a file descriptor.
2421  */
2422 #ifndef _SYS_SYSPROTO_H_
2423 struct fchown_args {
2424 	int	fd;
2425 	int	uid;
2426 	int	gid;
2427 };
2428 #endif
2429 /* ARGSUSED */
2430 int
2431 fchown(td, uap)
2432 	struct thread *td;
2433 	register struct fchown_args /* {
2434 		syscallarg(int) fd;
2435 		syscallarg(int) uid;
2436 		syscallarg(int) gid;
2437 	} */ *uap;
2438 {
2439 	struct file *fp;
2440 	int error;
2441 
2442 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2443 		return (error);
2444 	return setfown(td, (struct vnode *)fp->f_data,
2445 		SCARG(uap, uid), SCARG(uap, gid));
2446 }
2447 
2448 /*
2449  * Common implementation code for utimes(), lutimes(), and futimes().
2450  */
2451 static int
2452 getutimes(usrtvp, tsp)
2453 	const struct timeval *usrtvp;
2454 	struct timespec *tsp;
2455 {
2456 	struct timeval tv[2];
2457 	int error;
2458 
2459 	if (usrtvp == NULL) {
2460 		microtime(&tv[0]);
2461 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2462 		tsp[1] = tsp[0];
2463 	} else {
2464 		if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
2465 			return (error);
2466 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2467 		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
2468 	}
2469 	return 0;
2470 }
2471 
2472 /*
2473  * Common implementation code for utimes(), lutimes(), and futimes().
2474  */
2475 static int
2476 setutimes(td, vp, ts, nullflag)
2477 	struct thread *td;
2478 	struct vnode *vp;
2479 	const struct timespec *ts;
2480 	int nullflag;
2481 {
2482 	int error;
2483 	struct mount *mp;
2484 	struct vattr vattr;
2485 
2486 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2487 		return (error);
2488 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
2489 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2490 	VATTR_NULL(&vattr);
2491 	vattr.va_atime = ts[0];
2492 	vattr.va_mtime = ts[1];
2493 	if (nullflag)
2494 		vattr.va_vaflags |= VA_UTIMES_NULL;
2495 	error = VOP_SETATTR(vp, &vattr, td->td_proc->p_ucred, td);
2496 	VOP_UNLOCK(vp, 0, td);
2497 	vn_finished_write(mp);
2498 	return error;
2499 }
2500 
2501 /*
2502  * Set the access and modification times of a file.
2503  */
2504 #ifndef _SYS_SYSPROTO_H_
2505 struct utimes_args {
2506 	char	*path;
2507 	struct	timeval *tptr;
2508 };
2509 #endif
2510 /* ARGSUSED */
2511 int
2512 utimes(td, uap)
2513 	struct thread *td;
2514 	register struct utimes_args /* {
2515 		syscallarg(char *) path;
2516 		syscallarg(struct timeval *) tptr;
2517 	} */ *uap;
2518 {
2519 	struct timespec ts[2];
2520 	struct timeval *usrtvp;
2521 	int error;
2522 	struct nameidata nd;
2523 
2524 	usrtvp = SCARG(uap, tptr);
2525 	if ((error = getutimes(usrtvp, ts)) != 0)
2526 		return (error);
2527 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2528 	if ((error = namei(&nd)) != 0)
2529 		return (error);
2530 	NDFREE(&nd, NDF_ONLY_PNBUF);
2531 	error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
2532 	vrele(nd.ni_vp);
2533 	return (error);
2534 }
2535 
2536 /*
2537  * Set the access and modification times of a file.
2538  */
2539 #ifndef _SYS_SYSPROTO_H_
2540 struct lutimes_args {
2541 	char	*path;
2542 	struct	timeval *tptr;
2543 };
2544 #endif
2545 /* ARGSUSED */
2546 int
2547 lutimes(td, uap)
2548 	struct thread *td;
2549 	register struct lutimes_args /* {
2550 		syscallarg(char *) path;
2551 		syscallarg(struct timeval *) tptr;
2552 	} */ *uap;
2553 {
2554 	struct timespec ts[2];
2555 	struct timeval *usrtvp;
2556 	int error;
2557 	struct nameidata nd;
2558 
2559 	usrtvp = SCARG(uap, tptr);
2560 	if ((error = getutimes(usrtvp, ts)) != 0)
2561 		return (error);
2562 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2563 	if ((error = namei(&nd)) != 0)
2564 		return (error);
2565 	NDFREE(&nd, NDF_ONLY_PNBUF);
2566 	error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
2567 	vrele(nd.ni_vp);
2568 	return (error);
2569 }
2570 
2571 /*
2572  * Set the access and modification times of a file.
2573  */
2574 #ifndef _SYS_SYSPROTO_H_
2575 struct futimes_args {
2576 	int	fd;
2577 	struct	timeval *tptr;
2578 };
2579 #endif
2580 /* ARGSUSED */
2581 int
2582 futimes(td, uap)
2583 	struct thread *td;
2584 	register struct futimes_args /* {
2585 		syscallarg(int ) fd;
2586 		syscallarg(struct timeval *) tptr;
2587 	} */ *uap;
2588 {
2589 	struct timespec ts[2];
2590 	struct file *fp;
2591 	struct timeval *usrtvp;
2592 	int error;
2593 
2594 	usrtvp = SCARG(uap, tptr);
2595 	if ((error = getutimes(usrtvp, ts)) != 0)
2596 		return (error);
2597 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2598 		return (error);
2599 	return setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL);
2600 }
2601 
2602 /*
2603  * Truncate a file given its path name.
2604  */
2605 #ifndef _SYS_SYSPROTO_H_
2606 struct truncate_args {
2607 	char	*path;
2608 	int	pad;
2609 	off_t	length;
2610 };
2611 #endif
2612 /* ARGSUSED */
2613 int
2614 truncate(td, uap)
2615 	struct thread *td;
2616 	register struct truncate_args /* {
2617 		syscallarg(char *) path;
2618 		syscallarg(int) pad;
2619 		syscallarg(off_t) length;
2620 	} */ *uap;
2621 {
2622 	struct mount *mp;
2623 	struct vnode *vp;
2624 	struct vattr vattr;
2625 	int error;
2626 	struct nameidata nd;
2627 
2628 	if (uap->length < 0)
2629 		return(EINVAL);
2630 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2631 	if ((error = namei(&nd)) != 0)
2632 		return (error);
2633 	vp = nd.ni_vp;
2634 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2635 		vrele(vp);
2636 		return (error);
2637 	}
2638 	NDFREE(&nd, NDF_ONLY_PNBUF);
2639 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
2640 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2641 	if (vp->v_type == VDIR)
2642 		error = EISDIR;
2643 	else if ((error = vn_writechk(vp)) == 0 &&
2644 	    (error = VOP_ACCESS(vp, VWRITE, td->td_proc->p_ucred, td)) == 0) {
2645 		VATTR_NULL(&vattr);
2646 		vattr.va_size = SCARG(uap, length);
2647 		error = VOP_SETATTR(vp, &vattr, td->td_proc->p_ucred, td);
2648 	}
2649 	vput(vp);
2650 	vn_finished_write(mp);
2651 	return (error);
2652 }
2653 
2654 /*
2655  * Truncate a file given a file descriptor.
2656  */
2657 #ifndef _SYS_SYSPROTO_H_
2658 struct ftruncate_args {
2659 	int	fd;
2660 	int	pad;
2661 	off_t	length;
2662 };
2663 #endif
2664 /* ARGSUSED */
2665 int
2666 ftruncate(td, uap)
2667 	struct thread *td;
2668 	register struct ftruncate_args /* {
2669 		syscallarg(int) fd;
2670 		syscallarg(int) pad;
2671 		syscallarg(off_t) length;
2672 	} */ *uap;
2673 {
2674 	struct mount *mp;
2675 	struct vattr vattr;
2676 	struct vnode *vp;
2677 	struct file *fp;
2678 	int error;
2679 
2680 	if (uap->length < 0)
2681 		return(EINVAL);
2682 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2683 		return (error);
2684 	if ((fp->f_flag & FWRITE) == 0)
2685 		return (EINVAL);
2686 	vp = (struct vnode *)fp->f_data;
2687 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2688 		return (error);
2689 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
2690 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2691 	if (vp->v_type == VDIR)
2692 		error = EISDIR;
2693 	else if ((error = vn_writechk(vp)) == 0) {
2694 		VATTR_NULL(&vattr);
2695 		vattr.va_size = SCARG(uap, length);
2696 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2697 	}
2698 	VOP_UNLOCK(vp, 0, td);
2699 	vn_finished_write(mp);
2700 	return (error);
2701 }
2702 
2703 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2704 /*
2705  * Truncate a file given its path name.
2706  */
2707 #ifndef _SYS_SYSPROTO_H_
2708 struct otruncate_args {
2709 	char	*path;
2710 	long	length;
2711 };
2712 #endif
2713 /* ARGSUSED */
2714 int
2715 otruncate(td, uap)
2716 	struct thread *td;
2717 	register struct otruncate_args /* {
2718 		syscallarg(char *) path;
2719 		syscallarg(long) length;
2720 	} */ *uap;
2721 {
2722 	struct truncate_args /* {
2723 		syscallarg(char *) path;
2724 		syscallarg(int) pad;
2725 		syscallarg(off_t) length;
2726 	} */ nuap;
2727 
2728 	SCARG(&nuap, path) = SCARG(uap, path);
2729 	SCARG(&nuap, length) = SCARG(uap, length);
2730 	return (truncate(td, &nuap));
2731 }
2732 
2733 /*
2734  * Truncate a file given a file descriptor.
2735  */
2736 #ifndef _SYS_SYSPROTO_H_
2737 struct oftruncate_args {
2738 	int	fd;
2739 	long	length;
2740 };
2741 #endif
2742 /* ARGSUSED */
2743 int
2744 oftruncate(td, uap)
2745 	struct thread *td;
2746 	register struct oftruncate_args /* {
2747 		syscallarg(int) fd;
2748 		syscallarg(long) length;
2749 	} */ *uap;
2750 {
2751 	struct ftruncate_args /* {
2752 		syscallarg(int) fd;
2753 		syscallarg(int) pad;
2754 		syscallarg(off_t) length;
2755 	} */ nuap;
2756 
2757 	SCARG(&nuap, fd) = SCARG(uap, fd);
2758 	SCARG(&nuap, length) = SCARG(uap, length);
2759 	return (ftruncate(td, &nuap));
2760 }
2761 #endif /* COMPAT_43 || COMPAT_SUNOS */
2762 
2763 /*
2764  * Sync an open file.
2765  */
2766 #ifndef _SYS_SYSPROTO_H_
2767 struct fsync_args {
2768 	int	fd;
2769 };
2770 #endif
2771 /* ARGSUSED */
2772 int
2773 fsync(td, uap)
2774 	struct thread *td;
2775 	struct fsync_args /* {
2776 		syscallarg(int) fd;
2777 	} */ *uap;
2778 {
2779 	struct vnode *vp;
2780 	struct mount *mp;
2781 	struct file *fp;
2782 	vm_object_t obj;
2783 	int error;
2784 
2785 	GIANT_REQUIRED;
2786 
2787 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2788 		return (error);
2789 	vp = (struct vnode *)fp->f_data;
2790 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2791 		return (error);
2792 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2793 	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2794 		vm_object_page_clean(obj, 0, 0, 0);
2795 	}
2796 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2797 #ifdef SOFTUPDATES
2798 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2799 	    error = softdep_fsync(vp);
2800 #endif
2801 
2802 	VOP_UNLOCK(vp, 0, td);
2803 	vn_finished_write(mp);
2804 	return (error);
2805 }
2806 
2807 /*
2808  * Rename files.  Source and destination must either both be directories,
2809  * or both not be directories.  If target is a directory, it must be empty.
2810  */
2811 #ifndef _SYS_SYSPROTO_H_
2812 struct rename_args {
2813 	char	*from;
2814 	char	*to;
2815 };
2816 #endif
2817 /* ARGSUSED */
2818 int
2819 rename(td, uap)
2820 	struct thread *td;
2821 	register struct rename_args /* {
2822 		syscallarg(char *) from;
2823 		syscallarg(char *) to;
2824 	} */ *uap;
2825 {
2826 	struct mount *mp;
2827 	struct vnode *tvp, *fvp, *tdvp;
2828 	struct nameidata fromnd, tond;
2829 	int error;
2830 
2831 	bwillwrite();
2832 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2833 	    SCARG(uap, from), td);
2834 	if ((error = namei(&fromnd)) != 0)
2835 		return (error);
2836 	fvp = fromnd.ni_vp;
2837 	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
2838 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2839 		vrele(fromnd.ni_dvp);
2840 		vrele(fvp);
2841 		goto out1;
2842 	}
2843 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
2844 	    UIO_USERSPACE, SCARG(uap, to), td);
2845 	if (fromnd.ni_vp->v_type == VDIR)
2846 		tond.ni_cnd.cn_flags |= WILLBEDIR;
2847 	if ((error = namei(&tond)) != 0) {
2848 		/* Translate error code for rename("dir1", "dir2/."). */
2849 		if (error == EISDIR && fvp->v_type == VDIR)
2850 			error = EINVAL;
2851 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2852 		vrele(fromnd.ni_dvp);
2853 		vrele(fvp);
2854 		goto out1;
2855 	}
2856 	tdvp = tond.ni_dvp;
2857 	tvp = tond.ni_vp;
2858 	if (tvp != NULL) {
2859 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2860 			error = ENOTDIR;
2861 			goto out;
2862 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2863 			error = EISDIR;
2864 			goto out;
2865 		}
2866 	}
2867 	if (fvp == tdvp)
2868 		error = EINVAL;
2869 	/*
2870 	 * If source is the same as the destination (that is the
2871 	 * same inode number with the same name in the same directory),
2872 	 * then there is nothing to do.
2873 	 */
2874 	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
2875 	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
2876 	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
2877 	      fromnd.ni_cnd.cn_namelen))
2878 		error = -1;
2879 out:
2880 	if (!error) {
2881 		VOP_LEASE(tdvp, td, td->td_proc->p_ucred, LEASE_WRITE);
2882 		if (fromnd.ni_dvp != tdvp) {
2883 			VOP_LEASE(fromnd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE);
2884 		}
2885 		if (tvp) {
2886 			VOP_LEASE(tvp, td, td->td_proc->p_ucred, LEASE_WRITE);
2887 		}
2888 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2889 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2890 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2891 		NDFREE(&tond, NDF_ONLY_PNBUF);
2892 	} else {
2893 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2894 		NDFREE(&tond, NDF_ONLY_PNBUF);
2895 		if (tdvp == tvp)
2896 			vrele(tdvp);
2897 		else
2898 			vput(tdvp);
2899 		if (tvp)
2900 			vput(tvp);
2901 		vrele(fromnd.ni_dvp);
2902 		vrele(fvp);
2903 	}
2904 	vrele(tond.ni_startdir);
2905 	vn_finished_write(mp);
2906 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2907 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2908 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2909 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2910 out1:
2911 	if (fromnd.ni_startdir)
2912 		vrele(fromnd.ni_startdir);
2913 	if (error == -1)
2914 		return (0);
2915 	return (error);
2916 }
2917 
2918 /*
2919  * Make a directory file.
2920  */
2921 #ifndef _SYS_SYSPROTO_H_
2922 struct mkdir_args {
2923 	char	*path;
2924 	int	mode;
2925 };
2926 #endif
2927 /* ARGSUSED */
2928 int
2929 mkdir(td, uap)
2930 	struct thread *td;
2931 	register struct mkdir_args /* {
2932 		syscallarg(char *) path;
2933 		syscallarg(int) mode;
2934 	} */ *uap;
2935 {
2936 
2937 	return vn_mkdir(uap->path, uap->mode, UIO_USERSPACE, td);
2938 }
2939 
2940 int
2941 vn_mkdir(path, mode, segflg, td)
2942 	char *path;
2943 	int mode;
2944 	enum uio_seg segflg;
2945 	struct thread *td;
2946 {
2947 	struct mount *mp;
2948 	struct vnode *vp;
2949 	struct vattr vattr;
2950 	int error;
2951 	struct nameidata nd;
2952 
2953 restart:
2954 	bwillwrite();
2955 	NDINIT(&nd, CREATE, LOCKPARENT, segflg, path, td);
2956 	nd.ni_cnd.cn_flags |= WILLBEDIR;
2957 	if ((error = namei(&nd)) != 0)
2958 		return (error);
2959 	vp = nd.ni_vp;
2960 	if (vp != NULL) {
2961 		NDFREE(&nd, NDF_ONLY_PNBUF);
2962 		vrele(vp);
2963 		vput(nd.ni_dvp);
2964 		return (EEXIST);
2965 	}
2966 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2967 		NDFREE(&nd, NDF_ONLY_PNBUF);
2968 		vput(nd.ni_dvp);
2969 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2970 			return (error);
2971 		goto restart;
2972 	}
2973 	VATTR_NULL(&vattr);
2974 	vattr.va_type = VDIR;
2975 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
2976 	VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE);
2977 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2978 	NDFREE(&nd, NDF_ONLY_PNBUF);
2979 	vput(nd.ni_dvp);
2980 	if (!error)
2981 		vput(nd.ni_vp);
2982 	vn_finished_write(mp);
2983 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2984 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2985 	return (error);
2986 }
2987 
2988 /*
2989  * Remove a directory file.
2990  */
2991 #ifndef _SYS_SYSPROTO_H_
2992 struct rmdir_args {
2993 	char	*path;
2994 };
2995 #endif
2996 /* ARGSUSED */
2997 int
2998 rmdir(td, uap)
2999 	struct thread *td;
3000 	struct rmdir_args /* {
3001 		syscallarg(char *) path;
3002 	} */ *uap;
3003 {
3004 	struct mount *mp;
3005 	struct vnode *vp;
3006 	int error;
3007 	struct nameidata nd;
3008 
3009 restart:
3010 	bwillwrite();
3011 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3012 	    SCARG(uap, path), td);
3013 	if ((error = namei(&nd)) != 0)
3014 		return (error);
3015 	vp = nd.ni_vp;
3016 	if (vp->v_type != VDIR) {
3017 		error = ENOTDIR;
3018 		goto out;
3019 	}
3020 	/*
3021 	 * No rmdir "." please.
3022 	 */
3023 	if (nd.ni_dvp == vp) {
3024 		error = EINVAL;
3025 		goto out;
3026 	}
3027 	/*
3028 	 * The root of a mounted filesystem cannot be deleted.
3029 	 */
3030 	if (vp->v_flag & VROOT) {
3031 		error = EBUSY;
3032 		goto out;
3033 	}
3034 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3035 		NDFREE(&nd, NDF_ONLY_PNBUF);
3036 		if (nd.ni_dvp == vp)
3037 			vrele(nd.ni_dvp);
3038 		else
3039 			vput(nd.ni_dvp);
3040 		vput(vp);
3041 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3042 			return (error);
3043 		goto restart;
3044 	}
3045 	VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE);
3046 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
3047 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3048 	vn_finished_write(mp);
3049 out:
3050 	NDFREE(&nd, NDF_ONLY_PNBUF);
3051 	if (nd.ni_dvp == vp)
3052 		vrele(nd.ni_dvp);
3053 	else
3054 		vput(nd.ni_dvp);
3055 	vput(vp);
3056 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3057 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3058 	return (error);
3059 }
3060 
3061 #ifdef COMPAT_43
3062 /*
3063  * Read a block of directory entries in a file system independent format.
3064  */
3065 #ifndef _SYS_SYSPROTO_H_
3066 struct ogetdirentries_args {
3067 	int	fd;
3068 	char	*buf;
3069 	u_int	count;
3070 	long	*basep;
3071 };
3072 #endif
3073 int
3074 ogetdirentries(td, uap)
3075 	struct thread *td;
3076 	register struct ogetdirentries_args /* {
3077 		syscallarg(int) fd;
3078 		syscallarg(char *) buf;
3079 		syscallarg(u_int) count;
3080 		syscallarg(long *) basep;
3081 	} */ *uap;
3082 {
3083 	struct vnode *vp;
3084 	struct file *fp;
3085 	struct uio auio, kuio;
3086 	struct iovec aiov, kiov;
3087 	struct dirent *dp, *edp;
3088 	caddr_t dirbuf;
3089 	int error, eofflag, readcnt;
3090 	long loff;
3091 
3092 	/* XXX arbitrary sanity limit on `count'. */
3093 	if (SCARG(uap, count) > 64 * 1024)
3094 		return (EINVAL);
3095 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3096 		return (error);
3097 	if ((fp->f_flag & FREAD) == 0)
3098 		return (EBADF);
3099 	vp = (struct vnode *)fp->f_data;
3100 unionread:
3101 	if (vp->v_type != VDIR)
3102 		return (EINVAL);
3103 	aiov.iov_base = SCARG(uap, buf);
3104 	aiov.iov_len = SCARG(uap, count);
3105 	auio.uio_iov = &aiov;
3106 	auio.uio_iovcnt = 1;
3107 	auio.uio_rw = UIO_READ;
3108 	auio.uio_segflg = UIO_USERSPACE;
3109 	auio.uio_td = td;
3110 	auio.uio_resid = SCARG(uap, count);
3111 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3112 	loff = auio.uio_offset = fp->f_offset;
3113 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3114 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3115 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3116 			    NULL, NULL);
3117 			fp->f_offset = auio.uio_offset;
3118 		} else
3119 #	endif
3120 	{
3121 		kuio = auio;
3122 		kuio.uio_iov = &kiov;
3123 		kuio.uio_segflg = UIO_SYSSPACE;
3124 		kiov.iov_len = SCARG(uap, count);
3125 		MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
3126 		kiov.iov_base = dirbuf;
3127 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3128 			    NULL, NULL);
3129 		fp->f_offset = kuio.uio_offset;
3130 		if (error == 0) {
3131 			readcnt = SCARG(uap, count) - kuio.uio_resid;
3132 			edp = (struct dirent *)&dirbuf[readcnt];
3133 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3134 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3135 					/*
3136 					 * The expected low byte of
3137 					 * dp->d_namlen is our dp->d_type.
3138 					 * The high MBZ byte of dp->d_namlen
3139 					 * is our dp->d_namlen.
3140 					 */
3141 					dp->d_type = dp->d_namlen;
3142 					dp->d_namlen = 0;
3143 #				else
3144 					/*
3145 					 * The dp->d_type is the high byte
3146 					 * of the expected dp->d_namlen,
3147 					 * so must be zero'ed.
3148 					 */
3149 					dp->d_type = 0;
3150 #				endif
3151 				if (dp->d_reclen > 0) {
3152 					dp = (struct dirent *)
3153 					    ((char *)dp + dp->d_reclen);
3154 				} else {
3155 					error = EIO;
3156 					break;
3157 				}
3158 			}
3159 			if (dp >= edp)
3160 				error = uiomove(dirbuf, readcnt, &auio);
3161 		}
3162 		FREE(dirbuf, M_TEMP);
3163 	}
3164 	VOP_UNLOCK(vp, 0, td);
3165 	if (error)
3166 		return (error);
3167 	if (SCARG(uap, count) == auio.uio_resid) {
3168 		if (union_dircheckp) {
3169 			error = union_dircheckp(td, &vp, fp);
3170 			if (error == -1)
3171 				goto unionread;
3172 			if (error)
3173 				return (error);
3174 		}
3175 		if ((vp->v_flag & VROOT) &&
3176 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3177 			struct vnode *tvp = vp;
3178 			vp = vp->v_mount->mnt_vnodecovered;
3179 			VREF(vp);
3180 			fp->f_data = (caddr_t) vp;
3181 			fp->f_offset = 0;
3182 			vrele(tvp);
3183 			goto unionread;
3184 		}
3185 	}
3186 	error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3187 	    sizeof(long));
3188 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
3189 	return (error);
3190 }
3191 #endif /* COMPAT_43 */
3192 
3193 /*
3194  * Read a block of directory entries in a file system independent format.
3195  */
3196 #ifndef _SYS_SYSPROTO_H_
3197 struct getdirentries_args {
3198 	int	fd;
3199 	char	*buf;
3200 	u_int	count;
3201 	long	*basep;
3202 };
3203 #endif
3204 int
3205 getdirentries(td, uap)
3206 	struct thread *td;
3207 	register struct getdirentries_args /* {
3208 		syscallarg(int) fd;
3209 		syscallarg(char *) buf;
3210 		syscallarg(u_int) count;
3211 		syscallarg(long *) basep;
3212 	} */ *uap;
3213 {
3214 	struct vnode *vp;
3215 	struct file *fp;
3216 	struct uio auio;
3217 	struct iovec aiov;
3218 	long loff;
3219 	int error, eofflag;
3220 
3221 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3222 		return (error);
3223 	if ((fp->f_flag & FREAD) == 0)
3224 		return (EBADF);
3225 	vp = (struct vnode *)fp->f_data;
3226 unionread:
3227 	if (vp->v_type != VDIR)
3228 		return (EINVAL);
3229 	aiov.iov_base = SCARG(uap, buf);
3230 	aiov.iov_len = SCARG(uap, count);
3231 	auio.uio_iov = &aiov;
3232 	auio.uio_iovcnt = 1;
3233 	auio.uio_rw = UIO_READ;
3234 	auio.uio_segflg = UIO_USERSPACE;
3235 	auio.uio_td = td;
3236 	auio.uio_resid = SCARG(uap, count);
3237 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3238 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3239 	loff = auio.uio_offset = fp->f_offset;
3240 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
3241 	fp->f_offset = auio.uio_offset;
3242 	VOP_UNLOCK(vp, 0, td);
3243 	if (error)
3244 		return (error);
3245 	if (SCARG(uap, count) == auio.uio_resid) {
3246 		if (union_dircheckp) {
3247 			error = union_dircheckp(td, &vp, fp);
3248 			if (error == -1)
3249 				goto unionread;
3250 			if (error)
3251 				return (error);
3252 		}
3253 		if ((vp->v_flag & VROOT) &&
3254 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3255 			struct vnode *tvp = vp;
3256 			vp = vp->v_mount->mnt_vnodecovered;
3257 			VREF(vp);
3258 			fp->f_data = (caddr_t) vp;
3259 			fp->f_offset = 0;
3260 			vrele(tvp);
3261 			goto unionread;
3262 		}
3263 	}
3264 	if (SCARG(uap, basep) != NULL) {
3265 		error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3266 		    sizeof(long));
3267 	}
3268 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
3269 	return (error);
3270 }
3271 #ifndef _SYS_SYSPROTO_H_
3272 struct getdents_args {
3273 	int fd;
3274 	char *buf;
3275 	size_t count;
3276 };
3277 #endif
3278 int
3279 getdents(td, uap)
3280 	struct thread *td;
3281 	register struct getdents_args /* {
3282 		syscallarg(int) fd;
3283 		syscallarg(char *) buf;
3284 		syscallarg(u_int) count;
3285 	} */ *uap;
3286 {
3287 	struct getdirentries_args ap;
3288 	ap.fd = uap->fd;
3289 	ap.buf = uap->buf;
3290 	ap.count = uap->count;
3291 	ap.basep = NULL;
3292 	return getdirentries(td, &ap);
3293 }
3294 
3295 /*
3296  * Set the mode mask for creation of filesystem nodes.
3297  *
3298  * MP SAFE
3299  */
3300 #ifndef _SYS_SYSPROTO_H_
3301 struct umask_args {
3302 	int	newmask;
3303 };
3304 #endif
3305 int
3306 umask(td, uap)
3307 	struct thread *td;
3308 	struct umask_args /* {
3309 		syscallarg(int) newmask;
3310 	} */ *uap;
3311 {
3312 	register struct filedesc *fdp;
3313 
3314 	fdp = td->td_proc->p_fd;
3315 	td->td_retval[0] = fdp->fd_cmask;
3316 	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
3317 	return (0);
3318 }
3319 
3320 /*
3321  * Void all references to file by ripping underlying filesystem
3322  * away from vnode.
3323  */
3324 #ifndef _SYS_SYSPROTO_H_
3325 struct revoke_args {
3326 	char	*path;
3327 };
3328 #endif
3329 /* ARGSUSED */
3330 int
3331 revoke(td, uap)
3332 	struct thread *td;
3333 	register struct revoke_args /* {
3334 		syscallarg(char *) path;
3335 	} */ *uap;
3336 {
3337 	struct mount *mp;
3338 	struct vnode *vp;
3339 	struct vattr vattr;
3340 	int error;
3341 	struct nameidata nd;
3342 
3343 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3344 	if ((error = namei(&nd)) != 0)
3345 		return (error);
3346 	vp = nd.ni_vp;
3347 	NDFREE(&nd, NDF_ONLY_PNBUF);
3348 	if (vp->v_type != VCHR) {
3349 		error = EINVAL;
3350 		goto out;
3351 	}
3352 	if ((error = VOP_GETATTR(vp, &vattr, td->td_proc->p_ucred, td)) != 0)
3353 		goto out;
3354 	if (td->td_proc->p_ucred->cr_uid != vattr.va_uid &&
3355 	    (error = suser_xxx(0, td->td_proc, PRISON_ROOT)))
3356 		goto out;
3357 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3358 		goto out;
3359 	if (vcount(vp) > 1)
3360 		VOP_REVOKE(vp, REVOKEALL);
3361 	vn_finished_write(mp);
3362 out:
3363 	vrele(vp);
3364 	return (error);
3365 }
3366 
3367 /*
3368  * Convert a user file descriptor to a kernel file entry.
3369  */
3370 int
3371 getvnode(fdp, fd, fpp)
3372 	struct filedesc *fdp;
3373 	int fd;
3374 	struct file **fpp;
3375 {
3376 	struct file *fp;
3377 
3378 	if ((u_int)fd >= fdp->fd_nfiles ||
3379 	    (fp = fdp->fd_ofiles[fd]) == NULL)
3380 		return (EBADF);
3381 	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO)
3382 		return (EINVAL);
3383 	*fpp = fp;
3384 	return (0);
3385 }
3386 /*
3387  * Get (NFS) file handle
3388  */
3389 #ifndef _SYS_SYSPROTO_H_
3390 struct getfh_args {
3391 	char	*fname;
3392 	fhandle_t *fhp;
3393 };
3394 #endif
3395 int
3396 getfh(td, uap)
3397 	struct thread *td;
3398 	register struct getfh_args *uap;
3399 {
3400 	struct nameidata nd;
3401 	fhandle_t fh;
3402 	register struct vnode *vp;
3403 	int error;
3404 
3405 	/*
3406 	 * Must be super user
3407 	 */
3408 	error = suser_td(td);
3409 	if (error)
3410 		return (error);
3411 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3412 	error = namei(&nd);
3413 	if (error)
3414 		return (error);
3415 	NDFREE(&nd, NDF_ONLY_PNBUF);
3416 	vp = nd.ni_vp;
3417 	bzero(&fh, sizeof(fh));
3418 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3419 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3420 	vput(vp);
3421 	if (error)
3422 		return (error);
3423 	error = copyout(&fh, uap->fhp, sizeof (fh));
3424 	return (error);
3425 }
3426 
3427 /*
3428  * syscall for the rpc.lockd to use to translate a NFS file handle into
3429  * an open descriptor.
3430  *
3431  * warning: do not remove the suser() call or this becomes one giant
3432  * security hole.
3433  */
3434 #ifndef _SYS_SYSPROTO_H_
3435 struct fhopen_args {
3436 	const struct fhandle *u_fhp;
3437 	int flags;
3438 };
3439 #endif
3440 int
3441 fhopen(td, uap)
3442 	struct thread *td;
3443 	struct fhopen_args /* {
3444 		syscallarg(const struct fhandle *) u_fhp;
3445 		syscallarg(int) flags;
3446 	} */ *uap;
3447 {
3448 	struct proc *p = td->td_proc;
3449 	struct mount *mp;
3450 	struct vnode *vp;
3451 	struct fhandle fhp;
3452 	struct vattr vat;
3453 	struct vattr *vap = &vat;
3454 	struct flock lf;
3455 	struct file *fp;
3456 	register struct filedesc *fdp = p->p_fd;
3457 	int fmode, mode, error, type;
3458 	struct file *nfp;
3459 	int indx;
3460 
3461 	/*
3462 	 * Must be super user
3463 	 */
3464 	error = suser_td(td);
3465 	if (error)
3466 		return (error);
3467 
3468 	fmode = FFLAGS(SCARG(uap, flags));
3469 	/* why not allow a non-read/write open for our lockd? */
3470 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3471 		return (EINVAL);
3472 	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
3473 	if (error)
3474 		return(error);
3475 	/* find the mount point */
3476 	mp = vfs_getvfs(&fhp.fh_fsid);
3477 	if (mp == NULL)
3478 		return (ESTALE);
3479 	/* now give me my vnode, it gets returned to me locked */
3480 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3481 	if (error)
3482 		return (error);
3483  	/*
3484 	 * from now on we have to make sure not
3485 	 * to forget about the vnode
3486 	 * any error that causes an abort must vput(vp)
3487 	 * just set error = err and 'goto bad;'.
3488 	 */
3489 
3490 	/*
3491 	 * from vn_open
3492 	 */
3493 	if (vp->v_type == VLNK) {
3494 		error = EMLINK;
3495 		goto bad;
3496 	}
3497 	if (vp->v_type == VSOCK) {
3498 		error = EOPNOTSUPP;
3499 		goto bad;
3500 	}
3501 	mode = 0;
3502 	if (fmode & (FWRITE | O_TRUNC)) {
3503 		if (vp->v_type == VDIR) {
3504 			error = EISDIR;
3505 			goto bad;
3506 		}
3507 		error = vn_writechk(vp);
3508 		if (error)
3509 			goto bad;
3510 		mode |= VWRITE;
3511 	}
3512 	if (fmode & FREAD)
3513 		mode |= VREAD;
3514 	if (mode) {
3515 		error = VOP_ACCESS(vp, mode, p->p_ucred, td);
3516 		if (error)
3517 			goto bad;
3518 	}
3519 	if (fmode & O_TRUNC) {
3520 		VOP_UNLOCK(vp, 0, td);				/* XXX */
3521 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3522 			vrele(vp);
3523 			return (error);
3524 		}
3525 		VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE);
3526 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
3527 		VATTR_NULL(vap);
3528 		vap->va_size = 0;
3529 		error = VOP_SETATTR(vp, vap, p->p_ucred, td);
3530 		vn_finished_write(mp);
3531 		if (error)
3532 			goto bad;
3533 	}
3534 	error = VOP_OPEN(vp, fmode, p->p_ucred, td);
3535 	if (error)
3536 		goto bad;
3537 	/*
3538 	 * Make sure that a VM object is created for VMIO support.
3539 	 */
3540 	if (vn_canvmio(vp) == TRUE) {
3541 		if ((error = vfs_object_create(vp, td, p->p_ucred)) != 0)
3542 			goto bad;
3543 	}
3544 	if (fmode & FWRITE)
3545 		vp->v_writecount++;
3546 
3547 	/*
3548 	 * end of vn_open code
3549 	 */
3550 
3551 	if ((error = falloc(td, &nfp, &indx)) != 0)
3552 		goto bad;
3553 	fp = nfp;
3554 
3555 	/*
3556 	 * Hold an extra reference to avoid having fp ripped out
3557 	 * from under us while we block in the lock op
3558 	 */
3559 	fhold(fp);
3560 	nfp->f_data = (caddr_t)vp;
3561 	nfp->f_flag = fmode & FMASK;
3562 	nfp->f_ops = &vnops;
3563 	nfp->f_type = DTYPE_VNODE;
3564 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
3565 		lf.l_whence = SEEK_SET;
3566 		lf.l_start = 0;
3567 		lf.l_len = 0;
3568 		if (fmode & O_EXLOCK)
3569 			lf.l_type = F_WRLCK;
3570 		else
3571 			lf.l_type = F_RDLCK;
3572 		type = F_FLOCK;
3573 		if ((fmode & FNONBLOCK) == 0)
3574 			type |= F_WAIT;
3575 		VOP_UNLOCK(vp, 0, td);
3576 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
3577 			/*
3578 			 * The lock request failed.  Normally close the
3579 			 * descriptor but handle the case where someone might
3580 			 * have dup()d or close()d it when we weren't looking.
3581 			 */
3582 			if (fdp->fd_ofiles[indx] == fp) {
3583 				fdp->fd_ofiles[indx] = NULL;
3584 				fdrop(fp, td);
3585 			}
3586 			/*
3587 			 * release our private reference
3588 			 */
3589 			fdrop(fp, td);
3590 			return(error);
3591 		}
3592 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3593 		fp->f_flag |= FHASLOCK;
3594 	}
3595 	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
3596 		vfs_object_create(vp, td, p->p_ucred);
3597 
3598 	VOP_UNLOCK(vp, 0, td);
3599 	fdrop(fp, td);
3600 	td->td_retval[0] = indx;
3601 	return (0);
3602 
3603 bad:
3604 	vput(vp);
3605 	return (error);
3606 }
3607 
3608 /*
3609  * Stat an (NFS) file handle.
3610  */
3611 #ifndef _SYS_SYSPROTO_H_
3612 struct fhstat_args {
3613 	struct fhandle *u_fhp;
3614 	struct stat *sb;
3615 };
3616 #endif
3617 int
3618 fhstat(td, uap)
3619 	struct thread *td;
3620 	register struct fhstat_args /* {
3621 		syscallarg(struct fhandle *) u_fhp;
3622 		syscallarg(struct stat *) sb;
3623 	} */ *uap;
3624 {
3625 	struct stat sb;
3626 	fhandle_t fh;
3627 	struct mount *mp;
3628 	struct vnode *vp;
3629 	int error;
3630 
3631 	/*
3632 	 * Must be super user
3633 	 */
3634 	error = suser_td(td);
3635 	if (error)
3636 		return (error);
3637 
3638 	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
3639 	if (error)
3640 		return (error);
3641 
3642 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3643 		return (ESTALE);
3644 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3645 		return (error);
3646 	error = vn_stat(vp, &sb, td);
3647 	vput(vp);
3648 	if (error)
3649 		return (error);
3650 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
3651 	return (error);
3652 }
3653 
3654 /*
3655  * Implement fstatfs() for (NFS) file handles.
3656  */
3657 #ifndef _SYS_SYSPROTO_H_
3658 struct fhstatfs_args {
3659 	struct fhandle *u_fhp;
3660 	struct statfs *buf;
3661 };
3662 #endif
3663 int
3664 fhstatfs(td, uap)
3665 	struct thread *td;
3666 	struct fhstatfs_args /* {
3667 		syscallarg(struct fhandle) *u_fhp;
3668 		syscallarg(struct statfs) *buf;
3669 	} */ *uap;
3670 {
3671 	struct statfs *sp;
3672 	struct mount *mp;
3673 	struct vnode *vp;
3674 	struct statfs sb;
3675 	fhandle_t fh;
3676 	int error;
3677 
3678 	/*
3679 	 * Must be super user
3680 	 */
3681 	if ((error = suser_td(td)))
3682 		return (error);
3683 
3684 	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
3685 		return (error);
3686 
3687 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3688 		return (ESTALE);
3689 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3690 		return (error);
3691 	mp = vp->v_mount;
3692 	sp = &mp->mnt_stat;
3693 	vput(vp);
3694 	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3695 		return (error);
3696 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3697 	if (suser_xxx(td->td_proc->p_ucred, 0, 0)) {
3698 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
3699 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3700 		sp = &sb;
3701 	}
3702 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
3703 }
3704 
3705 /*
3706  * Syscall to push extended attribute configuration information into the
3707  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3708  * a command (int cmd), and attribute name and misc data.  For now, the
3709  * attribute name is left in userspace for consumption by the VFS_op.
3710  * It will probably be changed to be copied into sysspace by the
3711  * syscall in the future, once issues with various consumers of the
3712  * attribute code have raised their hands.
3713  *
3714  * Currently this is used only by UFS Extended Attributes.
3715  */
3716 int
3717 extattrctl(td, uap)
3718 	struct thread *td;
3719 	struct extattrctl_args *uap;
3720 {
3721 	struct vnode *filename_vp;
3722 	struct nameidata nd;
3723 	struct mount *mp;
3724 	char attrname[EXTATTR_MAXNAMELEN];
3725 	int error;
3726 
3727 	/*
3728 	 * SCARG(uap, attrname) not always defined.  We check again later
3729 	 * when we invoke the VFS call so as to pass in NULL there if needed.
3730 	 */
3731 	if (SCARG(uap, attrname) != NULL) {
3732 		error = copyinstr(SCARG(uap, attrname), attrname,
3733 		    EXTATTR_MAXNAMELEN, NULL);
3734 		if (error)
3735 			return (error);
3736 	}
3737 
3738 	/*
3739 	 * SCARG(uap, filename) not always defined.  If it is, grab
3740 	 * a vnode lock, which VFS_EXTATTRCTL() will later release.
3741 	 */
3742 	filename_vp = NULL;
3743 	if (SCARG(uap, filename) != NULL) {
3744 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3745 		    SCARG(uap, filename), td);
3746 		if ((error = namei(&nd)) != 0)
3747 			return (error);
3748 		filename_vp = nd.ni_vp;
3749 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3750 	}
3751 
3752 	/* SCARG(uap, path) always defined. */
3753 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3754 	if ((error = namei(&nd)) != 0)
3755 		return (error);
3756 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
3757 	NDFREE(&nd, 0);
3758 	if (error) {
3759 		if (filename_vp)
3760 			vrele(filename_vp);
3761 		return (error);
3762 	}
3763 
3764 	if (SCARG(uap, attrname) != NULL) {
3765 		error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
3766 		    SCARG(uap, attrnamespace), attrname, td);
3767 	} else {
3768 		error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
3769 		    SCARG(uap, attrnamespace), NULL, td);
3770 	}
3771 
3772 	vn_finished_write(mp);
3773 	/*
3774 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3775 	 * filename_vp, so vrele it if it is defined.
3776 	 */
3777 	if (filename_vp != NULL)
3778 		vrele(filename_vp);
3779 
3780 	return (error);
3781 }
3782 
3783 /*
3784  * extattr_set_vp(): Set a named extended attribute on a file or directory
3785  *
3786  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3787  *            kernelspace string pointer "attrname",
3788  *            userspace iovec array pointer "iovp", unsigned int iovcnt
3789  *            proc "p"
3790  * Returns: 0 on success, an error number otherwise
3791  * Locks: none
3792  * References: vp must be a valid reference for the duration of the call
3793  */
3794 static int
3795 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3796     struct iovec *iovp, unsigned iovcnt, struct thread *td)
3797 {
3798 	struct mount *mp;
3799 	struct uio auio;
3800 	struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
3801 	u_int iovlen, cnt;
3802 	int error, i;
3803 
3804 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3805 		return (error);
3806 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
3807 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3808 
3809 	iovlen = iovcnt * sizeof(struct iovec);
3810 	if (iovcnt > UIO_SMALLIOV) {
3811 		if (iovcnt > UIO_MAXIOV) {
3812 			error = EINVAL;
3813 			goto done;
3814 		}
3815 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3816 		needfree = iov;
3817 	} else
3818 		iov = aiov;
3819 	auio.uio_iov = iov;
3820 	auio.uio_iovcnt = iovcnt;
3821 	auio.uio_rw = UIO_WRITE;
3822 	auio.uio_segflg = UIO_USERSPACE;
3823 	auio.uio_td = td;
3824 	auio.uio_offset = 0;
3825 	if ((error = copyin((caddr_t)iovp, (caddr_t)iov, iovlen)))
3826 		goto done;
3827 	auio.uio_resid = 0;
3828 	for (i = 0; i < iovcnt; i++) {
3829 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
3830 			error = EINVAL;
3831 			goto done;
3832 		}
3833 		auio.uio_resid += iov->iov_len;
3834 		iov++;
3835 	}
3836 	cnt = auio.uio_resid;
3837 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3838 	    td->td_proc->p_ucred, td);
3839 	cnt -= auio.uio_resid;
3840 	td->td_retval[0] = cnt;
3841 done:
3842 	if (needfree)
3843 		FREE(needfree, M_IOV);
3844 	VOP_UNLOCK(vp, 0, td);
3845 	vn_finished_write(mp);
3846 	return (error);
3847 }
3848 
3849 int
3850 extattr_set_file(td, uap)
3851 	struct thread *td;
3852 	struct extattr_set_file_args *uap;
3853 {
3854 	struct nameidata nd;
3855 	char attrname[EXTATTR_MAXNAMELEN];
3856 	int error;
3857 
3858 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
3859 	    NULL);
3860 	if (error)
3861 		return (error);
3862 
3863 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3864 	if ((error = namei(&nd)) != 0)
3865 		return (error);
3866 	NDFREE(&nd, NDF_ONLY_PNBUF);
3867 
3868 	error = extattr_set_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
3869 	    SCARG(uap, iovp), SCARG(uap, iovcnt), td);
3870 
3871 	vrele(nd.ni_vp);
3872 	return (error);
3873 }
3874 
3875 int
3876 extattr_set_fd(td, uap)
3877 	struct thread *td;
3878 	struct extattr_set_fd_args *uap;
3879 {
3880 	struct file *fp;
3881 	char attrname[EXTATTR_MAXNAMELEN];
3882 	int error;
3883 
3884 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
3885 	    NULL);
3886 	if (error)
3887 		return (error);
3888 
3889 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3890 		return (error);
3891 
3892 	error = extattr_set_vp((struct vnode *)fp->f_data,
3893 	    SCARG(uap, attrnamespace), attrname, SCARG(uap, iovp),
3894 	    SCARG(uap, iovcnt), td);
3895 
3896 	return (error);
3897 }
3898 
3899 /*
3900  * extattr_get_vp(): Get a named extended attribute on a file or directory
3901  *
3902  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3903  *            kernelspace string pointer "attrname",
3904  *            userspace iovec array pointer "iovp", unsigned int iovcnt,
3905  *            proc "p"
3906  * Returns: 0 on success, an error number otherwise
3907  * Locks: none
3908  * References: vp must be a valid reference for the duration of the call
3909  */
3910 static int
3911 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3912     struct iovec *iovp, unsigned iovcnt, struct thread *td)
3913 {
3914 	struct uio auio;
3915 	struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
3916 	u_int iovlen, cnt;
3917 	int error, i;
3918 
3919 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_READ);
3920 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3921 
3922 	iovlen = iovcnt * sizeof (struct iovec);
3923 	if (iovcnt > UIO_SMALLIOV) {
3924 		if (iovcnt > UIO_MAXIOV) {
3925 			error = EINVAL;
3926 			goto done;
3927 		}
3928 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3929 		needfree = iov;
3930 	} else
3931 		iov = aiov;
3932 	auio.uio_iov = iov;
3933 	auio.uio_iovcnt = iovcnt;
3934 	auio.uio_rw = UIO_READ;
3935 	auio.uio_segflg = UIO_USERSPACE;
3936 	auio.uio_td = td;
3937 	auio.uio_offset = 0;
3938 	if ((error = copyin((caddr_t)iovp, (caddr_t)iov, iovlen)))
3939 		goto done;
3940 	auio.uio_resid = 0;
3941 	for (i = 0; i < iovcnt; i++) {
3942 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
3943 			error = EINVAL;
3944 			goto done;
3945 		}
3946 		auio.uio_resid += iov->iov_len;
3947 		iov++;
3948 	}
3949 	cnt = auio.uio_resid;
3950 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio,
3951 	    td->td_proc->p_ucred, td);
3952 	cnt -= auio.uio_resid;
3953 	td->td_retval[0] = cnt;
3954 done:
3955 	if (needfree)
3956 		FREE(needfree, M_IOV);
3957 	VOP_UNLOCK(vp, 0, td);
3958 	return (error);
3959 }
3960 
3961 int
3962 extattr_get_file(td, uap)
3963 	struct thread *td;
3964 	struct extattr_get_file_args *uap;
3965 {
3966 	struct nameidata nd;
3967 	char attrname[EXTATTR_MAXNAMELEN];
3968 	int error;
3969 
3970 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
3971 	    NULL);
3972 	if (error)
3973 		return (error);
3974 
3975 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3976 	if ((error = namei(&nd)) != 0)
3977 		return (error);
3978 	NDFREE(&nd, NDF_ONLY_PNBUF);
3979 
3980 	error = extattr_get_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
3981 	    SCARG(uap, iovp), SCARG(uap, iovcnt), td);
3982 
3983 	vrele(nd.ni_vp);
3984 	return (error);
3985 }
3986 
3987 int
3988 extattr_get_fd(td, uap)
3989 	struct thread *td;
3990 	struct extattr_get_fd_args *uap;
3991 {
3992 	struct file *fp;
3993 	char attrname[EXTATTR_MAXNAMELEN];
3994 	int error;
3995 
3996 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
3997 	    NULL);
3998 	if (error)
3999 		return (error);
4000 
4001 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4002 		return (error);
4003 
4004 	error = extattr_get_vp((struct vnode *)fp->f_data,
4005 	    SCARG(uap, attrnamespace), attrname, SCARG(uap, iovp),
4006 	    SCARG(uap, iovcnt), td);
4007 
4008 	return (error);
4009 }
4010 
4011 /*
4012  * extattr_delete_vp(): Delete a named extended attribute on a file or
4013  *                      directory
4014  *
4015  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4016  *            kernelspace string pointer "attrname", proc "p"
4017  * Returns: 0 on success, an error number otherwise
4018  * Locks: none
4019  * References: vp must be a valid reference for the duration of the call
4020  */
4021 static int
4022 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4023     struct thread *td)
4024 {
4025 	struct mount *mp;
4026 	int error;
4027 
4028 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4029 		return (error);
4030 	VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE);
4031 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4032 
4033 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4034 	    td->td_proc->p_ucred, td);
4035 
4036 	VOP_UNLOCK(vp, 0, td);
4037 	vn_finished_write(mp);
4038 	return (error);
4039 }
4040 
4041 int
4042 extattr_delete_file(td, uap)
4043 	struct thread *td;
4044 	struct extattr_delete_file_args *uap;
4045 {
4046 	struct nameidata nd;
4047 	char attrname[EXTATTR_MAXNAMELEN];
4048 	int error;
4049 
4050 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4051 	     NULL);
4052 	if (error)
4053 		return(error);
4054 
4055 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
4056 	if ((error = namei(&nd)) != 0)
4057 		return(error);
4058 	NDFREE(&nd, NDF_ONLY_PNBUF);
4059 
4060 	error = extattr_delete_vp(nd.ni_vp, SCARG(uap, attrnamespace),
4061 	    attrname, td);
4062 
4063 	vrele(nd.ni_vp);
4064 	return(error);
4065 }
4066 
4067 int
4068 extattr_delete_fd(td, uap)
4069 	struct thread *td;
4070 	struct extattr_delete_fd_args *uap;
4071 {
4072 	struct file *fp;
4073 	char attrname[EXTATTR_MAXNAMELEN];
4074 	int error;
4075 
4076 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4077 	    NULL);
4078 	if (error)
4079 		return (error);
4080 
4081 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4082 		return (error);
4083 
4084 	error = extattr_delete_vp((struct vnode *)fp->f_data,
4085 	    SCARG(uap, attrnamespace), attrname, td);
4086 
4087 	return (error);
4088 }
4089