xref: /freebsd/sys/kern/vfs_extattr.c (revision 5521ff5a4d1929056e7ffc982fac3341ca54df7c)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  * $FreeBSD$
40  */
41 
42 /* For 4.3 integer FS ID compatibility */
43 #include "opt_compat.h"
44 #include "opt_ffs.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/sysent.h>
51 #include <sys/malloc.h>
52 #include <sys/mount.h>
53 #include <sys/mutex.h>
54 #include <sys/sysproto.h>
55 #include <sys/namei.h>
56 #include <sys/filedesc.h>
57 #include <sys/kernel.h>
58 #include <sys/fcntl.h>
59 #include <sys/file.h>
60 #include <sys/linker.h>
61 #include <sys/stat.h>
62 #include <sys/sx.h>
63 #include <sys/unistd.h>
64 #include <sys/vnode.h>
65 #include <sys/proc.h>
66 #include <sys/dirent.h>
67 #include <sys/extattr.h>
68 #include <sys/jail.h>
69 
70 #include <machine/limits.h>
71 #include <sys/sysctl.h>
72 #include <vm/vm.h>
73 #include <vm/vm_object.h>
74 #include <vm/vm_zone.h>
75 #include <vm/vm_page.h>
76 
77 static int change_dir __P((struct nameidata *ndp, struct proc *p));
78 static void checkdirs __P((struct vnode *olddp, struct vnode *newdp));
79 static int chroot_refuse_vdir_fds __P((struct filedesc *fdp));
80 static int getutimes __P((const struct timeval *, struct timespec *));
81 static int setfown __P((struct proc *, struct vnode *, uid_t, gid_t));
82 static int setfmode __P((struct proc *, struct vnode *, int));
83 static int setfflags __P((struct proc *, struct vnode *, int));
84 static int setutimes __P((struct proc *, struct vnode *,
85     const struct timespec *, int));
86 static int	usermount = 0;	/* if 1, non-root can mount fs. */
87 
88 int (*union_dircheckp) __P((struct proc *, struct vnode **, struct file *));
89 
90 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
91 
92 /*
93  * Virtual File System System Calls
94  */
95 
96 /*
97  * Mount a file system.
98  */
99 #ifndef _SYS_SYSPROTO_H_
100 struct mount_args {
101 	char	*type;
102 	char	*path;
103 	int	flags;
104 	caddr_t	data;
105 };
106 #endif
107 /* ARGSUSED */
108 int
109 mount(p, uap)
110 	struct proc *p;
111 	struct mount_args /* {
112 		syscallarg(char *) type;
113 		syscallarg(char *) path;
114 		syscallarg(int) flags;
115 		syscallarg(caddr_t) data;
116 	} */ *uap;
117 {
118 	char *fstype;
119 	char *fspath;
120 	int error;
121 
122 	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK | M_ZERO);
123 	fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK | M_ZERO);
124 
125 	/*
126 	 * vfs_mount() actually takes a kernel string for `type' and
127 	 * `path' now, so extract them.
128 	 */
129 	error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL);
130 	if (error)
131 		goto finish;
132 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
133 	if (error)
134 		goto finish;
135 	error = vfs_mount(p, fstype, fspath, SCARG(uap, flags),
136 	    SCARG(uap, data));
137 finish:
138 	free(fstype, M_TEMP);
139 	free(fspath, M_TEMP);
140 	return (error);
141 }
142 
143 /*
144  * vfs_mount(): actually attempt a filesystem mount.
145  *
146  * This routine is designed to be a "generic" entry point for routines
147  * that wish to mount a filesystem. All parameters except `fsdata' are
148  * pointers into kernel space. `fsdata' is currently still a pointer
149  * into userspace.
150  */
151 int
152 vfs_mount(p, fstype, fspath, fsflags, fsdata)
153 	struct proc *p;
154 	char *fstype;
155 	char *fspath;
156 	int fsflags;
157 	void *fsdata;
158 {
159 	struct vnode *vp;
160 	struct mount *mp;
161 	struct vfsconf *vfsp;
162 	int error, flag = 0, flag2 = 0;
163 	struct vattr va;
164 	struct nameidata nd;
165 
166 	/*
167 	 * Be ultra-paranoid about making sure the type and fspath
168 	 * variables will fit in our mp buffers, including the
169 	 * terminating NUL.
170 	 */
171 	if ((strlen(fstype) >= MFSNAMELEN - 1) ||
172 	    (strlen(fspath) >= MNAMELEN - 1))
173 		return (ENAMETOOLONG);
174 
175 	if (usermount == 0 && (error = suser(p)))
176 		return (error);
177 	/*
178 	 * Do not allow NFS export by non-root users.
179 	 */
180 	if (fsflags & MNT_EXPORTED) {
181 		error = suser(p);
182 		if (error)
183 			return (error);
184 	}
185 	/*
186 	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
187 	 */
188 	if (suser_xxx(p->p_ucred, 0, 0))
189 		fsflags |= MNT_NOSUID | MNT_NODEV;
190 	/*
191 	 * Get vnode to be covered
192 	 */
193 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, p);
194 	if ((error = namei(&nd)) != 0)
195 		return (error);
196 	NDFREE(&nd, NDF_ONLY_PNBUF);
197 	vp = nd.ni_vp;
198 	if (fsflags & MNT_UPDATE) {
199 		if ((vp->v_flag & VROOT) == 0) {
200 			vput(vp);
201 			return (EINVAL);
202 		}
203 		mp = vp->v_mount;
204 		flag = mp->mnt_flag;
205 		flag2 = mp->mnt_kern_flag;
206 		/*
207 		 * We only allow the filesystem to be reloaded if it
208 		 * is currently mounted read-only.
209 		 */
210 		if ((fsflags & MNT_RELOAD) &&
211 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
212 			vput(vp);
213 			return (EOPNOTSUPP);	/* Needs translation */
214 		}
215 		/*
216 		 * Only root, or the user that did the original mount is
217 		 * permitted to update it.
218 		 */
219 		if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid &&
220 		    (error = suser(p))) {
221 			vput(vp);
222 			return (error);
223 		}
224 		if (vfs_busy(mp, LK_NOWAIT, 0, p)) {
225 			vput(vp);
226 			return (EBUSY);
227 		}
228 		mtx_lock(&vp->v_interlock);
229 		if ((vp->v_flag & VMOUNT) != 0 ||
230 		    vp->v_mountedhere != NULL) {
231 			mtx_unlock(&vp->v_interlock);
232 			vfs_unbusy(mp, p);
233 			vput(vp);
234 			return (EBUSY);
235 		}
236 		vp->v_flag |= VMOUNT;
237 		mtx_unlock(&vp->v_interlock);
238 		mp->mnt_flag |= fsflags &
239 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
240 		VOP_UNLOCK(vp, 0, p);
241 		goto update;
242 	}
243 	/*
244 	 * If the user is not root, ensure that they own the directory
245 	 * onto which we are attempting to mount.
246 	 */
247 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) ||
248 	    (va.va_uid != p->p_ucred->cr_uid &&
249 	     (error = suser(p)))) {
250 		vput(vp);
251 		return (error);
252 	}
253 	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) {
254 		vput(vp);
255 		return (error);
256 	}
257 	if (vp->v_type != VDIR) {
258 		vput(vp);
259 		return (ENOTDIR);
260 	}
261 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
262 		if (!strcmp(vfsp->vfc_name, fstype))
263 			break;
264 	if (vfsp == NULL) {
265 		linker_file_t lf;
266 
267 		/* Only load modules for root (very important!) */
268 		if ((error = suser(p)) != 0) {
269 			vput(vp);
270 			return error;
271 		}
272 		error = linker_load_file(fstype, &lf);
273 		if (error || lf == NULL) {
274 			vput(vp);
275 			if (lf == NULL)
276 				error = ENODEV;
277 			return error;
278 		}
279 		lf->userrefs++;
280 		/* lookup again, see if the VFS was loaded */
281 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
282 			if (!strcmp(vfsp->vfc_name, fstype))
283 				break;
284 		if (vfsp == NULL) {
285 			lf->userrefs--;
286 			linker_file_unload(lf);
287 			vput(vp);
288 			return (ENODEV);
289 		}
290 	}
291 	mtx_lock(&vp->v_interlock);
292 	if ((vp->v_flag & VMOUNT) != 0 ||
293 	    vp->v_mountedhere != NULL) {
294 		mtx_unlock(&vp->v_interlock);
295 		vput(vp);
296 		return (EBUSY);
297 	}
298 	vp->v_flag |= VMOUNT;
299 	mtx_unlock(&vp->v_interlock);
300 
301 	/*
302 	 * Allocate and initialize the filesystem.
303 	 */
304 	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
305 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
306 	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
307 	mp->mnt_op = vfsp->vfc_vfsops;
308 	mp->mnt_vfc = vfsp;
309 	vfsp->vfc_refcount++;
310 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
311 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
312 	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
313 	mp->mnt_stat.f_fstypename[MFSNAMELEN - 1] = '\0';
314 	mp->mnt_vnodecovered = vp;
315 	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
316 	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
317 	mp->mnt_stat.f_mntonname[MNAMELEN - 1] = '\0';
318 	mp->mnt_iosize_max = DFLTPHYS;
319 	VOP_UNLOCK(vp, 0, p);
320 update:
321 	/*
322 	 * Set the mount level flags.
323 	 */
324 	if (fsflags & MNT_RDONLY)
325 		mp->mnt_flag |= MNT_RDONLY;
326 	else if (mp->mnt_flag & MNT_RDONLY)
327 		mp->mnt_kern_flag |= MNTK_WANTRDWR;
328 	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
329 	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
330 	    MNT_NOSYMFOLLOW | MNT_IGNORE |
331 	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
332 	mp->mnt_flag |= fsflags & (MNT_NOSUID | MNT_NOEXEC |
333 	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
334 	    MNT_NOSYMFOLLOW | MNT_IGNORE |
335 	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
336 	/*
337 	 * Mount the filesystem.
338 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
339 	 * get.  No freeing of cn_pnbuf.
340 	 */
341 	error = VFS_MOUNT(mp, fspath, fsdata, &nd, p);
342 	if (mp->mnt_flag & MNT_UPDATE) {
343 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
344 			mp->mnt_flag &= ~MNT_RDONLY;
345 		mp->mnt_flag &=~
346 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
347 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
348 		if (error) {
349 			mp->mnt_flag = flag;
350 			mp->mnt_kern_flag = flag2;
351 		}
352 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
353 			if (mp->mnt_syncer == NULL)
354 				error = vfs_allocate_syncvnode(mp);
355 		} else {
356 			if (mp->mnt_syncer != NULL)
357 				vrele(mp->mnt_syncer);
358 			mp->mnt_syncer = NULL;
359 		}
360 		vfs_unbusy(mp, p);
361 		mtx_lock(&vp->v_interlock);
362 		vp->v_flag &= ~VMOUNT;
363 		mtx_unlock(&vp->v_interlock);
364 		vrele(vp);
365 		return (error);
366 	}
367 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
368 	/*
369 	 * Put the new filesystem on the mount list after root.
370 	 */
371 	cache_purge(vp);
372 	if (!error) {
373 		struct vnode *newdp;
374 
375 		mtx_lock(&vp->v_interlock);
376 		vp->v_flag &= ~VMOUNT;
377 		vp->v_mountedhere = mp;
378 		mtx_unlock(&vp->v_interlock);
379 		mtx_lock(&mountlist_mtx);
380 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
381 		mtx_unlock(&mountlist_mtx);
382 		if (VFS_ROOT(mp, &newdp))
383 			panic("mount: lost mount");
384 		checkdirs(vp, newdp);
385 		vput(newdp);
386 		VOP_UNLOCK(vp, 0, p);
387 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
388 			error = vfs_allocate_syncvnode(mp);
389 		vfs_unbusy(mp, p);
390 		if ((error = VFS_START(mp, 0, p)) != 0)
391 			vrele(vp);
392 	} else {
393 		mtx_lock(&vp->v_interlock);
394 		vp->v_flag &= ~VMOUNT;
395 		mtx_unlock(&vp->v_interlock);
396 		mp->mnt_vfc->vfc_refcount--;
397 		vfs_unbusy(mp, p);
398 		free((caddr_t)mp, M_MOUNT);
399 		vput(vp);
400 	}
401 	return (error);
402 }
403 
404 /*
405  * Scan all active processes to see if any of them have a current
406  * or root directory of `olddp'. If so, replace them with the new
407  * mount point.
408  */
409 static void
410 checkdirs(olddp, newdp)
411 	struct vnode *olddp, *newdp;
412 {
413 	struct filedesc *fdp;
414 	struct proc *p;
415 
416 	if (olddp->v_usecount == 1)
417 		return;
418 	sx_slock(&allproc_lock);
419 	LIST_FOREACH(p, &allproc, p_list) {
420 		fdp = p->p_fd;
421 		if (fdp == NULL)
422 			continue;
423 		if (fdp->fd_cdir == olddp) {
424 			vrele(fdp->fd_cdir);
425 			VREF(newdp);
426 			fdp->fd_cdir = newdp;
427 		}
428 		if (fdp->fd_rdir == olddp) {
429 			vrele(fdp->fd_rdir);
430 			VREF(newdp);
431 			fdp->fd_rdir = newdp;
432 		}
433 	}
434 	sx_sunlock(&allproc_lock);
435 	if (rootvnode == olddp) {
436 		vrele(rootvnode);
437 		VREF(newdp);
438 		rootvnode = newdp;
439 	}
440 }
441 
442 /*
443  * Unmount a file system.
444  *
445  * Note: unmount takes a path to the vnode mounted on as argument,
446  * not special file (as before).
447  */
448 #ifndef _SYS_SYSPROTO_H_
449 struct unmount_args {
450 	char	*path;
451 	int	flags;
452 };
453 #endif
454 /* ARGSUSED */
455 int
456 unmount(p, uap)
457 	struct proc *p;
458 	register struct unmount_args /* {
459 		syscallarg(char *) path;
460 		syscallarg(int) flags;
461 	} */ *uap;
462 {
463 	register struct vnode *vp;
464 	struct mount *mp;
465 	int error;
466 	struct nameidata nd;
467 
468 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
469 	    SCARG(uap, path), p);
470 	if ((error = namei(&nd)) != 0)
471 		return (error);
472 	vp = nd.ni_vp;
473 	NDFREE(&nd, NDF_ONLY_PNBUF);
474 	mp = vp->v_mount;
475 
476 	/*
477 	 * Only root, or the user that did the original mount is
478 	 * permitted to unmount this filesystem.
479 	 */
480 	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
481 	    (error = suser(p))) {
482 		vput(vp);
483 		return (error);
484 	}
485 
486 	/*
487 	 * Don't allow unmounting the root file system.
488 	 */
489 	if (mp->mnt_flag & MNT_ROOTFS) {
490 		vput(vp);
491 		return (EINVAL);
492 	}
493 
494 	/*
495 	 * Must be the root of the filesystem
496 	 */
497 	if ((vp->v_flag & VROOT) == 0) {
498 		vput(vp);
499 		return (EINVAL);
500 	}
501 	vput(vp);
502 	return (dounmount(mp, SCARG(uap, flags), p));
503 }
504 
505 /*
506  * Do the actual file system unmount.
507  */
508 int
509 dounmount(mp, flags, p)
510 	struct mount *mp;
511 	int flags;
512 	struct proc *p;
513 {
514 	struct vnode *coveredvp, *fsrootvp;
515 	int error;
516 	int async_flag;
517 
518 	mtx_lock(&mountlist_mtx);
519 	mp->mnt_kern_flag |= MNTK_UNMOUNT;
520 	lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_mtx, p);
521 	vn_start_write(NULL, &mp, V_WAIT);
522 
523 	if (mp->mnt_flag & MNT_EXPUBLIC)
524 		vfs_setpublicfs(NULL, NULL, NULL);
525 
526 	vfs_msync(mp, MNT_WAIT);
527 	async_flag = mp->mnt_flag & MNT_ASYNC;
528 	mp->mnt_flag &=~ MNT_ASYNC;
529 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
530 	if (mp->mnt_syncer != NULL)
531 		vrele(mp->mnt_syncer);
532 	/* Move process cdir/rdir refs on fs root to underlying vnode. */
533 	if (VFS_ROOT(mp, &fsrootvp) == 0) {
534 		if (mp->mnt_vnodecovered != NULL)
535 			checkdirs(fsrootvp, mp->mnt_vnodecovered);
536 		if (fsrootvp == rootvnode) {
537 			vrele(rootvnode);
538 			rootvnode = NULL;
539 		}
540 		vput(fsrootvp);
541 	}
542 	if (((mp->mnt_flag & MNT_RDONLY) ||
543 	     (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
544 	    (flags & MNT_FORCE)) {
545 		error = VFS_UNMOUNT(mp, flags, p);
546 	}
547 	vn_finished_write(mp);
548 	mtx_lock(&mountlist_mtx);
549 	if (error) {
550 		/* Undo cdir/rdir and rootvnode changes made above. */
551 		if (VFS_ROOT(mp, &fsrootvp) == 0) {
552 			if (mp->mnt_vnodecovered != NULL)
553 				checkdirs(mp->mnt_vnodecovered, fsrootvp);
554 			if (rootvnode == NULL) {
555 				rootvnode = fsrootvp;
556 				vref(rootvnode);
557 			}
558 			vput(fsrootvp);
559 		}
560 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
561 			(void) vfs_allocate_syncvnode(mp);
562 		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
563 		mp->mnt_flag |= async_flag;
564 		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
565 		    &mountlist_mtx, p);
566 		if (mp->mnt_kern_flag & MNTK_MWAIT)
567 			wakeup((caddr_t)mp);
568 		return (error);
569 	}
570 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
571 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
572 		coveredvp->v_mountedhere = (struct mount *)0;
573 		vrele(coveredvp);
574 	}
575 	mp->mnt_vfc->vfc_refcount--;
576 	if (!LIST_EMPTY(&mp->mnt_vnodelist))
577 		panic("unmount: dangling vnode");
578 	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, p);
579 	lockdestroy(&mp->mnt_lock);
580 	if (mp->mnt_kern_flag & MNTK_MWAIT)
581 		wakeup((caddr_t)mp);
582 	free((caddr_t)mp, M_MOUNT);
583 	return (0);
584 }
585 
586 /*
587  * Sync each mounted filesystem.
588  */
589 #ifndef _SYS_SYSPROTO_H_
590 struct sync_args {
591         int     dummy;
592 };
593 #endif
594 
595 #ifdef DEBUG
596 static int syncprt = 0;
597 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
598 #endif
599 
600 /* ARGSUSED */
601 int
602 sync(p, uap)
603 	struct proc *p;
604 	struct sync_args *uap;
605 {
606 	struct mount *mp, *nmp;
607 	int asyncflag;
608 
609 	mtx_lock(&mountlist_mtx);
610 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
611 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, p)) {
612 			nmp = TAILQ_NEXT(mp, mnt_list);
613 			continue;
614 		}
615 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
616 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
617 			asyncflag = mp->mnt_flag & MNT_ASYNC;
618 			mp->mnt_flag &= ~MNT_ASYNC;
619 			vfs_msync(mp, MNT_NOWAIT);
620 			VFS_SYNC(mp, MNT_NOWAIT,
621 			    ((p != NULL) ? p->p_ucred : NOCRED), p);
622 			mp->mnt_flag |= asyncflag;
623 			vn_finished_write(mp);
624 		}
625 		mtx_lock(&mountlist_mtx);
626 		nmp = TAILQ_NEXT(mp, mnt_list);
627 		vfs_unbusy(mp, p);
628 	}
629 	mtx_unlock(&mountlist_mtx);
630 #if 0
631 /*
632  * XXX don't call vfs_bufstats() yet because that routine
633  * was not imported in the Lite2 merge.
634  */
635 #ifdef DIAGNOSTIC
636 	if (syncprt)
637 		vfs_bufstats();
638 #endif /* DIAGNOSTIC */
639 #endif
640 	return (0);
641 }
642 
643 /* XXX PRISON: could be per prison flag */
644 static int prison_quotas;
645 #if 0
646 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
647 #endif
648 
649 /*
650  * Change filesystem quotas.
651  */
652 #ifndef _SYS_SYSPROTO_H_
653 struct quotactl_args {
654 	char *path;
655 	int cmd;
656 	int uid;
657 	caddr_t arg;
658 };
659 #endif
660 /* ARGSUSED */
661 int
662 quotactl(p, uap)
663 	struct proc *p;
664 	register struct quotactl_args /* {
665 		syscallarg(char *) path;
666 		syscallarg(int) cmd;
667 		syscallarg(int) uid;
668 		syscallarg(caddr_t) arg;
669 	} */ *uap;
670 {
671 	struct mount *mp;
672 	int error;
673 	struct nameidata nd;
674 
675 	if (jailed(p->p_ucred) && !prison_quotas)
676 		return (EPERM);
677 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
678 	if ((error = namei(&nd)) != 0)
679 		return (error);
680 	NDFREE(&nd, NDF_ONLY_PNBUF);
681 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
682 	vrele(nd.ni_vp);
683 	if (error)
684 		return (error);
685 	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
686 	    SCARG(uap, arg), p);
687 	vn_finished_write(mp);
688 	return (error);
689 }
690 
691 /*
692  * Get filesystem statistics.
693  */
694 #ifndef _SYS_SYSPROTO_H_
695 struct statfs_args {
696 	char *path;
697 	struct statfs *buf;
698 };
699 #endif
700 /* ARGSUSED */
701 int
702 statfs(p, uap)
703 	struct proc *p;
704 	register struct statfs_args /* {
705 		syscallarg(char *) path;
706 		syscallarg(struct statfs *) buf;
707 	} */ *uap;
708 {
709 	register struct mount *mp;
710 	register struct statfs *sp;
711 	int error;
712 	struct nameidata nd;
713 	struct statfs sb;
714 
715 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
716 	if ((error = namei(&nd)) != 0)
717 		return (error);
718 	mp = nd.ni_vp->v_mount;
719 	sp = &mp->mnt_stat;
720 	NDFREE(&nd, NDF_ONLY_PNBUF);
721 	vrele(nd.ni_vp);
722 	error = VFS_STATFS(mp, sp, p);
723 	if (error)
724 		return (error);
725 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
726 	if (suser_xxx(p->p_ucred, 0, 0)) {
727 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
728 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
729 		sp = &sb;
730 	}
731 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
732 }
733 
734 /*
735  * Get filesystem statistics.
736  */
737 #ifndef _SYS_SYSPROTO_H_
738 struct fstatfs_args {
739 	int fd;
740 	struct statfs *buf;
741 };
742 #endif
743 /* ARGSUSED */
744 int
745 fstatfs(p, uap)
746 	struct proc *p;
747 	register struct fstatfs_args /* {
748 		syscallarg(int) fd;
749 		syscallarg(struct statfs *) buf;
750 	} */ *uap;
751 {
752 	struct file *fp;
753 	struct mount *mp;
754 	register struct statfs *sp;
755 	int error;
756 	struct statfs sb;
757 
758 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
759 		return (error);
760 	mp = ((struct vnode *)fp->f_data)->v_mount;
761 	sp = &mp->mnt_stat;
762 	error = VFS_STATFS(mp, sp, p);
763 	if (error)
764 		return (error);
765 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
766 	if (suser_xxx(p->p_ucred, 0, 0)) {
767 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
768 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
769 		sp = &sb;
770 	}
771 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
772 }
773 
774 /*
775  * Get statistics on all filesystems.
776  */
777 #ifndef _SYS_SYSPROTO_H_
778 struct getfsstat_args {
779 	struct statfs *buf;
780 	long bufsize;
781 	int flags;
782 };
783 #endif
784 int
785 getfsstat(p, uap)
786 	struct proc *p;
787 	register struct getfsstat_args /* {
788 		syscallarg(struct statfs *) buf;
789 		syscallarg(long) bufsize;
790 		syscallarg(int) flags;
791 	} */ *uap;
792 {
793 	register struct mount *mp, *nmp;
794 	register struct statfs *sp;
795 	caddr_t sfsp;
796 	long count, maxcount, error;
797 
798 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
799 	sfsp = (caddr_t)SCARG(uap, buf);
800 	count = 0;
801 	mtx_lock(&mountlist_mtx);
802 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
803 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, p)) {
804 			nmp = TAILQ_NEXT(mp, mnt_list);
805 			continue;
806 		}
807 		if (sfsp && count < maxcount) {
808 			sp = &mp->mnt_stat;
809 			/*
810 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
811 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
812 			 * overrides MNT_WAIT.
813 			 */
814 			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
815 			    (SCARG(uap, flags) & MNT_WAIT)) &&
816 			    (error = VFS_STATFS(mp, sp, p))) {
817 				mtx_lock(&mountlist_mtx);
818 				nmp = TAILQ_NEXT(mp, mnt_list);
819 				vfs_unbusy(mp, p);
820 				continue;
821 			}
822 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
823 			error = copyout((caddr_t)sp, sfsp, sizeof(*sp));
824 			if (error) {
825 				vfs_unbusy(mp, p);
826 				return (error);
827 			}
828 			sfsp += sizeof(*sp);
829 		}
830 		count++;
831 		mtx_lock(&mountlist_mtx);
832 		nmp = TAILQ_NEXT(mp, mnt_list);
833 		vfs_unbusy(mp, p);
834 	}
835 	mtx_unlock(&mountlist_mtx);
836 	if (sfsp && count > maxcount)
837 		p->p_retval[0] = maxcount;
838 	else
839 		p->p_retval[0] = count;
840 	return (0);
841 }
842 
843 /*
844  * Change current working directory to a given file descriptor.
845  */
846 #ifndef _SYS_SYSPROTO_H_
847 struct fchdir_args {
848 	int	fd;
849 };
850 #endif
851 /* ARGSUSED */
852 int
853 fchdir(p, uap)
854 	struct proc *p;
855 	struct fchdir_args /* {
856 		syscallarg(int) fd;
857 	} */ *uap;
858 {
859 	register struct filedesc *fdp = p->p_fd;
860 	struct vnode *vp, *tdp;
861 	struct mount *mp;
862 	struct file *fp;
863 	int error;
864 
865 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
866 		return (error);
867 	vp = (struct vnode *)fp->f_data;
868 	VREF(vp);
869 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
870 	if (vp->v_type != VDIR)
871 		error = ENOTDIR;
872 	else
873 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
874 	while (!error && (mp = vp->v_mountedhere) != NULL) {
875 		if (vfs_busy(mp, 0, 0, p))
876 			continue;
877 		error = VFS_ROOT(mp, &tdp);
878 		vfs_unbusy(mp, p);
879 		if (error)
880 			break;
881 		vput(vp);
882 		vp = tdp;
883 	}
884 	if (error) {
885 		vput(vp);
886 		return (error);
887 	}
888 	VOP_UNLOCK(vp, 0, p);
889 	vrele(fdp->fd_cdir);
890 	fdp->fd_cdir = vp;
891 	return (0);
892 }
893 
894 /*
895  * Change current working directory (``.'').
896  */
897 #ifndef _SYS_SYSPROTO_H_
898 struct chdir_args {
899 	char	*path;
900 };
901 #endif
902 /* ARGSUSED */
903 int
904 chdir(p, uap)
905 	struct proc *p;
906 	struct chdir_args /* {
907 		syscallarg(char *) path;
908 	} */ *uap;
909 {
910 	register struct filedesc *fdp = p->p_fd;
911 	int error;
912 	struct nameidata nd;
913 
914 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
915 	    SCARG(uap, path), p);
916 	if ((error = change_dir(&nd, p)) != 0)
917 		return (error);
918 	NDFREE(&nd, NDF_ONLY_PNBUF);
919 	vrele(fdp->fd_cdir);
920 	fdp->fd_cdir = nd.ni_vp;
921 	return (0);
922 }
923 
924 /*
925  * Helper function for raised chroot(2) security function:  Refuse if
926  * any filedescriptors are open directories.
927  */
928 static int
929 chroot_refuse_vdir_fds(fdp)
930 	struct filedesc *fdp;
931 {
932 	struct vnode *vp;
933 	struct file *fp;
934 	int error;
935 	int fd;
936 
937 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
938 		error = getvnode(fdp, fd, &fp);
939 		if (error)
940 			continue;
941 		vp = (struct vnode *)fp->f_data;
942 		if (vp->v_type != VDIR)
943 			continue;
944 		return(EPERM);
945 	}
946 	return (0);
947 }
948 
949 /*
950  * This sysctl determines if we will allow a process to chroot(2) if it
951  * has a directory open:
952  *	0: disallowed for all processes.
953  *	1: allowed for processes that were not already chroot(2)'ed.
954  *	2: allowed for all processes.
955  */
956 
957 static int chroot_allow_open_directories = 1;
958 
959 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
960      &chroot_allow_open_directories, 0, "");
961 
962 /*
963  * Change notion of root (``/'') directory.
964  */
965 #ifndef _SYS_SYSPROTO_H_
966 struct chroot_args {
967 	char	*path;
968 };
969 #endif
970 /* ARGSUSED */
971 int
972 chroot(p, uap)
973 	struct proc *p;
974 	struct chroot_args /* {
975 		syscallarg(char *) path;
976 	} */ *uap;
977 {
978 	register struct filedesc *fdp = p->p_fd;
979 	int error;
980 	struct nameidata nd;
981 
982 	error = suser_xxx(0, p, PRISON_ROOT);
983 	if (error)
984 		return (error);
985 	if (chroot_allow_open_directories == 0 ||
986 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode))
987 		error = chroot_refuse_vdir_fds(fdp);
988 	if (error)
989 		return (error);
990 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
991 	    SCARG(uap, path), p);
992 	if ((error = change_dir(&nd, p)) != 0)
993 		return (error);
994 	NDFREE(&nd, NDF_ONLY_PNBUF);
995 	vrele(fdp->fd_rdir);
996 	fdp->fd_rdir = nd.ni_vp;
997 	if (!fdp->fd_jdir) {
998 		fdp->fd_jdir = nd.ni_vp;
999                 VREF(fdp->fd_jdir);
1000 	}
1001 	return (0);
1002 }
1003 
1004 /*
1005  * Common routine for chroot and chdir.
1006  */
1007 static int
1008 change_dir(ndp, p)
1009 	register struct nameidata *ndp;
1010 	struct proc *p;
1011 {
1012 	struct vnode *vp;
1013 	int error;
1014 
1015 	error = namei(ndp);
1016 	if (error)
1017 		return (error);
1018 	vp = ndp->ni_vp;
1019 	if (vp->v_type != VDIR)
1020 		error = ENOTDIR;
1021 	else
1022 		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
1023 	if (error)
1024 		vput(vp);
1025 	else
1026 		VOP_UNLOCK(vp, 0, p);
1027 	return (error);
1028 }
1029 
1030 /*
1031  * Check permissions, allocate an open file structure,
1032  * and call the device open routine if any.
1033  */
1034 #ifndef _SYS_SYSPROTO_H_
1035 struct open_args {
1036 	char	*path;
1037 	int	flags;
1038 	int	mode;
1039 };
1040 #endif
1041 int
1042 open(p, uap)
1043 	struct proc *p;
1044 	register struct open_args /* {
1045 		syscallarg(char *) path;
1046 		syscallarg(int) flags;
1047 		syscallarg(int) mode;
1048 	} */ *uap;
1049 {
1050 	struct filedesc *fdp = p->p_fd;
1051 	struct file *fp;
1052 	struct vnode *vp;
1053 	struct vattr vat;
1054 	struct mount *mp;
1055 	int cmode, flags, oflags;
1056 	struct file *nfp;
1057 	int type, indx, error;
1058 	struct flock lf;
1059 	struct nameidata nd;
1060 
1061 	oflags = SCARG(uap, flags);
1062 	if ((oflags & O_ACCMODE) == O_ACCMODE)
1063 		return (EINVAL);
1064 	flags = FFLAGS(oflags);
1065 	error = falloc(p, &nfp, &indx);
1066 	if (error)
1067 		return (error);
1068 	fp = nfp;
1069 	cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1070 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1071 	p->p_dupfd = -indx - 1;			/* XXX check for fdopen */
1072 	/*
1073 	 * Bump the ref count to prevent another process from closing
1074 	 * the descriptor while we are blocked in vn_open()
1075 	 */
1076 	fhold(fp);
1077 	error = vn_open(&nd, &flags, cmode);
1078 	if (error) {
1079 		/*
1080 		 * release our own reference
1081 		 */
1082 		fdrop(fp, p);
1083 
1084 		/*
1085 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1086 		 * responsible for dropping the old contents of ofiles[indx]
1087 		 * if it succeeds.
1088 		 */
1089 		if ((error == ENODEV || error == ENXIO) &&
1090 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
1091 		    (error =
1092 			dupfdopen(p, fdp, indx, p->p_dupfd, flags, error)) == 0) {
1093 			p->p_retval[0] = indx;
1094 			return (0);
1095 		}
1096 		/*
1097 		 * Clean up the descriptor, but only if another thread hadn't
1098 		 * replaced or closed it.
1099 		 */
1100 		if (fdp->fd_ofiles[indx] == fp) {
1101 			fdp->fd_ofiles[indx] = NULL;
1102 			fdrop(fp, p);
1103 		}
1104 
1105 		if (error == ERESTART)
1106 			error = EINTR;
1107 		return (error);
1108 	}
1109 	p->p_dupfd = 0;
1110 	NDFREE(&nd, NDF_ONLY_PNBUF);
1111 	vp = nd.ni_vp;
1112 
1113 	/*
1114 	 * There should be 2 references on the file, one from the descriptor
1115 	 * table, and one for us.
1116 	 *
1117 	 * Handle the case where someone closed the file (via its file
1118 	 * descriptor) while we were blocked.  The end result should look
1119 	 * like opening the file succeeded but it was immediately closed.
1120 	 */
1121 	if (fp->f_count == 1) {
1122 		KASSERT(fdp->fd_ofiles[indx] != fp,
1123 		    ("Open file descriptor lost all refs"));
1124 		VOP_UNLOCK(vp, 0, p);
1125 		vn_close(vp, flags & FMASK, fp->f_cred, p);
1126 		fdrop(fp, p);
1127 		p->p_retval[0] = indx;
1128 		return 0;
1129 	}
1130 
1131 	fp->f_data = (caddr_t)vp;
1132 	fp->f_flag = flags & FMASK;
1133 	fp->f_ops = &vnops;
1134 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1135 	VOP_UNLOCK(vp, 0, p);
1136 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1137 		lf.l_whence = SEEK_SET;
1138 		lf.l_start = 0;
1139 		lf.l_len = 0;
1140 		if (flags & O_EXLOCK)
1141 			lf.l_type = F_WRLCK;
1142 		else
1143 			lf.l_type = F_RDLCK;
1144 		type = F_FLOCK;
1145 		if ((flags & FNONBLOCK) == 0)
1146 			type |= F_WAIT;
1147 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0)
1148 			goto bad;
1149 		fp->f_flag |= FHASLOCK;
1150 	}
1151 	if (flags & O_TRUNC) {
1152 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1153 			goto bad;
1154 		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1155 		VATTR_NULL(&vat);
1156 		vat.va_size = 0;
1157 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1158 		error = VOP_SETATTR(vp, &vat, p->p_ucred, p);
1159 		VOP_UNLOCK(vp, 0, p);
1160 		vn_finished_write(mp);
1161 		if (error)
1162 			goto bad;
1163 	}
1164 	/* assert that vn_open created a backing object if one is needed */
1165 	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
1166 		("open: vmio vnode has no backing object after vn_open"));
1167 	/*
1168 	 * Release our private reference, leaving the one associated with
1169 	 * the descriptor table intact.
1170 	 */
1171 	fdrop(fp, p);
1172 	p->p_retval[0] = indx;
1173 	return (0);
1174 bad:
1175 	if (fdp->fd_ofiles[indx] == fp) {
1176 		fdp->fd_ofiles[indx] = NULL;
1177 		fdrop(fp, p);
1178 	}
1179 	fdrop(fp, p);
1180 	return (error);
1181 }
1182 
1183 #ifdef COMPAT_43
1184 /*
1185  * Create a file.
1186  */
1187 #ifndef _SYS_SYSPROTO_H_
1188 struct ocreat_args {
1189 	char	*path;
1190 	int	mode;
1191 };
1192 #endif
1193 int
1194 ocreat(p, uap)
1195 	struct proc *p;
1196 	register struct ocreat_args /* {
1197 		syscallarg(char *) path;
1198 		syscallarg(int) mode;
1199 	} */ *uap;
1200 {
1201 	struct open_args /* {
1202 		syscallarg(char *) path;
1203 		syscallarg(int) flags;
1204 		syscallarg(int) mode;
1205 	} */ nuap;
1206 
1207 	SCARG(&nuap, path) = SCARG(uap, path);
1208 	SCARG(&nuap, mode) = SCARG(uap, mode);
1209 	SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC;
1210 	return (open(p, &nuap));
1211 }
1212 #endif /* COMPAT_43 */
1213 
1214 /*
1215  * Create a special file.
1216  */
1217 #ifndef _SYS_SYSPROTO_H_
1218 struct mknod_args {
1219 	char	*path;
1220 	int	mode;
1221 	int	dev;
1222 };
1223 #endif
1224 /* ARGSUSED */
1225 int
1226 mknod(p, uap)
1227 	struct proc *p;
1228 	register struct mknod_args /* {
1229 		syscallarg(char *) path;
1230 		syscallarg(int) mode;
1231 		syscallarg(int) dev;
1232 	} */ *uap;
1233 {
1234 	struct vnode *vp;
1235 	struct mount *mp;
1236 	struct vattr vattr;
1237 	int error;
1238 	int whiteout = 0;
1239 	struct nameidata nd;
1240 
1241 	switch (SCARG(uap, mode) & S_IFMT) {
1242 	case S_IFCHR:
1243 	case S_IFBLK:
1244 		error = suser(p);
1245 		break;
1246 	default:
1247 		error = suser_xxx(0, p, PRISON_ROOT);
1248 		break;
1249 	}
1250 	if (error)
1251 		return (error);
1252 restart:
1253 	bwillwrite();
1254 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1255 	if ((error = namei(&nd)) != 0)
1256 		return (error);
1257 	vp = nd.ni_vp;
1258 	if (vp != NULL) {
1259 		vrele(vp);
1260 		error = EEXIST;
1261 	} else {
1262 		VATTR_NULL(&vattr);
1263 		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
1264 		vattr.va_rdev = SCARG(uap, dev);
1265 		whiteout = 0;
1266 
1267 		switch (SCARG(uap, mode) & S_IFMT) {
1268 		case S_IFMT:	/* used by badsect to flag bad sectors */
1269 			vattr.va_type = VBAD;
1270 			break;
1271 		case S_IFCHR:
1272 			vattr.va_type = VCHR;
1273 			break;
1274 		case S_IFBLK:
1275 			vattr.va_type = VBLK;
1276 			break;
1277 		case S_IFWHT:
1278 			whiteout = 1;
1279 			break;
1280 		default:
1281 			error = EINVAL;
1282 			break;
1283 		}
1284 	}
1285 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1286 		NDFREE(&nd, NDF_ONLY_PNBUF);
1287 		vput(nd.ni_dvp);
1288 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1289 			return (error);
1290 		goto restart;
1291 	}
1292 	if (!error) {
1293 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1294 		if (whiteout)
1295 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1296 		else {
1297 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1298 						&nd.ni_cnd, &vattr);
1299 			if (error == 0)
1300 				vput(nd.ni_vp);
1301 		}
1302 	}
1303 	NDFREE(&nd, NDF_ONLY_PNBUF);
1304 	vput(nd.ni_dvp);
1305 	vn_finished_write(mp);
1306 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1307 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1308 	return (error);
1309 }
1310 
1311 /*
1312  * Create a named pipe.
1313  */
1314 #ifndef _SYS_SYSPROTO_H_
1315 struct mkfifo_args {
1316 	char	*path;
1317 	int	mode;
1318 };
1319 #endif
1320 /* ARGSUSED */
1321 int
1322 mkfifo(p, uap)
1323 	struct proc *p;
1324 	register struct mkfifo_args /* {
1325 		syscallarg(char *) path;
1326 		syscallarg(int) mode;
1327 	} */ *uap;
1328 {
1329 	struct mount *mp;
1330 	struct vattr vattr;
1331 	int error;
1332 	struct nameidata nd;
1333 
1334 restart:
1335 	bwillwrite();
1336 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1337 	if ((error = namei(&nd)) != 0)
1338 		return (error);
1339 	if (nd.ni_vp != NULL) {
1340 		NDFREE(&nd, NDF_ONLY_PNBUF);
1341 		vrele(nd.ni_vp);
1342 		vput(nd.ni_dvp);
1343 		return (EEXIST);
1344 	}
1345 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1346 		NDFREE(&nd, NDF_ONLY_PNBUF);
1347 		vput(nd.ni_dvp);
1348 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1349 			return (error);
1350 		goto restart;
1351 	}
1352 	VATTR_NULL(&vattr);
1353 	vattr.va_type = VFIFO;
1354 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
1355 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1356 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1357 	if (error == 0)
1358 		vput(nd.ni_vp);
1359 	NDFREE(&nd, NDF_ONLY_PNBUF);
1360 	vput(nd.ni_dvp);
1361 	vn_finished_write(mp);
1362 	return (error);
1363 }
1364 
1365 /*
1366  * Make a hard file link.
1367  */
1368 #ifndef _SYS_SYSPROTO_H_
1369 struct link_args {
1370 	char	*path;
1371 	char	*link;
1372 };
1373 #endif
1374 /* ARGSUSED */
1375 int
1376 link(p, uap)
1377 	struct proc *p;
1378 	register struct link_args /* {
1379 		syscallarg(char *) path;
1380 		syscallarg(char *) link;
1381 	} */ *uap;
1382 {
1383 	struct vnode *vp;
1384 	struct mount *mp;
1385 	struct nameidata nd;
1386 	int error;
1387 
1388 	bwillwrite();
1389 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), p);
1390 	if ((error = namei(&nd)) != 0)
1391 		return (error);
1392 	NDFREE(&nd, NDF_ONLY_PNBUF);
1393 	vp = nd.ni_vp;
1394 	if (vp->v_type == VDIR) {
1395 		vrele(vp);
1396 		return (EPERM);		/* POSIX */
1397 	}
1398 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1399 		vrele(vp);
1400 		return (error);
1401 	}
1402 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
1403 	if ((error = namei(&nd)) == 0) {
1404 		if (nd.ni_vp != NULL) {
1405 			vrele(nd.ni_vp);
1406 			error = EEXIST;
1407 		} else {
1408 			VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1409 			VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1410 			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1411 		}
1412 		NDFREE(&nd, NDF_ONLY_PNBUF);
1413 		vput(nd.ni_dvp);
1414 	}
1415 	vrele(vp);
1416 	vn_finished_write(mp);
1417 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1418 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1419 	return (error);
1420 }
1421 
1422 /*
1423  * Make a symbolic link.
1424  */
1425 #ifndef _SYS_SYSPROTO_H_
1426 struct symlink_args {
1427 	char	*path;
1428 	char	*link;
1429 };
1430 #endif
1431 /* ARGSUSED */
1432 int
1433 symlink(p, uap)
1434 	struct proc *p;
1435 	register struct symlink_args /* {
1436 		syscallarg(char *) path;
1437 		syscallarg(char *) link;
1438 	} */ *uap;
1439 {
1440 	struct mount *mp;
1441 	struct vattr vattr;
1442 	char *path;
1443 	int error;
1444 	struct nameidata nd;
1445 
1446 	path = zalloc(namei_zone);
1447 	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
1448 		goto out;
1449 restart:
1450 	bwillwrite();
1451 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
1452 	if ((error = namei(&nd)) != 0)
1453 		goto out;
1454 	if (nd.ni_vp) {
1455 		NDFREE(&nd, NDF_ONLY_PNBUF);
1456 		vrele(nd.ni_vp);
1457 		vput(nd.ni_dvp);
1458 		error = EEXIST;
1459 		goto out;
1460 	}
1461 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1462 		NDFREE(&nd, NDF_ONLY_PNBUF);
1463 		vput(nd.ni_dvp);
1464 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1465 			return (error);
1466 		goto restart;
1467 	}
1468 	VATTR_NULL(&vattr);
1469 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1470 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1471 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1472 	NDFREE(&nd, NDF_ONLY_PNBUF);
1473 	if (error == 0)
1474 		vput(nd.ni_vp);
1475 	vput(nd.ni_dvp);
1476 	vn_finished_write(mp);
1477 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1478 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1479 out:
1480 	zfree(namei_zone, path);
1481 	return (error);
1482 }
1483 
1484 /*
1485  * Delete a whiteout from the filesystem.
1486  */
1487 /* ARGSUSED */
1488 int
1489 undelete(p, uap)
1490 	struct proc *p;
1491 	register struct undelete_args /* {
1492 		syscallarg(char *) path;
1493 	} */ *uap;
1494 {
1495 	int error;
1496 	struct mount *mp;
1497 	struct nameidata nd;
1498 
1499 restart:
1500 	bwillwrite();
1501 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1502 	    SCARG(uap, path), p);
1503 	error = namei(&nd);
1504 	if (error)
1505 		return (error);
1506 
1507 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1508 		NDFREE(&nd, NDF_ONLY_PNBUF);
1509 		if (nd.ni_vp)
1510 			vrele(nd.ni_vp);
1511 		vput(nd.ni_dvp);
1512 		return (EEXIST);
1513 	}
1514 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1515 		NDFREE(&nd, NDF_ONLY_PNBUF);
1516 		vput(nd.ni_dvp);
1517 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1518 			return (error);
1519 		goto restart;
1520 	}
1521 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1522 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1523 	NDFREE(&nd, NDF_ONLY_PNBUF);
1524 	vput(nd.ni_dvp);
1525 	vn_finished_write(mp);
1526 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1527 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1528 	return (error);
1529 }
1530 
1531 /*
1532  * Delete a name from the filesystem.
1533  */
1534 #ifndef _SYS_SYSPROTO_H_
1535 struct unlink_args {
1536 	char	*path;
1537 };
1538 #endif
1539 /* ARGSUSED */
1540 int
1541 unlink(p, uap)
1542 	struct proc *p;
1543 	struct unlink_args /* {
1544 		syscallarg(char *) path;
1545 	} */ *uap;
1546 {
1547 	struct mount *mp;
1548 	struct vnode *vp;
1549 	int error;
1550 	struct nameidata nd;
1551 
1552 restart:
1553 	bwillwrite();
1554 	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1555 	if ((error = namei(&nd)) != 0)
1556 		return (error);
1557 	vp = nd.ni_vp;
1558 	if (vp->v_type == VDIR)
1559 		error = EPERM;		/* POSIX */
1560 	else {
1561 		/*
1562 		 * The root of a mounted filesystem cannot be deleted.
1563 		 *
1564 		 * XXX: can this only be a VDIR case?
1565 		 */
1566 		if (vp->v_flag & VROOT)
1567 			error = EBUSY;
1568 	}
1569 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1570 		NDFREE(&nd, NDF_ONLY_PNBUF);
1571 		vrele(vp);
1572 		vput(nd.ni_dvp);
1573 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1574 			return (error);
1575 		goto restart;
1576 	}
1577 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1578 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1579 	if (!error) {
1580 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1581 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1582 	}
1583 	NDFREE(&nd, NDF_ONLY_PNBUF);
1584 	vput(nd.ni_dvp);
1585 	vput(vp);
1586 	vn_finished_write(mp);
1587 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1588 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1589 	return (error);
1590 }
1591 
1592 /*
1593  * Reposition read/write file offset.
1594  */
1595 #ifndef _SYS_SYSPROTO_H_
1596 struct lseek_args {
1597 	int	fd;
1598 	int	pad;
1599 	off_t	offset;
1600 	int	whence;
1601 };
1602 #endif
1603 int
1604 lseek(p, uap)
1605 	struct proc *p;
1606 	register struct lseek_args /* {
1607 		syscallarg(int) fd;
1608 		syscallarg(int) pad;
1609 		syscallarg(off_t) offset;
1610 		syscallarg(int) whence;
1611 	} */ *uap;
1612 {
1613 	struct ucred *cred = p->p_ucred;
1614 	register struct filedesc *fdp = p->p_fd;
1615 	register struct file *fp;
1616 	struct vattr vattr;
1617 	int error;
1618 
1619 	if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
1620 	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
1621 		return (EBADF);
1622 	if (fp->f_type != DTYPE_VNODE)
1623 		return (ESPIPE);
1624 	switch (SCARG(uap, whence)) {
1625 	case L_INCR:
1626 		fp->f_offset += SCARG(uap, offset);
1627 		break;
1628 	case L_XTND:
1629 		error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p);
1630 		if (error)
1631 			return (error);
1632 		fp->f_offset = SCARG(uap, offset) + vattr.va_size;
1633 		break;
1634 	case L_SET:
1635 		fp->f_offset = SCARG(uap, offset);
1636 		break;
1637 	default:
1638 		return (EINVAL);
1639 	}
1640 	*(off_t *)(p->p_retval) = fp->f_offset;
1641 	return (0);
1642 }
1643 
1644 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1645 /*
1646  * Reposition read/write file offset.
1647  */
1648 #ifndef _SYS_SYSPROTO_H_
1649 struct olseek_args {
1650 	int	fd;
1651 	long	offset;
1652 	int	whence;
1653 };
1654 #endif
1655 int
1656 olseek(p, uap)
1657 	struct proc *p;
1658 	register struct olseek_args /* {
1659 		syscallarg(int) fd;
1660 		syscallarg(long) offset;
1661 		syscallarg(int) whence;
1662 	} */ *uap;
1663 {
1664 	struct lseek_args /* {
1665 		syscallarg(int) fd;
1666 		syscallarg(int) pad;
1667 		syscallarg(off_t) offset;
1668 		syscallarg(int) whence;
1669 	} */ nuap;
1670 	int error;
1671 
1672 	SCARG(&nuap, fd) = SCARG(uap, fd);
1673 	SCARG(&nuap, offset) = SCARG(uap, offset);
1674 	SCARG(&nuap, whence) = SCARG(uap, whence);
1675 	error = lseek(p, &nuap);
1676 	return (error);
1677 }
1678 #endif /* COMPAT_43 */
1679 
1680 /*
1681  * Check access permissions.
1682  */
1683 #ifndef _SYS_SYSPROTO_H_
1684 struct access_args {
1685 	char	*path;
1686 	int	flags;
1687 };
1688 #endif
1689 int
1690 access(p, uap)
1691 	struct proc *p;
1692 	register struct access_args /* {
1693 		syscallarg(char *) path;
1694 		syscallarg(int) flags;
1695 	} */ *uap;
1696 {
1697 	struct ucred *cred, *tmpcred;
1698 	register struct vnode *vp;
1699 	int error, flags;
1700 	struct nameidata nd;
1701 
1702 	cred = p->p_ucred;
1703 	/*
1704 	 * Create and modify a temporary credential instead of one that
1705 	 * is potentially shared.  This could also mess up socket
1706 	 * buffer accounting which can run in an interrupt context.
1707 	 *
1708 	 * XXX - Depending on how "threads" are finally implemented, it
1709 	 * may be better to explicitly pass the credential to namei()
1710 	 * rather than to modify the potentially shared process structure.
1711 	 */
1712 	tmpcred = crdup(cred);
1713 	tmpcred->cr_uid = cred->cr_ruid;
1714 	tmpcred->cr_groups[0] = cred->cr_rgid;
1715 	p->p_ucred = tmpcred;
1716 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1717 	    SCARG(uap, path), p);
1718 	if ((error = namei(&nd)) != 0)
1719 		goto out1;
1720 	vp = nd.ni_vp;
1721 
1722 	/* Flags == 0 means only check for existence. */
1723 	if (SCARG(uap, flags)) {
1724 		flags = 0;
1725 		if (SCARG(uap, flags) & R_OK)
1726 			flags |= VREAD;
1727 		if (SCARG(uap, flags) & W_OK)
1728 			flags |= VWRITE;
1729 		if (SCARG(uap, flags) & X_OK)
1730 			flags |= VEXEC;
1731 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1732 			error = VOP_ACCESS(vp, flags, tmpcred, p);
1733 	}
1734 	NDFREE(&nd, NDF_ONLY_PNBUF);
1735 	vput(vp);
1736 out1:
1737 	p->p_ucred = cred;
1738 	crfree(tmpcred);
1739 	return (error);
1740 }
1741 
1742 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1743 /*
1744  * Get file status; this version follows links.
1745  */
1746 #ifndef _SYS_SYSPROTO_H_
1747 struct ostat_args {
1748 	char	*path;
1749 	struct ostat *ub;
1750 };
1751 #endif
1752 /* ARGSUSED */
1753 int
1754 ostat(p, uap)
1755 	struct proc *p;
1756 	register struct ostat_args /* {
1757 		syscallarg(char *) path;
1758 		syscallarg(struct ostat *) ub;
1759 	} */ *uap;
1760 {
1761 	struct stat sb;
1762 	struct ostat osb;
1763 	int error;
1764 	struct nameidata nd;
1765 
1766 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1767 	    SCARG(uap, path), p);
1768 	if ((error = namei(&nd)) != 0)
1769 		return (error);
1770 	NDFREE(&nd, NDF_ONLY_PNBUF);
1771 	error = vn_stat(nd.ni_vp, &sb, p);
1772 	vput(nd.ni_vp);
1773 	if (error)
1774 		return (error);
1775 	cvtstat(&sb, &osb);
1776 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1777 	return (error);
1778 }
1779 
1780 /*
1781  * Get file status; this version does not follow links.
1782  */
1783 #ifndef _SYS_SYSPROTO_H_
1784 struct olstat_args {
1785 	char	*path;
1786 	struct ostat *ub;
1787 };
1788 #endif
1789 /* ARGSUSED */
1790 int
1791 olstat(p, uap)
1792 	struct proc *p;
1793 	register struct olstat_args /* {
1794 		syscallarg(char *) path;
1795 		syscallarg(struct ostat *) ub;
1796 	} */ *uap;
1797 {
1798 	struct vnode *vp;
1799 	struct stat sb;
1800 	struct ostat osb;
1801 	int error;
1802 	struct nameidata nd;
1803 
1804 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1805 	    SCARG(uap, path), p);
1806 	if ((error = namei(&nd)) != 0)
1807 		return (error);
1808 	vp = nd.ni_vp;
1809 	error = vn_stat(vp, &sb, p);
1810 	NDFREE(&nd, NDF_ONLY_PNBUF);
1811 	vput(vp);
1812 	if (error)
1813 		return (error);
1814 	cvtstat(&sb, &osb);
1815 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1816 	return (error);
1817 }
1818 
1819 /*
1820  * Convert from an old to a new stat structure.
1821  */
1822 void
1823 cvtstat(st, ost)
1824 	struct stat *st;
1825 	struct ostat *ost;
1826 {
1827 
1828 	ost->st_dev = st->st_dev;
1829 	ost->st_ino = st->st_ino;
1830 	ost->st_mode = st->st_mode;
1831 	ost->st_nlink = st->st_nlink;
1832 	ost->st_uid = st->st_uid;
1833 	ost->st_gid = st->st_gid;
1834 	ost->st_rdev = st->st_rdev;
1835 	if (st->st_size < (quad_t)1 << 32)
1836 		ost->st_size = st->st_size;
1837 	else
1838 		ost->st_size = -2;
1839 	ost->st_atime = st->st_atime;
1840 	ost->st_mtime = st->st_mtime;
1841 	ost->st_ctime = st->st_ctime;
1842 	ost->st_blksize = st->st_blksize;
1843 	ost->st_blocks = st->st_blocks;
1844 	ost->st_flags = st->st_flags;
1845 	ost->st_gen = st->st_gen;
1846 }
1847 #endif /* COMPAT_43 || COMPAT_SUNOS */
1848 
1849 /*
1850  * Get file status; this version follows links.
1851  */
1852 #ifndef _SYS_SYSPROTO_H_
1853 struct stat_args {
1854 	char	*path;
1855 	struct stat *ub;
1856 };
1857 #endif
1858 /* ARGSUSED */
1859 int
1860 stat(p, uap)
1861 	struct proc *p;
1862 	register struct stat_args /* {
1863 		syscallarg(char *) path;
1864 		syscallarg(struct stat *) ub;
1865 	} */ *uap;
1866 {
1867 	struct stat sb;
1868 	int error;
1869 	struct nameidata nd;
1870 
1871 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1872 	    SCARG(uap, path), p);
1873 	if ((error = namei(&nd)) != 0)
1874 		return (error);
1875 	error = vn_stat(nd.ni_vp, &sb, p);
1876 	NDFREE(&nd, NDF_ONLY_PNBUF);
1877 	vput(nd.ni_vp);
1878 	if (error)
1879 		return (error);
1880 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
1881 	return (error);
1882 }
1883 
1884 /*
1885  * Get file status; this version does not follow links.
1886  */
1887 #ifndef _SYS_SYSPROTO_H_
1888 struct lstat_args {
1889 	char	*path;
1890 	struct stat *ub;
1891 };
1892 #endif
1893 /* ARGSUSED */
1894 int
1895 lstat(p, uap)
1896 	struct proc *p;
1897 	register struct lstat_args /* {
1898 		syscallarg(char *) path;
1899 		syscallarg(struct stat *) ub;
1900 	} */ *uap;
1901 {
1902 	int error;
1903 	struct vnode *vp;
1904 	struct stat sb;
1905 	struct nameidata nd;
1906 
1907 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1908 	    SCARG(uap, path), p);
1909 	if ((error = namei(&nd)) != 0)
1910 		return (error);
1911 	vp = nd.ni_vp;
1912 	error = vn_stat(vp, &sb, p);
1913 	NDFREE(&nd, NDF_ONLY_PNBUF);
1914 	vput(vp);
1915 	if (error)
1916 		return (error);
1917 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
1918 	return (error);
1919 }
1920 
1921 /*
1922  * Implementation of the NetBSD stat() function.
1923  * XXX This should probably be collapsed with the FreeBSD version,
1924  * as the differences are only due to vn_stat() clearing spares at
1925  * the end of the structures.  vn_stat could be split to avoid this,
1926  * and thus collapse the following to close to zero code.
1927  */
1928 void
1929 cvtnstat(sb, nsb)
1930 	struct stat *sb;
1931 	struct nstat *nsb;
1932 {
1933 	nsb->st_dev = sb->st_dev;
1934 	nsb->st_ino = sb->st_ino;
1935 	nsb->st_mode = sb->st_mode;
1936 	nsb->st_nlink = sb->st_nlink;
1937 	nsb->st_uid = sb->st_uid;
1938 	nsb->st_gid = sb->st_gid;
1939 	nsb->st_rdev = sb->st_rdev;
1940 	nsb->st_atimespec = sb->st_atimespec;
1941 	nsb->st_mtimespec = sb->st_mtimespec;
1942 	nsb->st_ctimespec = sb->st_ctimespec;
1943 	nsb->st_size = sb->st_size;
1944 	nsb->st_blocks = sb->st_blocks;
1945 	nsb->st_blksize = sb->st_blksize;
1946 	nsb->st_flags = sb->st_flags;
1947 	nsb->st_gen = sb->st_gen;
1948 	nsb->st_qspare[0] = sb->st_qspare[0];
1949 	nsb->st_qspare[1] = sb->st_qspare[1];
1950 }
1951 
1952 #ifndef _SYS_SYSPROTO_H_
1953 struct nstat_args {
1954 	char	*path;
1955 	struct nstat *ub;
1956 };
1957 #endif
1958 /* ARGSUSED */
1959 int
1960 nstat(p, uap)
1961 	struct proc *p;
1962 	register struct nstat_args /* {
1963 		syscallarg(char *) path;
1964 		syscallarg(struct nstat *) ub;
1965 	} */ *uap;
1966 {
1967 	struct stat sb;
1968 	struct nstat nsb;
1969 	int error;
1970 	struct nameidata nd;
1971 
1972 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1973 	    SCARG(uap, path), p);
1974 	if ((error = namei(&nd)) != 0)
1975 		return (error);
1976 	NDFREE(&nd, NDF_ONLY_PNBUF);
1977 	error = vn_stat(nd.ni_vp, &sb, p);
1978 	vput(nd.ni_vp);
1979 	if (error)
1980 		return (error);
1981 	cvtnstat(&sb, &nsb);
1982 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
1983 	return (error);
1984 }
1985 
1986 /*
1987  * NetBSD lstat.  Get file status; this version does not follow links.
1988  */
1989 #ifndef _SYS_SYSPROTO_H_
1990 struct lstat_args {
1991 	char	*path;
1992 	struct stat *ub;
1993 };
1994 #endif
1995 /* ARGSUSED */
1996 int
1997 nlstat(p, uap)
1998 	struct proc *p;
1999 	register struct nlstat_args /* {
2000 		syscallarg(char *) path;
2001 		syscallarg(struct nstat *) ub;
2002 	} */ *uap;
2003 {
2004 	int error;
2005 	struct vnode *vp;
2006 	struct stat sb;
2007 	struct nstat nsb;
2008 	struct nameidata nd;
2009 
2010 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2011 	    SCARG(uap, path), p);
2012 	if ((error = namei(&nd)) != 0)
2013 		return (error);
2014 	vp = nd.ni_vp;
2015 	NDFREE(&nd, NDF_ONLY_PNBUF);
2016 	error = vn_stat(vp, &sb, p);
2017 	vput(vp);
2018 	if (error)
2019 		return (error);
2020 	cvtnstat(&sb, &nsb);
2021 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2022 	return (error);
2023 }
2024 
2025 /*
2026  * Get configurable pathname variables.
2027  */
2028 #ifndef _SYS_SYSPROTO_H_
2029 struct pathconf_args {
2030 	char	*path;
2031 	int	name;
2032 };
2033 #endif
2034 /* ARGSUSED */
2035 int
2036 pathconf(p, uap)
2037 	struct proc *p;
2038 	register struct pathconf_args /* {
2039 		syscallarg(char *) path;
2040 		syscallarg(int) name;
2041 	} */ *uap;
2042 {
2043 	int error;
2044 	struct nameidata nd;
2045 
2046 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2047 	    SCARG(uap, path), p);
2048 	if ((error = namei(&nd)) != 0)
2049 		return (error);
2050 	NDFREE(&nd, NDF_ONLY_PNBUF);
2051 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), p->p_retval);
2052 	vput(nd.ni_vp);
2053 	return (error);
2054 }
2055 
2056 /*
2057  * Return target name of a symbolic link.
2058  */
2059 #ifndef _SYS_SYSPROTO_H_
2060 struct readlink_args {
2061 	char	*path;
2062 	char	*buf;
2063 	int	count;
2064 };
2065 #endif
2066 /* ARGSUSED */
2067 int
2068 readlink(p, uap)
2069 	struct proc *p;
2070 	register struct readlink_args /* {
2071 		syscallarg(char *) path;
2072 		syscallarg(char *) buf;
2073 		syscallarg(int) count;
2074 	} */ *uap;
2075 {
2076 	register struct vnode *vp;
2077 	struct iovec aiov;
2078 	struct uio auio;
2079 	int error;
2080 	struct nameidata nd;
2081 
2082 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2083 	    SCARG(uap, path), p);
2084 	if ((error = namei(&nd)) != 0)
2085 		return (error);
2086 	NDFREE(&nd, NDF_ONLY_PNBUF);
2087 	vp = nd.ni_vp;
2088 	if (vp->v_type != VLNK)
2089 		error = EINVAL;
2090 	else {
2091 		aiov.iov_base = SCARG(uap, buf);
2092 		aiov.iov_len = SCARG(uap, count);
2093 		auio.uio_iov = &aiov;
2094 		auio.uio_iovcnt = 1;
2095 		auio.uio_offset = 0;
2096 		auio.uio_rw = UIO_READ;
2097 		auio.uio_segflg = UIO_USERSPACE;
2098 		auio.uio_procp = p;
2099 		auio.uio_resid = SCARG(uap, count);
2100 		error = VOP_READLINK(vp, &auio, p->p_ucred);
2101 	}
2102 	vput(vp);
2103 	p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
2104 	return (error);
2105 }
2106 
2107 /*
2108  * Common implementation code for chflags() and fchflags().
2109  */
2110 static int
2111 setfflags(p, vp, flags)
2112 	struct proc *p;
2113 	struct vnode *vp;
2114 	int flags;
2115 {
2116 	int error;
2117 	struct mount *mp;
2118 	struct vattr vattr;
2119 
2120 	/*
2121 	 * Prevent non-root users from setting flags on devices.  When
2122 	 * a device is reused, users can retain ownership of the device
2123 	 * if they are allowed to set flags and programs assume that
2124 	 * chown can't fail when done as root.
2125 	 */
2126 	if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
2127 	    ((error = suser_xxx(p->p_ucred, p, PRISON_ROOT)) != 0))
2128 		return (error);
2129 
2130 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2131 		return (error);
2132 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2133 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2134 	VATTR_NULL(&vattr);
2135 	vattr.va_flags = flags;
2136 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2137 	VOP_UNLOCK(vp, 0, p);
2138 	vn_finished_write(mp);
2139 	return (error);
2140 }
2141 
2142 /*
2143  * Change flags of a file given a path name.
2144  */
2145 #ifndef _SYS_SYSPROTO_H_
2146 struct chflags_args {
2147 	char	*path;
2148 	int	flags;
2149 };
2150 #endif
2151 /* ARGSUSED */
2152 int
2153 chflags(p, uap)
2154 	struct proc *p;
2155 	register struct chflags_args /* {
2156 		syscallarg(char *) path;
2157 		syscallarg(int) flags;
2158 	} */ *uap;
2159 {
2160 	int error;
2161 	struct nameidata nd;
2162 
2163 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2164 	if ((error = namei(&nd)) != 0)
2165 		return (error);
2166 	NDFREE(&nd, NDF_ONLY_PNBUF);
2167 	error = setfflags(p, nd.ni_vp, SCARG(uap, flags));
2168 	vrele(nd.ni_vp);
2169 	return error;
2170 }
2171 
2172 /*
2173  * Change flags of a file given a file descriptor.
2174  */
2175 #ifndef _SYS_SYSPROTO_H_
2176 struct fchflags_args {
2177 	int	fd;
2178 	int	flags;
2179 };
2180 #endif
2181 /* ARGSUSED */
2182 int
2183 fchflags(p, uap)
2184 	struct proc *p;
2185 	register struct fchflags_args /* {
2186 		syscallarg(int) fd;
2187 		syscallarg(int) flags;
2188 	} */ *uap;
2189 {
2190 	struct file *fp;
2191 	int error;
2192 
2193 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2194 		return (error);
2195 	return setfflags(p, (struct vnode *) fp->f_data, SCARG(uap, flags));
2196 }
2197 
2198 /*
2199  * Common implementation code for chmod(), lchmod() and fchmod().
2200  */
2201 static int
2202 setfmode(p, vp, mode)
2203 	struct proc *p;
2204 	struct vnode *vp;
2205 	int mode;
2206 {
2207 	int error;
2208 	struct mount *mp;
2209 	struct vattr vattr;
2210 
2211 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2212 		return (error);
2213 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2214 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2215 	VATTR_NULL(&vattr);
2216 	vattr.va_mode = mode & ALLPERMS;
2217 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2218 	VOP_UNLOCK(vp, 0, p);
2219 	vn_finished_write(mp);
2220 	return error;
2221 }
2222 
2223 /*
2224  * Change mode of a file given path name.
2225  */
2226 #ifndef _SYS_SYSPROTO_H_
2227 struct chmod_args {
2228 	char	*path;
2229 	int	mode;
2230 };
2231 #endif
2232 /* ARGSUSED */
2233 int
2234 chmod(p, uap)
2235 	struct proc *p;
2236 	register struct chmod_args /* {
2237 		syscallarg(char *) path;
2238 		syscallarg(int) mode;
2239 	} */ *uap;
2240 {
2241 	int error;
2242 	struct nameidata nd;
2243 
2244 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2245 	if ((error = namei(&nd)) != 0)
2246 		return (error);
2247 	NDFREE(&nd, NDF_ONLY_PNBUF);
2248 	error = setfmode(p, nd.ni_vp, SCARG(uap, mode));
2249 	vrele(nd.ni_vp);
2250 	return error;
2251 }
2252 
2253 /*
2254  * Change mode of a file given path name (don't follow links.)
2255  */
2256 #ifndef _SYS_SYSPROTO_H_
2257 struct lchmod_args {
2258 	char	*path;
2259 	int	mode;
2260 };
2261 #endif
2262 /* ARGSUSED */
2263 int
2264 lchmod(p, uap)
2265 	struct proc *p;
2266 	register struct lchmod_args /* {
2267 		syscallarg(char *) path;
2268 		syscallarg(int) mode;
2269 	} */ *uap;
2270 {
2271 	int error;
2272 	struct nameidata nd;
2273 
2274 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2275 	if ((error = namei(&nd)) != 0)
2276 		return (error);
2277 	NDFREE(&nd, NDF_ONLY_PNBUF);
2278 	error = setfmode(p, nd.ni_vp, SCARG(uap, mode));
2279 	vrele(nd.ni_vp);
2280 	return error;
2281 }
2282 
2283 /*
2284  * Change mode of a file given a file descriptor.
2285  */
2286 #ifndef _SYS_SYSPROTO_H_
2287 struct fchmod_args {
2288 	int	fd;
2289 	int	mode;
2290 };
2291 #endif
2292 /* ARGSUSED */
2293 int
2294 fchmod(p, uap)
2295 	struct proc *p;
2296 	register struct fchmod_args /* {
2297 		syscallarg(int) fd;
2298 		syscallarg(int) mode;
2299 	} */ *uap;
2300 {
2301 	struct file *fp;
2302 	int error;
2303 
2304 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2305 		return (error);
2306 	return setfmode(p, (struct vnode *)fp->f_data, SCARG(uap, mode));
2307 }
2308 
2309 /*
2310  * Common implementation for chown(), lchown(), and fchown()
2311  */
2312 static int
2313 setfown(p, vp, uid, gid)
2314 	struct proc *p;
2315 	struct vnode *vp;
2316 	uid_t uid;
2317 	gid_t gid;
2318 {
2319 	int error;
2320 	struct mount *mp;
2321 	struct vattr vattr;
2322 
2323 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2324 		return (error);
2325 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2326 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2327 	VATTR_NULL(&vattr);
2328 	vattr.va_uid = uid;
2329 	vattr.va_gid = gid;
2330 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2331 	VOP_UNLOCK(vp, 0, p);
2332 	vn_finished_write(mp);
2333 	return error;
2334 }
2335 
2336 /*
2337  * Set ownership given a path name.
2338  */
2339 #ifndef _SYS_SYSPROTO_H_
2340 struct chown_args {
2341 	char	*path;
2342 	int	uid;
2343 	int	gid;
2344 };
2345 #endif
2346 /* ARGSUSED */
2347 int
2348 chown(p, uap)
2349 	struct proc *p;
2350 	register struct chown_args /* {
2351 		syscallarg(char *) path;
2352 		syscallarg(int) uid;
2353 		syscallarg(int) gid;
2354 	} */ *uap;
2355 {
2356 	int error;
2357 	struct nameidata nd;
2358 
2359 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2360 	if ((error = namei(&nd)) != 0)
2361 		return (error);
2362 	NDFREE(&nd, NDF_ONLY_PNBUF);
2363 	error = setfown(p, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2364 	vrele(nd.ni_vp);
2365 	return (error);
2366 }
2367 
2368 /*
2369  * Set ownership given a path name, do not cross symlinks.
2370  */
2371 #ifndef _SYS_SYSPROTO_H_
2372 struct lchown_args {
2373 	char	*path;
2374 	int	uid;
2375 	int	gid;
2376 };
2377 #endif
2378 /* ARGSUSED */
2379 int
2380 lchown(p, uap)
2381 	struct proc *p;
2382 	register struct lchown_args /* {
2383 		syscallarg(char *) path;
2384 		syscallarg(int) uid;
2385 		syscallarg(int) gid;
2386 	} */ *uap;
2387 {
2388 	int error;
2389 	struct nameidata nd;
2390 
2391 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2392 	if ((error = namei(&nd)) != 0)
2393 		return (error);
2394 	NDFREE(&nd, NDF_ONLY_PNBUF);
2395 	error = setfown(p, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2396 	vrele(nd.ni_vp);
2397 	return (error);
2398 }
2399 
2400 /*
2401  * Set ownership given a file descriptor.
2402  */
2403 #ifndef _SYS_SYSPROTO_H_
2404 struct fchown_args {
2405 	int	fd;
2406 	int	uid;
2407 	int	gid;
2408 };
2409 #endif
2410 /* ARGSUSED */
2411 int
2412 fchown(p, uap)
2413 	struct proc *p;
2414 	register struct fchown_args /* {
2415 		syscallarg(int) fd;
2416 		syscallarg(int) uid;
2417 		syscallarg(int) gid;
2418 	} */ *uap;
2419 {
2420 	struct file *fp;
2421 	int error;
2422 
2423 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2424 		return (error);
2425 	return setfown(p, (struct vnode *)fp->f_data,
2426 		SCARG(uap, uid), SCARG(uap, gid));
2427 }
2428 
2429 /*
2430  * Common implementation code for utimes(), lutimes(), and futimes().
2431  */
2432 static int
2433 getutimes(usrtvp, tsp)
2434 	const struct timeval *usrtvp;
2435 	struct timespec *tsp;
2436 {
2437 	struct timeval tv[2];
2438 	int error;
2439 
2440 	if (usrtvp == NULL) {
2441 		microtime(&tv[0]);
2442 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2443 		tsp[1] = tsp[0];
2444 	} else {
2445 		if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
2446 			return (error);
2447 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2448 		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
2449 	}
2450 	return 0;
2451 }
2452 
2453 /*
2454  * Common implementation code for utimes(), lutimes(), and futimes().
2455  */
2456 static int
2457 setutimes(p, vp, ts, nullflag)
2458 	struct proc *p;
2459 	struct vnode *vp;
2460 	const struct timespec *ts;
2461 	int nullflag;
2462 {
2463 	int error;
2464 	struct mount *mp;
2465 	struct vattr vattr;
2466 
2467 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2468 		return (error);
2469 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2470 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2471 	VATTR_NULL(&vattr);
2472 	vattr.va_atime = ts[0];
2473 	vattr.va_mtime = ts[1];
2474 	if (nullflag)
2475 		vattr.va_vaflags |= VA_UTIMES_NULL;
2476 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2477 	VOP_UNLOCK(vp, 0, p);
2478 	vn_finished_write(mp);
2479 	return error;
2480 }
2481 
2482 /*
2483  * Set the access and modification times of a file.
2484  */
2485 #ifndef _SYS_SYSPROTO_H_
2486 struct utimes_args {
2487 	char	*path;
2488 	struct	timeval *tptr;
2489 };
2490 #endif
2491 /* ARGSUSED */
2492 int
2493 utimes(p, uap)
2494 	struct proc *p;
2495 	register struct utimes_args /* {
2496 		syscallarg(char *) path;
2497 		syscallarg(struct timeval *) tptr;
2498 	} */ *uap;
2499 {
2500 	struct timespec ts[2];
2501 	struct timeval *usrtvp;
2502 	int error;
2503 	struct nameidata nd;
2504 
2505 	usrtvp = SCARG(uap, tptr);
2506 	if ((error = getutimes(usrtvp, ts)) != 0)
2507 		return (error);
2508 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2509 	if ((error = namei(&nd)) != 0)
2510 		return (error);
2511 	NDFREE(&nd, NDF_ONLY_PNBUF);
2512 	error = setutimes(p, nd.ni_vp, ts, usrtvp == NULL);
2513 	vrele(nd.ni_vp);
2514 	return (error);
2515 }
2516 
2517 /*
2518  * Set the access and modification times of a file.
2519  */
2520 #ifndef _SYS_SYSPROTO_H_
2521 struct lutimes_args {
2522 	char	*path;
2523 	struct	timeval *tptr;
2524 };
2525 #endif
2526 /* ARGSUSED */
2527 int
2528 lutimes(p, uap)
2529 	struct proc *p;
2530 	register struct lutimes_args /* {
2531 		syscallarg(char *) path;
2532 		syscallarg(struct timeval *) tptr;
2533 	} */ *uap;
2534 {
2535 	struct timespec ts[2];
2536 	struct timeval *usrtvp;
2537 	int error;
2538 	struct nameidata nd;
2539 
2540 	usrtvp = SCARG(uap, tptr);
2541 	if ((error = getutimes(usrtvp, ts)) != 0)
2542 		return (error);
2543 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2544 	if ((error = namei(&nd)) != 0)
2545 		return (error);
2546 	NDFREE(&nd, NDF_ONLY_PNBUF);
2547 	error = setutimes(p, nd.ni_vp, ts, usrtvp == NULL);
2548 	vrele(nd.ni_vp);
2549 	return (error);
2550 }
2551 
2552 /*
2553  * Set the access and modification times of a file.
2554  */
2555 #ifndef _SYS_SYSPROTO_H_
2556 struct futimes_args {
2557 	int	fd;
2558 	struct	timeval *tptr;
2559 };
2560 #endif
2561 /* ARGSUSED */
2562 int
2563 futimes(p, uap)
2564 	struct proc *p;
2565 	register struct futimes_args /* {
2566 		syscallarg(int ) fd;
2567 		syscallarg(struct timeval *) tptr;
2568 	} */ *uap;
2569 {
2570 	struct timespec ts[2];
2571 	struct file *fp;
2572 	struct timeval *usrtvp;
2573 	int error;
2574 
2575 	usrtvp = SCARG(uap, tptr);
2576 	if ((error = getutimes(usrtvp, ts)) != 0)
2577 		return (error);
2578 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2579 		return (error);
2580 	return setutimes(p, (struct vnode *)fp->f_data, ts, usrtvp == NULL);
2581 }
2582 
2583 /*
2584  * Truncate a file given its path name.
2585  */
2586 #ifndef _SYS_SYSPROTO_H_
2587 struct truncate_args {
2588 	char	*path;
2589 	int	pad;
2590 	off_t	length;
2591 };
2592 #endif
2593 /* ARGSUSED */
2594 int
2595 truncate(p, uap)
2596 	struct proc *p;
2597 	register struct truncate_args /* {
2598 		syscallarg(char *) path;
2599 		syscallarg(int) pad;
2600 		syscallarg(off_t) length;
2601 	} */ *uap;
2602 {
2603 	struct mount *mp;
2604 	struct vnode *vp;
2605 	struct vattr vattr;
2606 	int error;
2607 	struct nameidata nd;
2608 
2609 	if (uap->length < 0)
2610 		return(EINVAL);
2611 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2612 	if ((error = namei(&nd)) != 0)
2613 		return (error);
2614 	vp = nd.ni_vp;
2615 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2616 		vrele(vp);
2617 		return (error);
2618 	}
2619 	NDFREE(&nd, NDF_ONLY_PNBUF);
2620 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2621 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2622 	if (vp->v_type == VDIR)
2623 		error = EISDIR;
2624 	else if ((error = vn_writechk(vp)) == 0 &&
2625 	    (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) {
2626 		VATTR_NULL(&vattr);
2627 		vattr.va_size = SCARG(uap, length);
2628 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2629 	}
2630 	vput(vp);
2631 	vn_finished_write(mp);
2632 	return (error);
2633 }
2634 
2635 /*
2636  * Truncate a file given a file descriptor.
2637  */
2638 #ifndef _SYS_SYSPROTO_H_
2639 struct ftruncate_args {
2640 	int	fd;
2641 	int	pad;
2642 	off_t	length;
2643 };
2644 #endif
2645 /* ARGSUSED */
2646 int
2647 ftruncate(p, uap)
2648 	struct proc *p;
2649 	register struct ftruncate_args /* {
2650 		syscallarg(int) fd;
2651 		syscallarg(int) pad;
2652 		syscallarg(off_t) length;
2653 	} */ *uap;
2654 {
2655 	struct mount *mp;
2656 	struct vattr vattr;
2657 	struct vnode *vp;
2658 	struct file *fp;
2659 	int error;
2660 
2661 	if (uap->length < 0)
2662 		return(EINVAL);
2663 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2664 		return (error);
2665 	if ((fp->f_flag & FWRITE) == 0)
2666 		return (EINVAL);
2667 	vp = (struct vnode *)fp->f_data;
2668 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2669 		return (error);
2670 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2671 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2672 	if (vp->v_type == VDIR)
2673 		error = EISDIR;
2674 	else if ((error = vn_writechk(vp)) == 0) {
2675 		VATTR_NULL(&vattr);
2676 		vattr.va_size = SCARG(uap, length);
2677 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2678 	}
2679 	VOP_UNLOCK(vp, 0, p);
2680 	vn_finished_write(mp);
2681 	return (error);
2682 }
2683 
2684 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2685 /*
2686  * Truncate a file given its path name.
2687  */
2688 #ifndef _SYS_SYSPROTO_H_
2689 struct otruncate_args {
2690 	char	*path;
2691 	long	length;
2692 };
2693 #endif
2694 /* ARGSUSED */
2695 int
2696 otruncate(p, uap)
2697 	struct proc *p;
2698 	register struct otruncate_args /* {
2699 		syscallarg(char *) path;
2700 		syscallarg(long) length;
2701 	} */ *uap;
2702 {
2703 	struct truncate_args /* {
2704 		syscallarg(char *) path;
2705 		syscallarg(int) pad;
2706 		syscallarg(off_t) length;
2707 	} */ nuap;
2708 
2709 	SCARG(&nuap, path) = SCARG(uap, path);
2710 	SCARG(&nuap, length) = SCARG(uap, length);
2711 	return (truncate(p, &nuap));
2712 }
2713 
2714 /*
2715  * Truncate a file given a file descriptor.
2716  */
2717 #ifndef _SYS_SYSPROTO_H_
2718 struct oftruncate_args {
2719 	int	fd;
2720 	long	length;
2721 };
2722 #endif
2723 /* ARGSUSED */
2724 int
2725 oftruncate(p, uap)
2726 	struct proc *p;
2727 	register struct oftruncate_args /* {
2728 		syscallarg(int) fd;
2729 		syscallarg(long) length;
2730 	} */ *uap;
2731 {
2732 	struct ftruncate_args /* {
2733 		syscallarg(int) fd;
2734 		syscallarg(int) pad;
2735 		syscallarg(off_t) length;
2736 	} */ nuap;
2737 
2738 	SCARG(&nuap, fd) = SCARG(uap, fd);
2739 	SCARG(&nuap, length) = SCARG(uap, length);
2740 	return (ftruncate(p, &nuap));
2741 }
2742 #endif /* COMPAT_43 || COMPAT_SUNOS */
2743 
2744 /*
2745  * Sync an open file.
2746  */
2747 #ifndef _SYS_SYSPROTO_H_
2748 struct fsync_args {
2749 	int	fd;
2750 };
2751 #endif
2752 /* ARGSUSED */
2753 int
2754 fsync(p, uap)
2755 	struct proc *p;
2756 	struct fsync_args /* {
2757 		syscallarg(int) fd;
2758 	} */ *uap;
2759 {
2760 	struct vnode *vp;
2761 	struct mount *mp;
2762 	struct file *fp;
2763 	vm_object_t obj;
2764 	int error;
2765 
2766 	GIANT_REQUIRED;
2767 
2768 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2769 		return (error);
2770 	vp = (struct vnode *)fp->f_data;
2771 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2772 		return (error);
2773 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2774 	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2775 		vm_object_page_clean(obj, 0, 0, 0);
2776 	}
2777 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
2778 #ifdef SOFTUPDATES
2779 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2780 	    error = softdep_fsync(vp);
2781 #endif
2782 
2783 	VOP_UNLOCK(vp, 0, p);
2784 	vn_finished_write(mp);
2785 	return (error);
2786 }
2787 
2788 /*
2789  * Rename files.  Source and destination must either both be directories,
2790  * or both not be directories.  If target is a directory, it must be empty.
2791  */
2792 #ifndef _SYS_SYSPROTO_H_
2793 struct rename_args {
2794 	char	*from;
2795 	char	*to;
2796 };
2797 #endif
2798 /* ARGSUSED */
2799 int
2800 rename(p, uap)
2801 	struct proc *p;
2802 	register struct rename_args /* {
2803 		syscallarg(char *) from;
2804 		syscallarg(char *) to;
2805 	} */ *uap;
2806 {
2807 	struct mount *mp;
2808 	struct vnode *tvp, *fvp, *tdvp;
2809 	struct nameidata fromnd, tond;
2810 	int error;
2811 
2812 	bwillwrite();
2813 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2814 	    SCARG(uap, from), p);
2815 	if ((error = namei(&fromnd)) != 0)
2816 		return (error);
2817 	fvp = fromnd.ni_vp;
2818 	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
2819 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2820 		vrele(fromnd.ni_dvp);
2821 		vrele(fvp);
2822 		goto out1;
2823 	}
2824 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
2825 	    UIO_USERSPACE, SCARG(uap, to), p);
2826 	if (fromnd.ni_vp->v_type == VDIR)
2827 		tond.ni_cnd.cn_flags |= WILLBEDIR;
2828 	if ((error = namei(&tond)) != 0) {
2829 		/* Translate error code for rename("dir1", "dir2/."). */
2830 		if (error == EISDIR && fvp->v_type == VDIR)
2831 			error = EINVAL;
2832 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2833 		vrele(fromnd.ni_dvp);
2834 		vrele(fvp);
2835 		goto out1;
2836 	}
2837 	tdvp = tond.ni_dvp;
2838 	tvp = tond.ni_vp;
2839 	if (tvp != NULL) {
2840 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2841 			error = ENOTDIR;
2842 			goto out;
2843 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2844 			error = EISDIR;
2845 			goto out;
2846 		}
2847 	}
2848 	if (fvp == tdvp)
2849 		error = EINVAL;
2850 	/*
2851 	 * If source is the same as the destination (that is the
2852 	 * same inode number with the same name in the same directory),
2853 	 * then there is nothing to do.
2854 	 */
2855 	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
2856 	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
2857 	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
2858 	      fromnd.ni_cnd.cn_namelen))
2859 		error = -1;
2860 out:
2861 	if (!error) {
2862 		VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE);
2863 		if (fromnd.ni_dvp != tdvp) {
2864 			VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
2865 		}
2866 		if (tvp) {
2867 			VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE);
2868 		}
2869 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2870 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2871 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2872 		NDFREE(&tond, NDF_ONLY_PNBUF);
2873 	} else {
2874 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2875 		NDFREE(&tond, NDF_ONLY_PNBUF);
2876 		if (tdvp == tvp)
2877 			vrele(tdvp);
2878 		else
2879 			vput(tdvp);
2880 		if (tvp)
2881 			vput(tvp);
2882 		vrele(fromnd.ni_dvp);
2883 		vrele(fvp);
2884 	}
2885 	vrele(tond.ni_startdir);
2886 	vn_finished_write(mp);
2887 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2888 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2889 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2890 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2891 out1:
2892 	if (fromnd.ni_startdir)
2893 		vrele(fromnd.ni_startdir);
2894 	if (error == -1)
2895 		return (0);
2896 	return (error);
2897 }
2898 
2899 /*
2900  * Make a directory file.
2901  */
2902 #ifndef _SYS_SYSPROTO_H_
2903 struct mkdir_args {
2904 	char	*path;
2905 	int	mode;
2906 };
2907 #endif
2908 /* ARGSUSED */
2909 int
2910 mkdir(p, uap)
2911 	struct proc *p;
2912 	register struct mkdir_args /* {
2913 		syscallarg(char *) path;
2914 		syscallarg(int) mode;
2915 	} */ *uap;
2916 {
2917 	struct mount *mp;
2918 	struct vnode *vp;
2919 	struct vattr vattr;
2920 	int error;
2921 	struct nameidata nd;
2922 
2923 restart:
2924 	bwillwrite();
2925 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
2926 	nd.ni_cnd.cn_flags |= WILLBEDIR;
2927 	if ((error = namei(&nd)) != 0)
2928 		return (error);
2929 	vp = nd.ni_vp;
2930 	if (vp != NULL) {
2931 		NDFREE(&nd, NDF_ONLY_PNBUF);
2932 		vrele(vp);
2933 		vput(nd.ni_dvp);
2934 		return (EEXIST);
2935 	}
2936 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2937 		NDFREE(&nd, NDF_ONLY_PNBUF);
2938 		vput(nd.ni_dvp);
2939 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2940 			return (error);
2941 		goto restart;
2942 	}
2943 	VATTR_NULL(&vattr);
2944 	vattr.va_type = VDIR;
2945 	vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask;
2946 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
2947 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2948 	NDFREE(&nd, NDF_ONLY_PNBUF);
2949 	vput(nd.ni_dvp);
2950 	if (!error)
2951 		vput(nd.ni_vp);
2952 	vn_finished_write(mp);
2953 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2954 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2955 	return (error);
2956 }
2957 
2958 /*
2959  * Remove a directory file.
2960  */
2961 #ifndef _SYS_SYSPROTO_H_
2962 struct rmdir_args {
2963 	char	*path;
2964 };
2965 #endif
2966 /* ARGSUSED */
2967 int
2968 rmdir(p, uap)
2969 	struct proc *p;
2970 	struct rmdir_args /* {
2971 		syscallarg(char *) path;
2972 	} */ *uap;
2973 {
2974 	struct mount *mp;
2975 	struct vnode *vp;
2976 	int error;
2977 	struct nameidata nd;
2978 
2979 restart:
2980 	bwillwrite();
2981 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
2982 	    SCARG(uap, path), p);
2983 	if ((error = namei(&nd)) != 0)
2984 		return (error);
2985 	vp = nd.ni_vp;
2986 	if (vp->v_type != VDIR) {
2987 		error = ENOTDIR;
2988 		goto out;
2989 	}
2990 	/*
2991 	 * No rmdir "." please.
2992 	 */
2993 	if (nd.ni_dvp == vp) {
2994 		error = EINVAL;
2995 		goto out;
2996 	}
2997 	/*
2998 	 * The root of a mounted filesystem cannot be deleted.
2999 	 */
3000 	if (vp->v_flag & VROOT) {
3001 		error = EBUSY;
3002 		goto out;
3003 	}
3004 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3005 		NDFREE(&nd, NDF_ONLY_PNBUF);
3006 		if (nd.ni_dvp == vp)
3007 			vrele(nd.ni_dvp);
3008 		else
3009 			vput(nd.ni_dvp);
3010 		vput(vp);
3011 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3012 			return (error);
3013 		goto restart;
3014 	}
3015 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
3016 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
3017 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3018 	vn_finished_write(mp);
3019 out:
3020 	NDFREE(&nd, NDF_ONLY_PNBUF);
3021 	if (nd.ni_dvp == vp)
3022 		vrele(nd.ni_dvp);
3023 	else
3024 		vput(nd.ni_dvp);
3025 	vput(vp);
3026 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3027 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3028 	return (error);
3029 }
3030 
3031 #ifdef COMPAT_43
3032 /*
3033  * Read a block of directory entries in a file system independent format.
3034  */
3035 #ifndef _SYS_SYSPROTO_H_
3036 struct ogetdirentries_args {
3037 	int	fd;
3038 	char	*buf;
3039 	u_int	count;
3040 	long	*basep;
3041 };
3042 #endif
3043 int
3044 ogetdirentries(p, uap)
3045 	struct proc *p;
3046 	register struct ogetdirentries_args /* {
3047 		syscallarg(int) fd;
3048 		syscallarg(char *) buf;
3049 		syscallarg(u_int) count;
3050 		syscallarg(long *) basep;
3051 	} */ *uap;
3052 {
3053 	struct vnode *vp;
3054 	struct file *fp;
3055 	struct uio auio, kuio;
3056 	struct iovec aiov, kiov;
3057 	struct dirent *dp, *edp;
3058 	caddr_t dirbuf;
3059 	int error, eofflag, readcnt;
3060 	long loff;
3061 
3062 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3063 		return (error);
3064 	if ((fp->f_flag & FREAD) == 0)
3065 		return (EBADF);
3066 	vp = (struct vnode *)fp->f_data;
3067 unionread:
3068 	if (vp->v_type != VDIR)
3069 		return (EINVAL);
3070 	aiov.iov_base = SCARG(uap, buf);
3071 	aiov.iov_len = SCARG(uap, count);
3072 	auio.uio_iov = &aiov;
3073 	auio.uio_iovcnt = 1;
3074 	auio.uio_rw = UIO_READ;
3075 	auio.uio_segflg = UIO_USERSPACE;
3076 	auio.uio_procp = p;
3077 	auio.uio_resid = SCARG(uap, count);
3078 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3079 	loff = auio.uio_offset = fp->f_offset;
3080 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3081 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3082 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3083 			    NULL, NULL);
3084 			fp->f_offset = auio.uio_offset;
3085 		} else
3086 #	endif
3087 	{
3088 		kuio = auio;
3089 		kuio.uio_iov = &kiov;
3090 		kuio.uio_segflg = UIO_SYSSPACE;
3091 		kiov.iov_len = SCARG(uap, count);
3092 		MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
3093 		kiov.iov_base = dirbuf;
3094 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3095 			    NULL, NULL);
3096 		fp->f_offset = kuio.uio_offset;
3097 		if (error == 0) {
3098 			readcnt = SCARG(uap, count) - kuio.uio_resid;
3099 			edp = (struct dirent *)&dirbuf[readcnt];
3100 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3101 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3102 					/*
3103 					 * The expected low byte of
3104 					 * dp->d_namlen is our dp->d_type.
3105 					 * The high MBZ byte of dp->d_namlen
3106 					 * is our dp->d_namlen.
3107 					 */
3108 					dp->d_type = dp->d_namlen;
3109 					dp->d_namlen = 0;
3110 #				else
3111 					/*
3112 					 * The dp->d_type is the high byte
3113 					 * of the expected dp->d_namlen,
3114 					 * so must be zero'ed.
3115 					 */
3116 					dp->d_type = 0;
3117 #				endif
3118 				if (dp->d_reclen > 0) {
3119 					dp = (struct dirent *)
3120 					    ((char *)dp + dp->d_reclen);
3121 				} else {
3122 					error = EIO;
3123 					break;
3124 				}
3125 			}
3126 			if (dp >= edp)
3127 				error = uiomove(dirbuf, readcnt, &auio);
3128 		}
3129 		FREE(dirbuf, M_TEMP);
3130 	}
3131 	VOP_UNLOCK(vp, 0, p);
3132 	if (error)
3133 		return (error);
3134 	if (SCARG(uap, count) == auio.uio_resid) {
3135 		if (union_dircheckp) {
3136 			error = union_dircheckp(p, &vp, fp);
3137 			if (error == -1)
3138 				goto unionread;
3139 			if (error)
3140 				return (error);
3141 		}
3142 		if ((vp->v_flag & VROOT) &&
3143 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3144 			struct vnode *tvp = vp;
3145 			vp = vp->v_mount->mnt_vnodecovered;
3146 			VREF(vp);
3147 			fp->f_data = (caddr_t) vp;
3148 			fp->f_offset = 0;
3149 			vrele(tvp);
3150 			goto unionread;
3151 		}
3152 	}
3153 	error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3154 	    sizeof(long));
3155 	p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
3156 	return (error);
3157 }
3158 #endif /* COMPAT_43 */
3159 
3160 /*
3161  * Read a block of directory entries in a file system independent format.
3162  */
3163 #ifndef _SYS_SYSPROTO_H_
3164 struct getdirentries_args {
3165 	int	fd;
3166 	char	*buf;
3167 	u_int	count;
3168 	long	*basep;
3169 };
3170 #endif
3171 int
3172 getdirentries(p, uap)
3173 	struct proc *p;
3174 	register struct getdirentries_args /* {
3175 		syscallarg(int) fd;
3176 		syscallarg(char *) buf;
3177 		syscallarg(u_int) count;
3178 		syscallarg(long *) basep;
3179 	} */ *uap;
3180 {
3181 	struct vnode *vp;
3182 	struct file *fp;
3183 	struct uio auio;
3184 	struct iovec aiov;
3185 	long loff;
3186 	int error, eofflag;
3187 
3188 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3189 		return (error);
3190 	if ((fp->f_flag & FREAD) == 0)
3191 		return (EBADF);
3192 	vp = (struct vnode *)fp->f_data;
3193 unionread:
3194 	if (vp->v_type != VDIR)
3195 		return (EINVAL);
3196 	aiov.iov_base = SCARG(uap, buf);
3197 	aiov.iov_len = SCARG(uap, count);
3198 	auio.uio_iov = &aiov;
3199 	auio.uio_iovcnt = 1;
3200 	auio.uio_rw = UIO_READ;
3201 	auio.uio_segflg = UIO_USERSPACE;
3202 	auio.uio_procp = p;
3203 	auio.uio_resid = SCARG(uap, count);
3204 	/* vn_lock(vp, LK_SHARED | LK_RETRY, p); */
3205 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3206 	loff = auio.uio_offset = fp->f_offset;
3207 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
3208 	fp->f_offset = auio.uio_offset;
3209 	VOP_UNLOCK(vp, 0, p);
3210 	if (error)
3211 		return (error);
3212 	if (SCARG(uap, count) == auio.uio_resid) {
3213 		if (union_dircheckp) {
3214 			error = union_dircheckp(p, &vp, fp);
3215 			if (error == -1)
3216 				goto unionread;
3217 			if (error)
3218 				return (error);
3219 		}
3220 		if ((vp->v_flag & VROOT) &&
3221 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3222 			struct vnode *tvp = vp;
3223 			vp = vp->v_mount->mnt_vnodecovered;
3224 			VREF(vp);
3225 			fp->f_data = (caddr_t) vp;
3226 			fp->f_offset = 0;
3227 			vrele(tvp);
3228 			goto unionread;
3229 		}
3230 	}
3231 	if (SCARG(uap, basep) != NULL) {
3232 		error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3233 		    sizeof(long));
3234 	}
3235 	p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
3236 	return (error);
3237 }
3238 #ifndef _SYS_SYSPROTO_H_
3239 struct getdents_args {
3240 	int fd;
3241 	char *buf;
3242 	size_t count;
3243 };
3244 #endif
3245 int
3246 getdents(p, uap)
3247 	struct proc *p;
3248 	register struct getdents_args /* {
3249 		syscallarg(int) fd;
3250 		syscallarg(char *) buf;
3251 		syscallarg(u_int) count;
3252 	} */ *uap;
3253 {
3254 	struct getdirentries_args ap;
3255 	ap.fd = uap->fd;
3256 	ap.buf = uap->buf;
3257 	ap.count = uap->count;
3258 	ap.basep = NULL;
3259 	return getdirentries(p, &ap);
3260 }
3261 
3262 /*
3263  * Set the mode mask for creation of filesystem nodes.
3264  *
3265  * MP SAFE
3266  */
3267 #ifndef _SYS_SYSPROTO_H_
3268 struct umask_args {
3269 	int	newmask;
3270 };
3271 #endif
3272 int
3273 umask(p, uap)
3274 	struct proc *p;
3275 	struct umask_args /* {
3276 		syscallarg(int) newmask;
3277 	} */ *uap;
3278 {
3279 	register struct filedesc *fdp;
3280 
3281 	fdp = p->p_fd;
3282 	p->p_retval[0] = fdp->fd_cmask;
3283 	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
3284 	return (0);
3285 }
3286 
3287 /*
3288  * Void all references to file by ripping underlying filesystem
3289  * away from vnode.
3290  */
3291 #ifndef _SYS_SYSPROTO_H_
3292 struct revoke_args {
3293 	char	*path;
3294 };
3295 #endif
3296 /* ARGSUSED */
3297 int
3298 revoke(p, uap)
3299 	struct proc *p;
3300 	register struct revoke_args /* {
3301 		syscallarg(char *) path;
3302 	} */ *uap;
3303 {
3304 	struct mount *mp;
3305 	struct vnode *vp;
3306 	struct vattr vattr;
3307 	int error;
3308 	struct nameidata nd;
3309 
3310 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3311 	if ((error = namei(&nd)) != 0)
3312 		return (error);
3313 	vp = nd.ni_vp;
3314 	NDFREE(&nd, NDF_ONLY_PNBUF);
3315 	if (vp->v_type != VCHR) {
3316 		error = EINVAL;
3317 		goto out;
3318 	}
3319 	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
3320 		goto out;
3321 	if (p->p_ucred->cr_uid != vattr.va_uid &&
3322 	    (error = suser_xxx(0, p, PRISON_ROOT)))
3323 		goto out;
3324 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3325 		goto out;
3326 	if (vcount(vp) > 1)
3327 		VOP_REVOKE(vp, REVOKEALL);
3328 	vn_finished_write(mp);
3329 out:
3330 	vrele(vp);
3331 	return (error);
3332 }
3333 
3334 /*
3335  * Convert a user file descriptor to a kernel file entry.
3336  */
3337 int
3338 getvnode(fdp, fd, fpp)
3339 	struct filedesc *fdp;
3340 	int fd;
3341 	struct file **fpp;
3342 {
3343 	struct file *fp;
3344 
3345 	if ((u_int)fd >= fdp->fd_nfiles ||
3346 	    (fp = fdp->fd_ofiles[fd]) == NULL)
3347 		return (EBADF);
3348 	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO)
3349 		return (EINVAL);
3350 	*fpp = fp;
3351 	return (0);
3352 }
3353 /*
3354  * Get (NFS) file handle
3355  */
3356 #ifndef _SYS_SYSPROTO_H_
3357 struct getfh_args {
3358 	char	*fname;
3359 	fhandle_t *fhp;
3360 };
3361 #endif
3362 int
3363 getfh(p, uap)
3364 	struct proc *p;
3365 	register struct getfh_args *uap;
3366 {
3367 	struct nameidata nd;
3368 	fhandle_t fh;
3369 	register struct vnode *vp;
3370 	int error;
3371 
3372 	/*
3373 	 * Must be super user
3374 	 */
3375 	error = suser(p);
3376 	if (error)
3377 		return (error);
3378 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
3379 	error = namei(&nd);
3380 	if (error)
3381 		return (error);
3382 	NDFREE(&nd, NDF_ONLY_PNBUF);
3383 	vp = nd.ni_vp;
3384 	bzero(&fh, sizeof(fh));
3385 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3386 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3387 	vput(vp);
3388 	if (error)
3389 		return (error);
3390 	error = copyout(&fh, uap->fhp, sizeof (fh));
3391 	return (error);
3392 }
3393 
3394 /*
3395  * syscall for the rpc.lockd to use to translate a NFS file handle into
3396  * an open descriptor.
3397  *
3398  * warning: do not remove the suser() call or this becomes one giant
3399  * security hole.
3400  */
3401 #ifndef _SYS_SYSPROTO_H_
3402 struct fhopen_args {
3403 	const struct fhandle *u_fhp;
3404 	int flags;
3405 };
3406 #endif
3407 int
3408 fhopen(p, uap)
3409 	struct proc *p;
3410 	struct fhopen_args /* {
3411 		syscallarg(const struct fhandle *) u_fhp;
3412 		syscallarg(int) flags;
3413 	} */ *uap;
3414 {
3415 	struct mount *mp;
3416 	struct vnode *vp;
3417 	struct fhandle fhp;
3418 	struct vattr vat;
3419 	struct vattr *vap = &vat;
3420 	struct flock lf;
3421 	struct file *fp;
3422 	register struct filedesc *fdp = p->p_fd;
3423 	int fmode, mode, error, type;
3424 	struct file *nfp;
3425 	int indx;
3426 
3427 	/*
3428 	 * Must be super user
3429 	 */
3430 	error = suser(p);
3431 	if (error)
3432 		return (error);
3433 
3434 	fmode = FFLAGS(SCARG(uap, flags));
3435 	/* why not allow a non-read/write open for our lockd? */
3436 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3437 		return (EINVAL);
3438 	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
3439 	if (error)
3440 		return(error);
3441 	/* find the mount point */
3442 	mp = vfs_getvfs(&fhp.fh_fsid);
3443 	if (mp == NULL)
3444 		return (ESTALE);
3445 	/* now give me my vnode, it gets returned to me locked */
3446 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3447 	if (error)
3448 		return (error);
3449  	/*
3450 	 * from now on we have to make sure not
3451 	 * to forget about the vnode
3452 	 * any error that causes an abort must vput(vp)
3453 	 * just set error = err and 'goto bad;'.
3454 	 */
3455 
3456 	/*
3457 	 * from vn_open
3458 	 */
3459 	if (vp->v_type == VLNK) {
3460 		error = EMLINK;
3461 		goto bad;
3462 	}
3463 	if (vp->v_type == VSOCK) {
3464 		error = EOPNOTSUPP;
3465 		goto bad;
3466 	}
3467 	mode = 0;
3468 	if (fmode & (FWRITE | O_TRUNC)) {
3469 		if (vp->v_type == VDIR) {
3470 			error = EISDIR;
3471 			goto bad;
3472 		}
3473 		error = vn_writechk(vp);
3474 		if (error)
3475 			goto bad;
3476 		mode |= VWRITE;
3477 	}
3478 	if (fmode & FREAD)
3479 		mode |= VREAD;
3480 	if (mode) {
3481 		error = VOP_ACCESS(vp, mode, p->p_ucred, p);
3482 		if (error)
3483 			goto bad;
3484 	}
3485 	if (fmode & O_TRUNC) {
3486 		VOP_UNLOCK(vp, 0, p);				/* XXX */
3487 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3488 			vrele(vp);
3489 			return (error);
3490 		}
3491 		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
3492 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
3493 		VATTR_NULL(vap);
3494 		vap->va_size = 0;
3495 		error = VOP_SETATTR(vp, vap, p->p_ucred, p);
3496 		vn_finished_write(mp);
3497 		if (error)
3498 			goto bad;
3499 	}
3500 	error = VOP_OPEN(vp, fmode, p->p_ucred, p);
3501 	if (error)
3502 		goto bad;
3503 	/*
3504 	 * Make sure that a VM object is created for VMIO support.
3505 	 */
3506 	if (vn_canvmio(vp) == TRUE) {
3507 		if ((error = vfs_object_create(vp, p, p->p_ucred)) != 0)
3508 			goto bad;
3509 	}
3510 	if (fmode & FWRITE)
3511 		vp->v_writecount++;
3512 
3513 	/*
3514 	 * end of vn_open code
3515 	 */
3516 
3517 	if ((error = falloc(p, &nfp, &indx)) != 0)
3518 		goto bad;
3519 	fp = nfp;
3520 
3521 	/*
3522 	 * Hold an extra reference to avoid having fp ripped out
3523 	 * from under us while we block in the lock op
3524 	 */
3525 	fhold(fp);
3526 	nfp->f_data = (caddr_t)vp;
3527 	nfp->f_flag = fmode & FMASK;
3528 	nfp->f_ops = &vnops;
3529 	nfp->f_type = DTYPE_VNODE;
3530 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
3531 		lf.l_whence = SEEK_SET;
3532 		lf.l_start = 0;
3533 		lf.l_len = 0;
3534 		if (fmode & O_EXLOCK)
3535 			lf.l_type = F_WRLCK;
3536 		else
3537 			lf.l_type = F_RDLCK;
3538 		type = F_FLOCK;
3539 		if ((fmode & FNONBLOCK) == 0)
3540 			type |= F_WAIT;
3541 		VOP_UNLOCK(vp, 0, p);
3542 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
3543 			/*
3544 			 * The lock request failed.  Normally close the
3545 			 * descriptor but handle the case where someone might
3546 			 * have dup()d or close()d it when we weren't looking.
3547 			 */
3548 			if (fdp->fd_ofiles[indx] == fp) {
3549 				fdp->fd_ofiles[indx] = NULL;
3550 				fdrop(fp, p);
3551 			}
3552 			/*
3553 			 * release our private reference
3554 			 */
3555 			fdrop(fp, p);
3556 			return(error);
3557 		}
3558 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3559 		fp->f_flag |= FHASLOCK;
3560 	}
3561 	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
3562 		vfs_object_create(vp, p, p->p_ucred);
3563 
3564 	VOP_UNLOCK(vp, 0, p);
3565 	fdrop(fp, p);
3566 	p->p_retval[0] = indx;
3567 	return (0);
3568 
3569 bad:
3570 	vput(vp);
3571 	return (error);
3572 }
3573 
3574 /*
3575  * Stat an (NFS) file handle.
3576  */
3577 #ifndef _SYS_SYSPROTO_H_
3578 struct fhstat_args {
3579 	struct fhandle *u_fhp;
3580 	struct stat *sb;
3581 };
3582 #endif
3583 int
3584 fhstat(p, uap)
3585 	struct proc *p;
3586 	register struct fhstat_args /* {
3587 		syscallarg(struct fhandle *) u_fhp;
3588 		syscallarg(struct stat *) sb;
3589 	} */ *uap;
3590 {
3591 	struct stat sb;
3592 	fhandle_t fh;
3593 	struct mount *mp;
3594 	struct vnode *vp;
3595 	int error;
3596 
3597 	/*
3598 	 * Must be super user
3599 	 */
3600 	error = suser(p);
3601 	if (error)
3602 		return (error);
3603 
3604 	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
3605 	if (error)
3606 		return (error);
3607 
3608 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3609 		return (ESTALE);
3610 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3611 		return (error);
3612 	error = vn_stat(vp, &sb, p);
3613 	vput(vp);
3614 	if (error)
3615 		return (error);
3616 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
3617 	return (error);
3618 }
3619 
3620 /*
3621  * Implement fstatfs() for (NFS) file handles.
3622  */
3623 #ifndef _SYS_SYSPROTO_H_
3624 struct fhstatfs_args {
3625 	struct fhandle *u_fhp;
3626 	struct statfs *buf;
3627 };
3628 #endif
3629 int
3630 fhstatfs(p, uap)
3631 	struct proc *p;
3632 	struct fhstatfs_args /* {
3633 		syscallarg(struct fhandle) *u_fhp;
3634 		syscallarg(struct statfs) *buf;
3635 	} */ *uap;
3636 {
3637 	struct statfs *sp;
3638 	struct mount *mp;
3639 	struct vnode *vp;
3640 	struct statfs sb;
3641 	fhandle_t fh;
3642 	int error;
3643 
3644 	/*
3645 	 * Must be super user
3646 	 */
3647 	if ((error = suser(p)))
3648 		return (error);
3649 
3650 	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
3651 		return (error);
3652 
3653 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3654 		return (ESTALE);
3655 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3656 		return (error);
3657 	mp = vp->v_mount;
3658 	sp = &mp->mnt_stat;
3659 	vput(vp);
3660 	if ((error = VFS_STATFS(mp, sp, p)) != 0)
3661 		return (error);
3662 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3663 	if (suser_xxx(p->p_ucred, 0, 0)) {
3664 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
3665 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3666 		sp = &sb;
3667 	}
3668 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
3669 }
3670 
3671 /*
3672  * Syscall to push extended attribute configuration information into the
3673  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3674  * a command (int cmd), and attribute name and misc data.  For now, the
3675  * attribute name is left in userspace for consumption by the VFS_op.
3676  * It will probably be changed to be copied into sysspace by the
3677  * syscall in the future, once issues with various consumers of the
3678  * attribute code have raised their hands.
3679  *
3680  * Currently this is used only by UFS Extended Attributes.
3681  */
3682 int
3683 extattrctl(p, uap)
3684 	struct proc *p;
3685 	struct extattrctl_args *uap;
3686 {
3687 	struct vnode *filename_vp;
3688 	struct nameidata nd;
3689 	struct mount *mp;
3690 	char attrname[EXTATTR_MAXNAMELEN];
3691 	int error;
3692 
3693 	/*
3694 	 * SCARG(uap, attrname) not always defined.  We check again later
3695 	 * when we invoke the VFS call so as to pass in NULL there if needed.
3696 	 */
3697 	if (SCARG(uap, attrname) != NULL) {
3698 		error = copyinstr(SCARG(uap, attrname), attrname,
3699 		    EXTATTR_MAXNAMELEN, NULL);
3700 		if (error)
3701 			return (error);
3702 	}
3703 
3704 	/*
3705 	 * SCARG(uap, filename) not always defined.  If it is, grab
3706 	 * a vnode lock, which VFS_EXTATTRCTL() will later release.
3707 	 */
3708 	filename_vp = NULL;
3709 	if (SCARG(uap, filename) != NULL) {
3710 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3711 		    SCARG(uap, filename), p);
3712 		if ((error = namei(&nd)) != 0)
3713 			return (error);
3714 		filename_vp = nd.ni_vp;
3715 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3716 	}
3717 
3718 	/* SCARG(uap, path) always defined. */
3719 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3720 	if ((error = namei(&nd)) != 0)
3721 		return (error);
3722 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
3723 	NDFREE(&nd, 0);
3724 	if (error) {
3725 		if (filename_vp)
3726 			vrele(filename_vp);
3727 		return (error);
3728 	}
3729 
3730 	if (SCARG(uap, attrname) != NULL) {
3731 		error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
3732 		    SCARG(uap, attrnamespace), attrname, p);
3733 	} else {
3734 		error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
3735 		    SCARG(uap, attrnamespace), NULL, p);
3736 	}
3737 
3738 	vn_finished_write(mp);
3739 	/*
3740 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3741 	 * filename_vp, so vrele it if it is defined.
3742 	 */
3743 	if (filename_vp != NULL)
3744 		vrele(filename_vp);
3745 
3746 	return (error);
3747 }
3748 
3749 /*
3750  * extattr_set_vp(): Set a named extended attribute on a file or directory
3751  *
3752  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3753  *            kernelspace string pointer "attrname",
3754  *            userspace iovec array pointer "iovp", unsigned int iovcnt
3755  *            proc "p"
3756  * Returns: 0 on success, an error number otherwise
3757  * Locks: none
3758  * References: vp must be a valid reference for the duration of the call
3759  */
3760 static int
3761 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3762     struct iovec *iovp, unsigned iovcnt, struct proc *p)
3763 {
3764 	struct mount *mp;
3765 	struct uio auio;
3766 	struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
3767 	u_int iovlen, cnt;
3768 	int error, i;
3769 
3770 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3771 		return (error);
3772 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
3773 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3774 
3775 	iovlen = iovcnt * sizeof(struct iovec);
3776 	if (iovcnt > UIO_SMALLIOV) {
3777 		if (iovcnt > UIO_MAXIOV) {
3778 			error = EINVAL;
3779 			goto done;
3780 		}
3781 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3782 		needfree = iov;
3783 	} else
3784 		iov = aiov;
3785 	auio.uio_iov = iov;
3786 	auio.uio_iovcnt = iovcnt;
3787 	auio.uio_rw = UIO_WRITE;
3788 	auio.uio_segflg = UIO_USERSPACE;
3789 	auio.uio_procp = p;
3790 	auio.uio_offset = 0;
3791 	if ((error = copyin((caddr_t)iovp, (caddr_t)iov, iovlen)))
3792 		goto done;
3793 	auio.uio_resid = 0;
3794 	for (i = 0; i < iovcnt; i++) {
3795 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
3796 			error = EINVAL;
3797 			goto done;
3798 		}
3799 		auio.uio_resid += iov->iov_len;
3800 		iov++;
3801 	}
3802 	cnt = auio.uio_resid;
3803 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3804 	    p->p_ucred, p);
3805 	cnt -= auio.uio_resid;
3806 	p->p_retval[0] = cnt;
3807 done:
3808 	if (needfree)
3809 		FREE(needfree, M_IOV);
3810 	VOP_UNLOCK(vp, 0, p);
3811 	vn_finished_write(mp);
3812 	return (error);
3813 }
3814 
3815 int
3816 extattr_set_file(p, uap)
3817 	struct proc *p;
3818 	struct extattr_set_file_args *uap;
3819 {
3820 	struct nameidata nd;
3821 	char attrname[EXTATTR_MAXNAMELEN];
3822 	int error;
3823 
3824 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
3825 	    NULL);
3826 	if (error)
3827 		return (error);
3828 
3829 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3830 	if ((error = namei(&nd)) != 0)
3831 		return (error);
3832 	NDFREE(&nd, NDF_ONLY_PNBUF);
3833 
3834 	error = extattr_set_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
3835 	    SCARG(uap, iovp), SCARG(uap, iovcnt), p);
3836 
3837 	vrele(nd.ni_vp);
3838 	return (error);
3839 }
3840 
3841 int
3842 extattr_set_fd(p, uap)
3843 	struct proc *p;
3844 	struct extattr_set_fd_args *uap;
3845 {
3846 	struct file *fp;
3847 	char attrname[EXTATTR_MAXNAMELEN];
3848 	int error;
3849 
3850 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
3851 	    NULL);
3852 	if (error)
3853 		return (error);
3854 
3855 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3856 		return (error);
3857 
3858 	error = extattr_set_vp((struct vnode *)fp->f_data,
3859 	    SCARG(uap, attrnamespace), attrname, SCARG(uap, iovp),
3860 	    SCARG(uap, iovcnt), p);
3861 
3862 	return (error);
3863 }
3864 
3865 /*
3866  * extattr_get_vp(): Get a named extended attribute on a file or directory
3867  *
3868  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3869  *            kernelspace string pointer "attrname",
3870  *            userspace iovec array pointer "iovp", unsigned int iovcnt,
3871  *            proc "p"
3872  * Returns: 0 on success, an error number otherwise
3873  * Locks: none
3874  * References: vp must be a valid reference for the duration of the call
3875  */
3876 static int
3877 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3878     struct iovec *iovp, unsigned iovcnt, struct proc *p)
3879 {
3880 	struct uio auio;
3881 	struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
3882 	u_int iovlen, cnt;
3883 	int error, i;
3884 
3885 	VOP_LEASE(vp, p, p->p_ucred, LEASE_READ);
3886 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3887 
3888 	iovlen = iovcnt * sizeof (struct iovec);
3889 	if (iovcnt > UIO_SMALLIOV) {
3890 		if (iovcnt > UIO_MAXIOV) {
3891 			error = EINVAL;
3892 			goto done;
3893 		}
3894 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3895 		needfree = iov;
3896 	} else
3897 		iov = aiov;
3898 	auio.uio_iov = iov;
3899 	auio.uio_iovcnt = iovcnt;
3900 	auio.uio_rw = UIO_READ;
3901 	auio.uio_segflg = UIO_USERSPACE;
3902 	auio.uio_procp = p;
3903 	auio.uio_offset = 0;
3904 	if ((error = copyin((caddr_t)iovp, (caddr_t)iov, iovlen)))
3905 		goto done;
3906 	auio.uio_resid = 0;
3907 	for (i = 0; i < iovcnt; i++) {
3908 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
3909 			error = EINVAL;
3910 			goto done;
3911 		}
3912 		auio.uio_resid += iov->iov_len;
3913 		iov++;
3914 	}
3915 	cnt = auio.uio_resid;
3916 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio,
3917 	    p->p_ucred, p);
3918 	cnt -= auio.uio_resid;
3919 	p->p_retval[0] = cnt;
3920 done:
3921 	if (needfree)
3922 		FREE(needfree, M_IOV);
3923 	VOP_UNLOCK(vp, 0, p);
3924 	return (error);
3925 }
3926 
3927 int
3928 extattr_get_file(p, uap)
3929 	struct proc *p;
3930 	struct extattr_get_file_args *uap;
3931 {
3932 	struct nameidata nd;
3933 	char attrname[EXTATTR_MAXNAMELEN];
3934 	int error;
3935 
3936 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
3937 	    NULL);
3938 	if (error)
3939 		return (error);
3940 
3941 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3942 	if ((error = namei(&nd)) != 0)
3943 		return (error);
3944 	NDFREE(&nd, NDF_ONLY_PNBUF);
3945 
3946 	error = extattr_get_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
3947 	    SCARG(uap, iovp), SCARG(uap, iovcnt), p);
3948 
3949 	vrele(nd.ni_vp);
3950 	return (error);
3951 }
3952 
3953 int
3954 extattr_get_fd(p, uap)
3955 	struct proc *p;
3956 	struct extattr_get_fd_args *uap;
3957 {
3958 	struct file *fp;
3959 	char attrname[EXTATTR_MAXNAMELEN];
3960 	int error;
3961 
3962 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
3963 	    NULL);
3964 	if (error)
3965 		return (error);
3966 
3967 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3968 		return (error);
3969 
3970 	error = extattr_get_vp((struct vnode *)fp->f_data,
3971 	    SCARG(uap, attrnamespace), attrname, SCARG(uap, iovp),
3972 	    SCARG(uap, iovcnt), p);
3973 
3974 	return (error);
3975 }
3976 
3977 /*
3978  * extattr_delete_vp(): Delete a named extended attribute on a file or
3979  *                      directory
3980  *
3981  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3982  *            kernelspace string pointer "attrname", proc "p"
3983  * Returns: 0 on success, an error number otherwise
3984  * Locks: none
3985  * References: vp must be a valid reference for the duration of the call
3986  */
3987 static int
3988 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3989     struct proc *p)
3990 {
3991 	struct mount *mp;
3992 	int error;
3993 
3994 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3995 		return (error);
3996 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
3997 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3998 
3999 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4000 	    p->p_ucred, p);
4001 
4002 	VOP_UNLOCK(vp, 0, p);
4003 	vn_finished_write(mp);
4004 	return (error);
4005 }
4006 
4007 int
4008 extattr_delete_file(p, uap)
4009 	struct proc *p;
4010 	struct extattr_delete_file_args *uap;
4011 {
4012 	struct nameidata nd;
4013 	char attrname[EXTATTR_MAXNAMELEN];
4014 	int error;
4015 
4016 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4017 	     NULL);
4018 	if (error)
4019 		return(error);
4020 
4021 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
4022 	if ((error = namei(&nd)) != 0)
4023 		return(error);
4024 	NDFREE(&nd, NDF_ONLY_PNBUF);
4025 
4026 	error = extattr_delete_vp(nd.ni_vp, SCARG(uap, attrnamespace),
4027 	    attrname, p);
4028 
4029 	vrele(nd.ni_vp);
4030 	return(error);
4031 }
4032 
4033 int
4034 extattr_delete_fd(p, uap)
4035 	struct proc *p;
4036 	struct extattr_delete_fd_args *uap;
4037 {
4038 	struct file *fp;
4039 	char attrname[EXTATTR_MAXNAMELEN];
4040 	int error;
4041 
4042 	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4043 	    NULL);
4044 	if (error)
4045 		return (error);
4046 
4047 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
4048 		return (error);
4049 
4050 	error = extattr_delete_vp((struct vnode *)fp->f_data,
4051 	    SCARG(uap, attrnamespace), attrname, p);
4052 
4053 	return (error);
4054 }
4055