xref: /freebsd/sys/kern/vfs_syscalls.c (revision 09e8dea79366f1e5b3a73e8a271b26e4b6bf2e6a)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  * $FreeBSD$
40  */
41 
42 /* For 4.3 integer FS ID compatibility */
43 #include "opt_compat.h"
44 #include "opt_ffs.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/sysent.h>
51 #include <sys/malloc.h>
52 #include <sys/mount.h>
53 #include <sys/mutex.h>
54 #include <sys/sysproto.h>
55 #include <sys/namei.h>
56 #include <sys/filedesc.h>
57 #include <sys/kernel.h>
58 #include <sys/fcntl.h>
59 #include <sys/file.h>
60 #include <sys/linker.h>
61 #include <sys/stat.h>
62 #include <sys/sx.h>
63 #include <sys/unistd.h>
64 #include <sys/vnode.h>
65 #include <sys/proc.h>
66 #include <sys/dirent.h>
67 #include <sys/extattr.h>
68 #include <sys/jail.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/limits.h>
72 #include <machine/stdarg.h>
73 
74 #include <vm/vm.h>
75 #include <vm/vm_object.h>
76 #include <vm/vm_page.h>
77 #include <vm/uma.h>
78 
79 static int change_dir(struct nameidata *ndp, struct thread *td);
80 static void checkdirs(struct vnode *olddp, struct vnode *newdp);
81 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
82 static int getutimes(const struct timeval *, struct timespec *);
83 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
84 static int setfmode(struct thread *td, struct vnode *, int);
85 static int setfflags(struct thread *td, struct vnode *, int);
86 static int setutimes(struct thread *td, struct vnode *,
87     const struct timespec *, int);
88 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
89     struct thread *td);
90 static int vfs_nmount(struct thread *td, int, struct uio *);
91 
92 static int	usermount = 0;	/* if 1, non-root can mount fs. */
93 
94 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
95 
96 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
97 
98 /*
99  * Virtual File System System Calls
100  */
101 
102 #ifndef _SYS_SYSPROTO_H_
103 struct nmount_args {
104 	struct iovec    *iovp;
105 	unsigned int    iovcnt;
106 	int             flags;
107 };
108 #endif
109 /* ARGSUSED */
110 int
111 nmount(td, uap)
112 	struct thread *td;
113 	struct nmount_args /* {
114 		syscallarg(struct iovec *) iovp;
115 		syscallarg(unsigned int) iovcnt;
116 		syscallarg(int) flags;
117 	} */ *uap;
118 {
119 	struct uio auio;
120 	struct iovec *iov, *needfree;
121 	struct iovec aiov[UIO_SMALLIOV];
122 	unsigned int i;
123 	int error;
124 	u_int iovlen, iovcnt;
125 
126 	iovcnt = SCARG(uap, iovcnt);
127 	iovlen = iovcnt * sizeof (struct iovec);
128 	/*
129 	 * Check that we have an even number of iovec's
130 	 * and that we have at least two options.
131 	 */
132 	if ((iovcnt & 1) || (iovcnt < 4) || (iovcnt > UIO_MAXIOV))
133 		return (EINVAL);
134 
135 	if (iovcnt > UIO_SMALLIOV) {
136 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
137 		needfree = iov;
138 	} else {
139 		iov = aiov;
140 		needfree = NULL;
141 	}
142 	auio.uio_iov = iov;
143 	auio.uio_iovcnt = iovcnt;
144 	auio.uio_segflg = UIO_USERSPACE;
145 	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
146 		goto finish;
147 
148 	for (i = 0; i < iovcnt; i++) {
149 		if (iov->iov_len > MMAXOPTIONLEN) {
150 			error = EINVAL;
151 			goto finish;
152 		}
153 		iov++;
154 	}
155 	error = vfs_nmount(td, SCARG(uap, flags), &auio);
156 finish:
157 	if (needfree != NULL)
158 		free(needfree, M_TEMP);
159 	return (error);
160 }
161 
162 /*
163  * Release all resources related to the
164  * mount options.
165  */
166 void
167 vfs_freeopts(struct vfsoptlist *opts)
168 {
169 	struct vfsopt *opt;
170 
171 	while (!TAILQ_EMPTY(opts)) {
172 		opt = TAILQ_FIRST(opts);
173 		TAILQ_REMOVE(opts, opt, link);
174 		free(opt->name, M_MOUNT);
175 		free(opt->value, M_MOUNT);
176 		free(opt, M_MOUNT);
177 	}
178 	free(opts, M_MOUNT);
179 }
180 
181 int
182 kernel_mount(iovp, iovcnt, flags)
183 	struct iovec *iovp;
184 	unsigned int iovcnt;
185 	int flags;
186 {
187 	struct uio auio;
188 	int error;
189 
190 	/*
191 	 * Check that we have an even number of iovec's
192 	 * and that we have at least two options.
193 	 */
194 	if ((iovcnt & 1) || (iovcnt < 4))
195 		return (EINVAL);
196 
197 	auio.uio_iov = iovp;
198 	auio.uio_iovcnt = iovcnt;
199 	auio.uio_segflg = UIO_SYSSPACE;
200 
201 	error = vfs_nmount(curthread, flags, &auio);
202 	return (error);
203 }
204 
205 int
206 kernel_vmount(int flags, ...)
207 {
208 	struct iovec *iovp;
209 	struct uio auio;
210 	va_list ap;
211 	unsigned int iovcnt, iovlen, len;
212 	const char *cp;
213 	char *buf, *pos;
214 	size_t n;
215 	int error, i;
216 
217 	len = 0;
218 	va_start(ap, flags);
219 	for (iovcnt = 0; (cp = va_arg(ap, const char *)) != NULL; iovcnt++)
220 		len += strlen(cp) + 1;
221 	va_end(ap);
222 
223 	if (iovcnt < 4 || iovcnt & 1)
224 		return (EINVAL);
225 
226 	iovlen = iovcnt * sizeof (struct iovec);
227 	MALLOC(iovp, struct iovec *, iovlen, M_MOUNT, M_WAITOK);
228 	MALLOC(buf, char *, len, M_MOUNT, M_WAITOK);
229 	pos = buf;
230 	va_start(ap, flags);
231 	for (i = 0; i < iovcnt; i++) {
232 		cp = va_arg(ap, const char *);
233 		copystr(cp, pos, len - (pos - buf), &n);
234 		iovp[i].iov_base = pos;
235 		iovp[i].iov_len = n;
236 		pos += n;
237 	}
238 	va_end(ap);
239 
240 	auio.uio_iov = iovp;
241 	auio.uio_iovcnt = iovcnt;
242 	auio.uio_segflg = UIO_SYSSPACE;
243 
244 	error = vfs_nmount(curthread, flags, &auio);
245 	FREE(iovp, M_MOUNT);
246 	FREE(buf, M_MOUNT);
247 	return (error);
248 }
249 
250 /*
251  * vfs_nmount(): actually attempt a filesystem mount.
252  */
253 static int
254 vfs_nmount(td, fsflags, fsoptions)
255 	struct thread *td;
256 	int fsflags;		/* Flags common to all filesystems. */
257 	struct uio *fsoptions;	/* Options local to the filesystem. */
258 {
259 	linker_file_t lf;
260 	struct vnode *vp;
261 	struct mount *mp;
262 	struct vfsconf *vfsp;
263 	struct vfsoptlist *optlist;
264 	char *fstype, *fspath;
265 	int error, flag = 0, kern_flag = 0;
266 	int fstypelen, fspathlen;
267 	struct vattr va;
268 	struct nameidata nd;
269 
270 	error = vfs_buildopts(fsoptions, &optlist);
271 	if (error)
272 		return (error);
273 
274 	/*
275 	 * We need these two options before the others,
276 	 * and they are mandatory for any filesystem.
277 	 * Ensure they are NUL terminated as well.
278 	 */
279 	fstypelen = 0;
280 	error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
281 	if (error || fstype[fstypelen - 1] != '\0') {
282 		error = EINVAL;
283 		goto bad;
284 	}
285 	fspathlen = 0;
286 	error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
287 	if (error || fspath[fspathlen - 1] != '\0') {
288 		error = EINVAL;
289 		goto bad;
290 	}
291 
292 	/*
293 	 * Be ultra-paranoid about making sure the type and fspath
294 	 * variables will fit in our mp buffers, including the
295 	 * terminating NUL.
296 	 */
297 	if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
298 		error = ENAMETOOLONG;
299 		goto bad;
300 	}
301 
302 	if (usermount == 0) {
303 	       	error = suser(td);
304 		if (error)
305 			goto bad;
306 	}
307 	/*
308 	 * Do not allow NFS export by non-root users.
309 	 */
310 	if (fsflags & MNT_EXPORTED) {
311 		error = suser(td);
312 		if (error)
313 			goto bad;
314 	}
315 	/*
316 	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
317 	 */
318 	if (suser(td))
319 		fsflags |= MNT_NOSUID | MNT_NODEV;
320 	/*
321 	 * Get vnode to be covered
322 	 */
323 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
324 	if ((error = namei(&nd)) != 0)
325 		goto bad;
326 	NDFREE(&nd, NDF_ONLY_PNBUF);
327 	vp = nd.ni_vp;
328 	if (fsflags & MNT_UPDATE) {
329 		if ((vp->v_flag & VROOT) == 0) {
330 			vput(vp);
331 			error = EINVAL;
332 			goto bad;
333 		}
334 		mp = vp->v_mount;
335 		flag = mp->mnt_flag;
336 		kern_flag = mp->mnt_kern_flag;
337 		/*
338 		 * We only allow the filesystem to be reloaded if it
339 		 * is currently mounted read-only.
340 		 */
341 		if ((fsflags & MNT_RELOAD) &&
342 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
343 			vput(vp);
344 			error = EOPNOTSUPP;	/* Needs translation */
345 			goto bad;
346 		}
347 		/*
348 		 * Only root, or the user that did the original mount is
349 		 * permitted to update it.
350 		 */
351 		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
352 			error = suser(td);
353 			if (error) {
354 				vput(vp);
355 				goto bad;
356 			}
357 		}
358 		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
359 			vput(vp);
360 			error = EBUSY;
361 			goto bad;
362 		}
363 		mtx_lock(&vp->v_interlock);
364 		if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
365 			mtx_unlock(&vp->v_interlock);
366 			vfs_unbusy(mp, td);
367 			vput(vp);
368 			error = EBUSY;
369 			goto bad;
370 		}
371 		vp->v_flag |= VMOUNT;
372 		mtx_unlock(&vp->v_interlock);
373 		mp->mnt_flag |= fsflags &
374 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
375 		VOP_UNLOCK(vp, 0, td);
376 		goto update;
377 	}
378 	/*
379 	 * If the user is not root, ensure that they own the directory
380 	 * onto which we are attempting to mount.
381 	 */
382 	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
383 	if (error) {
384 		vput(vp);
385 		goto bad;
386 	}
387 	if (va.va_uid != td->td_ucred->cr_uid) {
388 		error = suser(td);
389 		if (error) {
390 			vput(vp);
391 			goto bad;
392 		}
393 	}
394 	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
395 		vput(vp);
396 		goto bad;
397 	}
398 	if (vp->v_type != VDIR) {
399 		vput(vp);
400 		error = ENOTDIR;
401 		goto bad;
402 	}
403 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
404 		if (!strcmp(vfsp->vfc_name, fstype))
405 			break;
406 	if (vfsp == NULL) {
407 		/* Only load modules for root (very important!). */
408 		error = suser(td);
409 		if (error) {
410 			vput(vp);
411 			goto bad;
412 		}
413 		error = securelevel_gt(td->td_ucred, 0);
414 		if (error) {
415 			vput(vp);
416 			goto bad;
417 		}
418 		error = linker_load_file(fstype, &lf);
419 		if (error || lf == NULL) {
420 			vput(vp);
421 			if (lf == NULL)
422 				error = ENODEV;
423 			goto bad;
424 		}
425 		lf->userrefs++;
426 		/* Look up again to see if the VFS was loaded. */
427 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
428 			if (!strcmp(vfsp->vfc_name, fstype))
429 				break;
430 		if (vfsp == NULL) {
431 			lf->userrefs--;
432 			linker_file_unload(lf);
433 			vput(vp);
434 			error = ENODEV;
435 			goto bad;
436 		}
437 	}
438 	mtx_lock(&vp->v_interlock);
439 	if ((vp->v_flag & VMOUNT) != 0 ||
440 	    vp->v_mountedhere != NULL) {
441 		mtx_unlock(&vp->v_interlock);
442 		vput(vp);
443 		error = EBUSY;
444 		goto bad;
445 	}
446 	vp->v_flag |= VMOUNT;
447 	mtx_unlock(&vp->v_interlock);
448 
449 	/*
450 	 * Allocate and initialize the filesystem.
451 	 */
452 	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
453 	TAILQ_INIT(&mp->mnt_nvnodelist);
454 	TAILQ_INIT(&mp->mnt_reservedvnlist);
455 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
456 	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
457 	mp->mnt_op = vfsp->vfc_vfsops;
458 	mp->mnt_vfc = vfsp;
459 	vfsp->vfc_refcount++;
460 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
461 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
462 	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
463 	mp->mnt_vnodecovered = vp;
464 	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
465 	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
466 	mp->mnt_iosize_max = DFLTPHYS;
467 	VOP_UNLOCK(vp, 0, td);
468 
469 update:
470 	mp->mnt_optnew = optlist;
471 	/*
472 	 * Check if the fs implements the new VFS_NMOUNT()
473 	 * function, since the new system call was used.
474 	 */
475 	if (mp->mnt_op->vfs_mount != NULL) {
476 		printf("%s doesn't support the new mount syscall\n",
477 		    mp->mnt_vfc->vfc_name);
478 		mtx_lock(&vp->v_interlock);
479 		vp->v_flag &= ~VMOUNT;
480 		mtx_unlock(&vp->v_interlock);
481 		if (mp->mnt_flag & MNT_UPDATE)
482 			vfs_unbusy(mp, td);
483 		else {
484 			mp->mnt_vfc->vfc_refcount--;
485 			vfs_unbusy(mp, td);
486 			free((caddr_t)mp, M_MOUNT);
487 		}
488 		vrele(vp);
489 		error = EOPNOTSUPP;
490 		goto bad;
491 	}
492 
493 	/*
494 	 * Set the mount level flags.
495 	 */
496 	if (fsflags & MNT_RDONLY)
497 		mp->mnt_flag |= MNT_RDONLY;
498 	else if (mp->mnt_flag & MNT_RDONLY)
499 		mp->mnt_kern_flag |= MNTK_WANTRDWR;
500 	mp->mnt_flag &=~ MNT_UPDATEMASK;
501 	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
502 	/*
503 	 * Mount the filesystem.
504 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
505 	 * get.  No freeing of cn_pnbuf.
506 	 */
507 	error = VFS_NMOUNT(mp, &nd, td);
508 	if (!error) {
509 		if (mp->mnt_opt != NULL)
510 			vfs_freeopts(mp->mnt_opt);
511 		mp->mnt_opt = mp->mnt_optnew;
512 	}
513 	/*
514 	 * Prevent external consumers of mount
515 	 * options to read mnt_optnew.
516 	 */
517 	mp->mnt_optnew = NULL;
518 	if (mp->mnt_flag & MNT_UPDATE) {
519 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
520 			mp->mnt_flag &= ~MNT_RDONLY;
521 		mp->mnt_flag &=~
522 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
523 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
524 		if (error) {
525 			mp->mnt_flag = flag;
526 			mp->mnt_kern_flag = kern_flag;
527 		}
528 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
529 			if (mp->mnt_syncer == NULL)
530 				error = vfs_allocate_syncvnode(mp);
531 		} else {
532 			if (mp->mnt_syncer != NULL)
533 				vrele(mp->mnt_syncer);
534 			mp->mnt_syncer = NULL;
535 		}
536 		vfs_unbusy(mp, td);
537 		mtx_lock(&vp->v_interlock);
538 		vp->v_flag &= ~VMOUNT;
539 		mtx_unlock(&vp->v_interlock);
540 		vrele(vp);
541 		return (error);
542 	}
543 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
544 	/*
545 	 * Put the new filesystem on the mount list after root.
546 	 */
547 	cache_purge(vp);
548 	if (!error) {
549 		struct vnode *newdp;
550 
551 		mtx_lock(&vp->v_interlock);
552 		vp->v_flag &= ~VMOUNT;
553 		vp->v_mountedhere = mp;
554 		mtx_unlock(&vp->v_interlock);
555 		mtx_lock(&mountlist_mtx);
556 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
557 		mtx_unlock(&mountlist_mtx);
558 		if (VFS_ROOT(mp, &newdp))
559 			panic("mount: lost mount");
560 		checkdirs(vp, newdp);
561 		vput(newdp);
562 		VOP_UNLOCK(vp, 0, td);
563 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
564 			error = vfs_allocate_syncvnode(mp);
565 		vfs_unbusy(mp, td);
566 		if ((error = VFS_START(mp, 0, td)) != 0) {
567 			vrele(vp);
568 			goto bad;
569 		}
570 	} else {
571 		mtx_lock(&vp->v_interlock);
572 		vp->v_flag &= ~VMOUNT;
573 		mtx_unlock(&vp->v_interlock);
574 		mp->mnt_vfc->vfc_refcount--;
575 		vfs_unbusy(mp, td);
576 		free((caddr_t)mp, M_MOUNT);
577 		vput(vp);
578 		goto bad;
579 	}
580 	return (0);
581 bad:
582 	vfs_freeopts(optlist);
583 	return (error);
584 }
585 
586 /*
587  * Old Mount API.
588  */
589 #ifndef _SYS_SYSPROTO_H_
590 struct mount_args {
591 	char	*type;
592 	char	*path;
593 	int	flags;
594 	caddr_t	data;
595 };
596 #endif
597 /* ARGSUSED */
598 int
599 mount(td, uap)
600 	struct thread *td;
601 	struct mount_args /* {
602 		syscallarg(char *) type;
603 		syscallarg(char *) path;
604 		syscallarg(int) flags;
605 		syscallarg(caddr_t) data;
606 	} */ *uap;
607 {
608 	char *fstype;
609 	char *fspath;
610 	int error;
611 
612 	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
613 	fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK);
614 
615 	/*
616 	 * vfs_mount() actually takes a kernel string for `type' and
617 	 * `path' now, so extract them.
618 	 */
619 	error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL);
620 	if (error)
621 		goto finish;
622 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
623 	if (error)
624 		goto finish;
625 	error = vfs_mount(td, fstype, fspath, SCARG(uap, flags),
626 	    SCARG(uap, data));
627 finish:
628 	free(fstype, M_TEMP);
629 	free(fspath, M_TEMP);
630 	return (error);
631 }
632 
633 /*
634  * vfs_mount(): actually attempt a filesystem mount.
635  *
636  * This routine is designed to be a "generic" entry point for routines
637  * that wish to mount a filesystem. All parameters except `fsdata' are
638  * pointers into kernel space. `fsdata' is currently still a pointer
639  * into userspace.
640  */
641 int
642 vfs_mount(td, fstype, fspath, fsflags, fsdata)
643 	struct thread *td;
644 	const char *fstype;
645 	char *fspath;
646 	int fsflags;
647 	void *fsdata;
648 {
649 	linker_file_t lf;
650 	struct vnode *vp;
651 	struct mount *mp;
652 	struct vfsconf *vfsp;
653 	int error, flag = 0, kern_flag = 0;
654 	struct vattr va;
655 	struct nameidata nd;
656 
657 	/*
658 	 * Be ultra-paranoid about making sure the type and fspath
659 	 * variables will fit in our mp buffers, including the
660 	 * terminating NUL.
661 	 */
662 	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
663 		return (ENAMETOOLONG);
664 
665 	if (usermount == 0) {
666 		error = suser(td);
667 		if (error)
668 			return (error);
669 	}
670 	/*
671 	 * Do not allow NFS export by non-root users.
672 	 */
673 	if (fsflags & MNT_EXPORTED) {
674 		error = suser(td);
675 		if (error)
676 			return (error);
677 	}
678 	/*
679 	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
680 	 */
681 	if (suser(td))
682 		fsflags |= MNT_NOSUID | MNT_NODEV;
683 	/*
684 	 * Get vnode to be covered
685 	 */
686 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
687 	if ((error = namei(&nd)) != 0)
688 		return (error);
689 	NDFREE(&nd, NDF_ONLY_PNBUF);
690 	vp = nd.ni_vp;
691 	if (fsflags & MNT_UPDATE) {
692 		if ((vp->v_flag & VROOT) == 0) {
693 			vput(vp);
694 			return (EINVAL);
695 		}
696 		mp = vp->v_mount;
697 		flag = mp->mnt_flag;
698 		kern_flag = mp->mnt_kern_flag;
699 		/*
700 		 * We only allow the filesystem to be reloaded if it
701 		 * is currently mounted read-only.
702 		 */
703 		if ((fsflags & MNT_RELOAD) &&
704 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
705 			vput(vp);
706 			return (EOPNOTSUPP);	/* Needs translation */
707 		}
708 		/*
709 		 * Only root, or the user that did the original mount is
710 		 * permitted to update it.
711 		 */
712 		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
713 			error = suser(td);
714 			if (error) {
715 				vput(vp);
716 				return (error);
717 			}
718 		}
719 		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
720 			vput(vp);
721 			return (EBUSY);
722 		}
723 		mtx_lock(&vp->v_interlock);
724 		if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
725 			mtx_unlock(&vp->v_interlock);
726 			vfs_unbusy(mp, td);
727 			vput(vp);
728 			return (EBUSY);
729 		}
730 		vp->v_flag |= VMOUNT;
731 		mtx_unlock(&vp->v_interlock);
732 		mp->mnt_flag |= fsflags &
733 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
734 		VOP_UNLOCK(vp, 0, td);
735 		goto update;
736 	}
737 	/*
738 	 * If the user is not root, ensure that they own the directory
739 	 * onto which we are attempting to mount.
740 	 */
741 	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
742 	if (error) {
743 		vput(vp);
744 		return (error);
745 	}
746 	if (va.va_uid != td->td_ucred->cr_uid) {
747 		error = suser(td);
748 		if (error) {
749 			vput(vp);
750 			return (error);
751 		}
752 	}
753 	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
754 		vput(vp);
755 		return (error);
756 	}
757 	if (vp->v_type != VDIR) {
758 		vput(vp);
759 		return (ENOTDIR);
760 	}
761 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
762 		if (!strcmp(vfsp->vfc_name, fstype))
763 			break;
764 	if (vfsp == NULL) {
765 		/* Only load modules for root (very important!). */
766 		error = suser(td);
767 		if (error) {
768 			vput(vp);
769 			return (error);
770 		}
771 		error = securelevel_gt(td->td_ucred, 0);
772 		if (error) {
773 			vput(vp);
774 			return (error);
775 		}
776 		error = linker_load_file(fstype, &lf);
777 		if (error || lf == NULL) {
778 			vput(vp);
779 			if (lf == NULL)
780 				error = ENODEV;
781 			return (error);
782 		}
783 		lf->userrefs++;
784 		/* Look up again to see if the VFS was loaded. */
785 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
786 			if (!strcmp(vfsp->vfc_name, fstype))
787 				break;
788 		if (vfsp == NULL) {
789 			lf->userrefs--;
790 			linker_file_unload(lf);
791 			vput(vp);
792 			return (ENODEV);
793 		}
794 	}
795 	mtx_lock(&vp->v_interlock);
796 	if ((vp->v_flag & VMOUNT) != 0 ||
797 	    vp->v_mountedhere != NULL) {
798 		mtx_unlock(&vp->v_interlock);
799 		vput(vp);
800 		return (EBUSY);
801 	}
802 	vp->v_flag |= VMOUNT;
803 	mtx_unlock(&vp->v_interlock);
804 
805 	/*
806 	 * Allocate and initialize the filesystem.
807 	 */
808 	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
809 	TAILQ_INIT(&mp->mnt_nvnodelist);
810 	TAILQ_INIT(&mp->mnt_reservedvnlist);
811 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
812 	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
813 	mp->mnt_op = vfsp->vfc_vfsops;
814 	mp->mnt_vfc = vfsp;
815 	vfsp->vfc_refcount++;
816 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
817 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
818 	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
819 	mp->mnt_vnodecovered = vp;
820 	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
821 	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
822 	mp->mnt_iosize_max = DFLTPHYS;
823 	VOP_UNLOCK(vp, 0, td);
824 update:
825 	/*
826 	 * Check if the fs implements the old VFS_MOUNT()
827 	 * function, since the old system call was used.
828 	 */
829 	if (mp->mnt_op->vfs_mount == NULL) {
830 		printf("%s doesn't support the old mount syscall\n",
831 		    mp->mnt_vfc->vfc_name);
832 		mtx_lock(&vp->v_interlock);
833 		vp->v_flag &= ~VMOUNT;
834 		mtx_unlock(&vp->v_interlock);
835 		if (mp->mnt_flag & MNT_UPDATE)
836 			vfs_unbusy(mp, td);
837 		else {
838 			mp->mnt_vfc->vfc_refcount--;
839 			vfs_unbusy(mp, td);
840 			free((caddr_t)mp, M_MOUNT);
841 		}
842 		vrele(vp);
843 		return (EOPNOTSUPP);
844 	}
845 
846 	/*
847 	 * Set the mount level flags.
848 	 */
849 	if (fsflags & MNT_RDONLY)
850 		mp->mnt_flag |= MNT_RDONLY;
851 	else if (mp->mnt_flag & MNT_RDONLY)
852 		mp->mnt_kern_flag |= MNTK_WANTRDWR;
853 	mp->mnt_flag &=~ MNT_UPDATEMASK;
854 	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
855 	/*
856 	 * Mount the filesystem.
857 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
858 	 * get.  No freeing of cn_pnbuf.
859 	 */
860 	error = VFS_MOUNT(mp, fspath, fsdata, &nd, td);
861 	if (mp->mnt_flag & MNT_UPDATE) {
862 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
863 			mp->mnt_flag &= ~MNT_RDONLY;
864 		mp->mnt_flag &=~
865 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
866 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
867 		if (error) {
868 			mp->mnt_flag = flag;
869 			mp->mnt_kern_flag = kern_flag;
870 		}
871 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
872 			if (mp->mnt_syncer == NULL)
873 				error = vfs_allocate_syncvnode(mp);
874 		} else {
875 			if (mp->mnt_syncer != NULL)
876 				vrele(mp->mnt_syncer);
877 			mp->mnt_syncer = NULL;
878 		}
879 		vfs_unbusy(mp, td);
880 		mtx_lock(&vp->v_interlock);
881 		vp->v_flag &= ~VMOUNT;
882 		mtx_unlock(&vp->v_interlock);
883 		vrele(vp);
884 		return (error);
885 	}
886 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
887 	/*
888 	 * Put the new filesystem on the mount list after root.
889 	 */
890 	cache_purge(vp);
891 	if (!error) {
892 		struct vnode *newdp;
893 
894 		mtx_lock(&vp->v_interlock);
895 		vp->v_flag &= ~VMOUNT;
896 		vp->v_mountedhere = mp;
897 		mtx_unlock(&vp->v_interlock);
898 		mtx_lock(&mountlist_mtx);
899 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
900 		mtx_unlock(&mountlist_mtx);
901 		if (VFS_ROOT(mp, &newdp))
902 			panic("mount: lost mount");
903 		checkdirs(vp, newdp);
904 		vput(newdp);
905 		VOP_UNLOCK(vp, 0, td);
906 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
907 			error = vfs_allocate_syncvnode(mp);
908 		vfs_unbusy(mp, td);
909 		if ((error = VFS_START(mp, 0, td)) != 0)
910 			vrele(vp);
911 	} else {
912 		mtx_lock(&vp->v_interlock);
913 		vp->v_flag &= ~VMOUNT;
914 		mtx_unlock(&vp->v_interlock);
915 		mp->mnt_vfc->vfc_refcount--;
916 		vfs_unbusy(mp, td);
917 		free((caddr_t)mp, M_MOUNT);
918 		vput(vp);
919 	}
920 	return (error);
921 }
922 
923 /*
924  * Scan all active processes to see if any of them have a current
925  * or root directory of `olddp'. If so, replace them with the new
926  * mount point.
927  */
928 static void
929 checkdirs(olddp, newdp)
930 	struct vnode *olddp, *newdp;
931 {
932 	struct filedesc *fdp;
933 	struct proc *p;
934 	int nrele;
935 
936 	if (olddp->v_usecount == 1)
937 		return;
938 	sx_slock(&allproc_lock);
939 	LIST_FOREACH(p, &allproc, p_list) {
940 		PROC_LOCK(p);
941 		fdp = p->p_fd;
942 		if (fdp == NULL) {
943 			PROC_UNLOCK(p);
944 			continue;
945 		}
946 		nrele = 0;
947 		FILEDESC_LOCK(fdp);
948 		if (fdp->fd_cdir == olddp) {
949 			VREF(newdp);
950 			fdp->fd_cdir = newdp;
951 			nrele++;
952 		}
953 		if (fdp->fd_rdir == olddp) {
954 			VREF(newdp);
955 			fdp->fd_rdir = newdp;
956 			nrele++;
957 		}
958 		FILEDESC_UNLOCK(fdp);
959 		PROC_UNLOCK(p);
960 		while (nrele--)
961 			vrele(olddp);
962 	}
963 	sx_sunlock(&allproc_lock);
964 	if (rootvnode == olddp) {
965 		vrele(rootvnode);
966 		VREF(newdp);
967 		rootvnode = newdp;
968 	}
969 }
970 
971 /*
972  * Unmount a filesystem.
973  *
974  * Note: unmount takes a path to the vnode mounted on as argument,
975  * not special file (as before).
976  */
977 #ifndef _SYS_SYSPROTO_H_
978 struct unmount_args {
979 	char	*path;
980 	int	flags;
981 };
982 #endif
983 /* ARGSUSED */
984 int
985 unmount(td, uap)
986 	struct thread *td;
987 	register struct unmount_args /* {
988 		syscallarg(char *) path;
989 		syscallarg(int) flags;
990 	} */ *uap;
991 {
992 	register struct vnode *vp;
993 	struct mount *mp;
994 	int error;
995 	struct nameidata nd;
996 
997 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
998 	    SCARG(uap, path), td);
999 	if ((error = namei(&nd)) != 0)
1000 		return (error);
1001 	vp = nd.ni_vp;
1002 	NDFREE(&nd, NDF_ONLY_PNBUF);
1003 	mp = vp->v_mount;
1004 
1005 	/*
1006 	 * Only root, or the user that did the original mount is
1007 	 * permitted to unmount this filesystem.
1008 	 */
1009 	if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
1010 		error = suser(td);
1011 		if (error) {
1012 			vput(vp);
1013 			return (error);
1014 		}
1015 	}
1016 
1017 	/*
1018 	 * Don't allow unmounting the root filesystem.
1019 	 */
1020 	if (mp->mnt_flag & MNT_ROOTFS) {
1021 		vput(vp);
1022 		return (EINVAL);
1023 	}
1024 
1025 	/*
1026 	 * Must be the root of the filesystem
1027 	 */
1028 	if ((vp->v_flag & VROOT) == 0) {
1029 		vput(vp);
1030 		return (EINVAL);
1031 	}
1032 	vput(vp);
1033 	return (dounmount(mp, SCARG(uap, flags), td));
1034 }
1035 
1036 /*
1037  * Do the actual filesystem unmount.
1038  */
1039 int
1040 dounmount(mp, flags, td)
1041 	struct mount *mp;
1042 	int flags;
1043 	struct thread *td;
1044 {
1045 	struct vnode *coveredvp, *fsrootvp;
1046 	int error;
1047 	int async_flag;
1048 
1049 	mtx_lock(&mountlist_mtx);
1050 	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
1051 		mtx_unlock(&mountlist_mtx);
1052 		return (EBUSY);
1053 	}
1054 	mp->mnt_kern_flag |= MNTK_UNMOUNT;
1055 	/* Allow filesystems to detect that a forced unmount is in progress. */
1056 	if (flags & MNT_FORCE)
1057 		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
1058 	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
1059 	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
1060 	if (error) {
1061 		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1062 		if (mp->mnt_kern_flag & MNTK_MWAIT)
1063 			wakeup((caddr_t)mp);
1064 		return (error);
1065 	}
1066 	vn_start_write(NULL, &mp, V_WAIT);
1067 
1068 	if (mp->mnt_flag & MNT_EXPUBLIC)
1069 		vfs_setpublicfs(NULL, NULL, NULL);
1070 
1071 	vfs_msync(mp, MNT_WAIT);
1072 	async_flag = mp->mnt_flag & MNT_ASYNC;
1073 	mp->mnt_flag &=~ MNT_ASYNC;
1074 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
1075 	if (mp->mnt_syncer != NULL)
1076 		vrele(mp->mnt_syncer);
1077 	/* Move process cdir/rdir refs on fs root to underlying vnode. */
1078 	if (VFS_ROOT(mp, &fsrootvp) == 0) {
1079 		if (mp->mnt_vnodecovered != NULL)
1080 			checkdirs(fsrootvp, mp->mnt_vnodecovered);
1081 		if (fsrootvp == rootvnode) {
1082 			vrele(rootvnode);
1083 			rootvnode = NULL;
1084 		}
1085 		vput(fsrootvp);
1086 	}
1087 	if (((mp->mnt_flag & MNT_RDONLY) ||
1088 	     (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) ||
1089 	    (flags & MNT_FORCE)) {
1090 		error = VFS_UNMOUNT(mp, flags, td);
1091 	}
1092 	vn_finished_write(mp);
1093 	if (error) {
1094 		/* Undo cdir/rdir and rootvnode changes made above. */
1095 		if (VFS_ROOT(mp, &fsrootvp) == 0) {
1096 			if (mp->mnt_vnodecovered != NULL)
1097 				checkdirs(mp->mnt_vnodecovered, fsrootvp);
1098 			if (rootvnode == NULL) {
1099 				rootvnode = fsrootvp;
1100 				vref(rootvnode);
1101 			}
1102 			vput(fsrootvp);
1103 		}
1104 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
1105 			(void) vfs_allocate_syncvnode(mp);
1106 		mtx_lock(&mountlist_mtx);
1107 		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1108 		mp->mnt_flag |= async_flag;
1109 		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
1110 		    &mountlist_mtx, td);
1111 		if (mp->mnt_kern_flag & MNTK_MWAIT)
1112 			wakeup((caddr_t)mp);
1113 		return (error);
1114 	}
1115 	mtx_lock(&mountlist_mtx);
1116 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
1117 	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
1118 		coveredvp->v_mountedhere = NULL;
1119 	mp->mnt_vfc->vfc_refcount--;
1120 	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
1121 		panic("unmount: dangling vnode");
1122 	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td);
1123 	lockdestroy(&mp->mnt_lock);
1124 	if (coveredvp != NULL)
1125 		vrele(coveredvp);
1126 	if (mp->mnt_kern_flag & MNTK_MWAIT)
1127 		wakeup((caddr_t)mp);
1128 	if (mp->mnt_op->vfs_mount == NULL)
1129 		vfs_freeopts(mp->mnt_opt);
1130 	free((caddr_t)mp, M_MOUNT);
1131 	return (0);
1132 }
1133 
1134 /*
1135  * Sync each mounted filesystem.
1136  */
1137 #ifndef _SYS_SYSPROTO_H_
1138 struct sync_args {
1139         int     dummy;
1140 };
1141 #endif
1142 
1143 #ifdef DEBUG
1144 static int syncprt = 0;
1145 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
1146 #endif
1147 
1148 /* ARGSUSED */
1149 int
1150 sync(td, uap)
1151 	struct thread *td;
1152 	struct sync_args *uap;
1153 {
1154 	struct mount *mp, *nmp;
1155 	int asyncflag;
1156 
1157 	mtx_lock(&mountlist_mtx);
1158 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
1159 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
1160 			nmp = TAILQ_NEXT(mp, mnt_list);
1161 			continue;
1162 		}
1163 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
1164 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
1165 			asyncflag = mp->mnt_flag & MNT_ASYNC;
1166 			mp->mnt_flag &= ~MNT_ASYNC;
1167 			vfs_msync(mp, MNT_NOWAIT);
1168 			VFS_SYNC(mp, MNT_NOWAIT,
1169 			    ((td != NULL) ? td->td_ucred : NOCRED), td);
1170 			mp->mnt_flag |= asyncflag;
1171 			vn_finished_write(mp);
1172 		}
1173 		mtx_lock(&mountlist_mtx);
1174 		nmp = TAILQ_NEXT(mp, mnt_list);
1175 		vfs_unbusy(mp, td);
1176 	}
1177 	mtx_unlock(&mountlist_mtx);
1178 #if 0
1179 /*
1180  * XXX don't call vfs_bufstats() yet because that routine
1181  * was not imported in the Lite2 merge.
1182  */
1183 #ifdef DIAGNOSTIC
1184 	if (syncprt)
1185 		vfs_bufstats();
1186 #endif /* DIAGNOSTIC */
1187 #endif
1188 	return (0);
1189 }
1190 
1191 /* XXX PRISON: could be per prison flag */
1192 static int prison_quotas;
1193 #if 0
1194 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
1195 #endif
1196 
1197 /*
1198  * Change filesystem quotas.
1199  */
1200 #ifndef _SYS_SYSPROTO_H_
1201 struct quotactl_args {
1202 	char *path;
1203 	int cmd;
1204 	int uid;
1205 	caddr_t arg;
1206 };
1207 #endif
1208 /* ARGSUSED */
1209 int
1210 quotactl(td, uap)
1211 	struct thread *td;
1212 	register struct quotactl_args /* {
1213 		syscallarg(char *) path;
1214 		syscallarg(int) cmd;
1215 		syscallarg(int) uid;
1216 		syscallarg(caddr_t) arg;
1217 	} */ *uap;
1218 {
1219 	struct mount *mp;
1220 	int error;
1221 	struct nameidata nd;
1222 
1223 	if (jailed(td->td_ucred) && !prison_quotas)
1224 		return (EPERM);
1225 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1226 	if ((error = namei(&nd)) != 0)
1227 		return (error);
1228 	NDFREE(&nd, NDF_ONLY_PNBUF);
1229 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
1230 	vrele(nd.ni_vp);
1231 	if (error)
1232 		return (error);
1233 	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
1234 	    SCARG(uap, arg), td);
1235 	vn_finished_write(mp);
1236 	return (error);
1237 }
1238 
1239 /*
1240  * Get filesystem statistics.
1241  */
1242 #ifndef _SYS_SYSPROTO_H_
1243 struct statfs_args {
1244 	char *path;
1245 	struct statfs *buf;
1246 };
1247 #endif
1248 /* ARGSUSED */
1249 int
1250 statfs(td, uap)
1251 	struct thread *td;
1252 	register struct statfs_args /* {
1253 		syscallarg(char *) path;
1254 		syscallarg(struct statfs *) buf;
1255 	} */ *uap;
1256 {
1257 	register struct mount *mp;
1258 	register struct statfs *sp;
1259 	int error;
1260 	struct nameidata nd;
1261 	struct statfs sb;
1262 
1263 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1264 	if ((error = namei(&nd)) != 0)
1265 		return (error);
1266 	mp = nd.ni_vp->v_mount;
1267 	sp = &mp->mnt_stat;
1268 	NDFREE(&nd, NDF_ONLY_PNBUF);
1269 	vrele(nd.ni_vp);
1270 	error = VFS_STATFS(mp, sp, td);
1271 	if (error)
1272 		return (error);
1273 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1274 	if (suser(td)) {
1275 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
1276 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
1277 		sp = &sb;
1278 	}
1279 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
1280 }
1281 
1282 /*
1283  * Get filesystem statistics.
1284  */
1285 #ifndef _SYS_SYSPROTO_H_
1286 struct fstatfs_args {
1287 	int fd;
1288 	struct statfs *buf;
1289 };
1290 #endif
1291 /* ARGSUSED */
1292 int
1293 fstatfs(td, uap)
1294 	struct thread *td;
1295 	register struct fstatfs_args /* {
1296 		syscallarg(int) fd;
1297 		syscallarg(struct statfs *) buf;
1298 	} */ *uap;
1299 {
1300 	struct file *fp;
1301 	struct mount *mp;
1302 	register struct statfs *sp;
1303 	int error;
1304 	struct statfs sb;
1305 
1306 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
1307 		return (error);
1308 	mp = ((struct vnode *)fp->f_data)->v_mount;
1309 	fdrop(fp, td);
1310 	if (mp == NULL)
1311 		return (EBADF);
1312 	sp = &mp->mnt_stat;
1313 	error = VFS_STATFS(mp, sp, td);
1314 	if (error)
1315 		return (error);
1316 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1317 	if (suser(td)) {
1318 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
1319 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
1320 		sp = &sb;
1321 	}
1322 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
1323 }
1324 
1325 /*
1326  * Get statistics on all filesystems.
1327  */
1328 #ifndef _SYS_SYSPROTO_H_
1329 struct getfsstat_args {
1330 	struct statfs *buf;
1331 	long bufsize;
1332 	int flags;
1333 };
1334 #endif
1335 int
1336 getfsstat(td, uap)
1337 	struct thread *td;
1338 	register struct getfsstat_args /* {
1339 		syscallarg(struct statfs *) buf;
1340 		syscallarg(long) bufsize;
1341 		syscallarg(int) flags;
1342 	} */ *uap;
1343 {
1344 	register struct mount *mp, *nmp;
1345 	register struct statfs *sp;
1346 	caddr_t sfsp;
1347 	long count, maxcount, error;
1348 
1349 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
1350 	sfsp = (caddr_t)SCARG(uap, buf);
1351 	count = 0;
1352 	mtx_lock(&mountlist_mtx);
1353 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
1354 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
1355 			nmp = TAILQ_NEXT(mp, mnt_list);
1356 			continue;
1357 		}
1358 		if (sfsp && count < maxcount) {
1359 			sp = &mp->mnt_stat;
1360 			/*
1361 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1362 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
1363 			 * overrides MNT_WAIT.
1364 			 */
1365 			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
1366 			    (SCARG(uap, flags) & MNT_WAIT)) &&
1367 			    (error = VFS_STATFS(mp, sp, td))) {
1368 				mtx_lock(&mountlist_mtx);
1369 				nmp = TAILQ_NEXT(mp, mnt_list);
1370 				vfs_unbusy(mp, td);
1371 				continue;
1372 			}
1373 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1374 			error = copyout((caddr_t)sp, sfsp, sizeof(*sp));
1375 			if (error) {
1376 				vfs_unbusy(mp, td);
1377 				return (error);
1378 			}
1379 			sfsp += sizeof(*sp);
1380 		}
1381 		count++;
1382 		mtx_lock(&mountlist_mtx);
1383 		nmp = TAILQ_NEXT(mp, mnt_list);
1384 		vfs_unbusy(mp, td);
1385 	}
1386 	mtx_unlock(&mountlist_mtx);
1387 	if (sfsp && count > maxcount)
1388 		td->td_retval[0] = maxcount;
1389 	else
1390 		td->td_retval[0] = count;
1391 	return (0);
1392 }
1393 
1394 /*
1395  * Change current working directory to a given file descriptor.
1396  */
1397 #ifndef _SYS_SYSPROTO_H_
1398 struct fchdir_args {
1399 	int	fd;
1400 };
1401 #endif
1402 /* ARGSUSED */
1403 int
1404 fchdir(td, uap)
1405 	struct thread *td;
1406 	struct fchdir_args /* {
1407 		syscallarg(int) fd;
1408 	} */ *uap;
1409 {
1410 	register struct filedesc *fdp = td->td_proc->p_fd;
1411 	struct vnode *vp, *tdp, *vpold;
1412 	struct mount *mp;
1413 	struct file *fp;
1414 	int error;
1415 
1416 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
1417 		return (error);
1418 	vp = (struct vnode *)fp->f_data;
1419 	VREF(vp);
1420 	fdrop(fp, td);
1421 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1422 	if (vp->v_type != VDIR)
1423 		error = ENOTDIR;
1424 	else
1425 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
1426 	while (!error && (mp = vp->v_mountedhere) != NULL) {
1427 		if (vfs_busy(mp, 0, 0, td))
1428 			continue;
1429 		error = VFS_ROOT(mp, &tdp);
1430 		vfs_unbusy(mp, td);
1431 		if (error)
1432 			break;
1433 		vput(vp);
1434 		vp = tdp;
1435 	}
1436 	if (error) {
1437 		vput(vp);
1438 		return (error);
1439 	}
1440 	VOP_UNLOCK(vp, 0, td);
1441 	FILEDESC_LOCK(fdp);
1442 	vpold = fdp->fd_cdir;
1443 	fdp->fd_cdir = vp;
1444 	FILEDESC_UNLOCK(fdp);
1445 	vrele(vpold);
1446 	return (0);
1447 }
1448 
1449 /*
1450  * Change current working directory (``.'').
1451  */
1452 #ifndef _SYS_SYSPROTO_H_
1453 struct chdir_args {
1454 	char	*path;
1455 };
1456 #endif
1457 /* ARGSUSED */
1458 int
1459 chdir(td, uap)
1460 	struct thread *td;
1461 	struct chdir_args /* {
1462 		syscallarg(char *) path;
1463 	} */ *uap;
1464 {
1465 	register struct filedesc *fdp = td->td_proc->p_fd;
1466 	int error;
1467 	struct nameidata nd;
1468 	struct vnode *vp;
1469 
1470 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1471 	    SCARG(uap, path), td);
1472 	if ((error = change_dir(&nd, td)) != 0)
1473 		return (error);
1474 	NDFREE(&nd, NDF_ONLY_PNBUF);
1475 	FILEDESC_LOCK(fdp);
1476 	vp = fdp->fd_cdir;
1477 	fdp->fd_cdir = nd.ni_vp;
1478 	FILEDESC_UNLOCK(fdp);
1479 	vrele(vp);
1480 	return (0);
1481 }
1482 
1483 /*
1484  * Helper function for raised chroot(2) security function:  Refuse if
1485  * any filedescriptors are open directories.
1486  */
1487 static int
1488 chroot_refuse_vdir_fds(fdp)
1489 	struct filedesc *fdp;
1490 {
1491 	struct vnode *vp;
1492 	struct file *fp;
1493 	int fd;
1494 
1495 	FILEDESC_LOCK(fdp);
1496 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
1497 		fp = fget_locked(fdp, fd);
1498 		if (fp == NULL)
1499 			continue;
1500 		if (fp->f_type == DTYPE_VNODE) {
1501 			vp = (struct vnode *)fp->f_data;
1502 			if (vp->v_type == VDIR) {
1503 				FILEDESC_UNLOCK(fdp);
1504 				return (EPERM);
1505 			}
1506 		}
1507 	}
1508 	FILEDESC_UNLOCK(fdp);
1509 	return (0);
1510 }
1511 
1512 /*
1513  * This sysctl determines if we will allow a process to chroot(2) if it
1514  * has a directory open:
1515  *	0: disallowed for all processes.
1516  *	1: allowed for processes that were not already chroot(2)'ed.
1517  *	2: allowed for all processes.
1518  */
1519 
1520 static int chroot_allow_open_directories = 1;
1521 
1522 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
1523      &chroot_allow_open_directories, 0, "");
1524 
1525 /*
1526  * Change notion of root (``/'') directory.
1527  */
1528 #ifndef _SYS_SYSPROTO_H_
1529 struct chroot_args {
1530 	char	*path;
1531 };
1532 #endif
1533 /* ARGSUSED */
1534 int
1535 chroot(td, uap)
1536 	struct thread *td;
1537 	struct chroot_args /* {
1538 		syscallarg(char *) path;
1539 	} */ *uap;
1540 {
1541 	register struct filedesc *fdp = td->td_proc->p_fd;
1542 	int error;
1543 	struct nameidata nd;
1544 	struct vnode *vp;
1545 
1546 	error = suser_cred(td->td_ucred, PRISON_ROOT);
1547 	if (error)
1548 		return (error);
1549 	FILEDESC_LOCK(fdp);
1550 	if (chroot_allow_open_directories == 0 ||
1551 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
1552 		FILEDESC_UNLOCK(fdp);
1553 		error = chroot_refuse_vdir_fds(fdp);
1554 	} else
1555 		FILEDESC_UNLOCK(fdp);
1556 	if (error)
1557 		return (error);
1558 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1559 	    SCARG(uap, path), td);
1560 	if ((error = change_dir(&nd, td)) != 0)
1561 		return (error);
1562 	NDFREE(&nd, NDF_ONLY_PNBUF);
1563 	FILEDESC_LOCK(fdp);
1564 	vp = fdp->fd_rdir;
1565 	fdp->fd_rdir = nd.ni_vp;
1566 	if (!fdp->fd_jdir) {
1567 		fdp->fd_jdir = nd.ni_vp;
1568                 VREF(fdp->fd_jdir);
1569 	}
1570 	FILEDESC_UNLOCK(fdp);
1571 	vrele(vp);
1572 	return (0);
1573 }
1574 
1575 /*
1576  * Common routine for chroot and chdir.
1577  */
1578 static int
1579 change_dir(ndp, td)
1580 	register struct nameidata *ndp;
1581 	struct thread *td;
1582 {
1583 	struct vnode *vp;
1584 	int error;
1585 
1586 	error = namei(ndp);
1587 	if (error)
1588 		return (error);
1589 	vp = ndp->ni_vp;
1590 	if (vp->v_type != VDIR)
1591 		error = ENOTDIR;
1592 	else
1593 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
1594 	if (error)
1595 		vput(vp);
1596 	else
1597 		VOP_UNLOCK(vp, 0, td);
1598 	return (error);
1599 }
1600 
1601 /*
1602  * Check permissions, allocate an open file structure,
1603  * and call the device open routine if any.
1604  */
1605 #ifndef _SYS_SYSPROTO_H_
1606 struct open_args {
1607 	char	*path;
1608 	int	flags;
1609 	int	mode;
1610 };
1611 #endif
1612 int
1613 open(td, uap)
1614 	struct thread *td;
1615 	register struct open_args /* {
1616 		syscallarg(char *) path;
1617 		syscallarg(int) flags;
1618 		syscallarg(int) mode;
1619 	} */ *uap;
1620 {
1621 	struct proc *p = td->td_proc;
1622 	struct filedesc *fdp = p->p_fd;
1623 	struct file *fp;
1624 	struct vnode *vp;
1625 	struct vattr vat;
1626 	struct mount *mp;
1627 	int cmode, flags, oflags;
1628 	struct file *nfp;
1629 	int type, indx, error;
1630 	struct flock lf;
1631 	struct nameidata nd;
1632 
1633 	oflags = SCARG(uap, flags);
1634 	if ((oflags & O_ACCMODE) == O_ACCMODE)
1635 		return (EINVAL);
1636 	flags = FFLAGS(oflags);
1637 	error = falloc(td, &nfp, &indx);
1638 	if (error)
1639 		return (error);
1640 	fp = nfp;
1641 	FILEDESC_LOCK(fdp);
1642 	cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1643 	FILEDESC_UNLOCK(fdp);
1644 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1645 	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
1646 	/*
1647 	 * Bump the ref count to prevent another process from closing
1648 	 * the descriptor while we are blocked in vn_open()
1649 	 */
1650 	fhold(fp);
1651 	error = vn_open(&nd, &flags, cmode);
1652 	if (error) {
1653 		/*
1654 		 * release our own reference
1655 		 */
1656 		fdrop(fp, td);
1657 
1658 		/*
1659 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1660 		 * responsible for dropping the old contents of ofiles[indx]
1661 		 * if it succeeds.
1662 		 */
1663 		if ((error == ENODEV || error == ENXIO) &&
1664 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1665 		    (error =
1666 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1667 			td->td_retval[0] = indx;
1668 			return (0);
1669 		}
1670 		/*
1671 		 * Clean up the descriptor, but only if another thread hadn't
1672 		 * replaced or closed it.
1673 		 */
1674 		FILEDESC_LOCK(fdp);
1675 		if (fdp->fd_ofiles[indx] == fp) {
1676 			fdp->fd_ofiles[indx] = NULL;
1677 			FILEDESC_UNLOCK(fdp);
1678 			fdrop(fp, td);
1679 		} else
1680 			FILEDESC_UNLOCK(fdp);
1681 
1682 		if (error == ERESTART)
1683 			error = EINTR;
1684 		return (error);
1685 	}
1686 	td->td_dupfd = 0;
1687 	NDFREE(&nd, NDF_ONLY_PNBUF);
1688 	vp = nd.ni_vp;
1689 
1690 	/*
1691 	 * There should be 2 references on the file, one from the descriptor
1692 	 * table, and one for us.
1693 	 *
1694 	 * Handle the case where someone closed the file (via its file
1695 	 * descriptor) while we were blocked.  The end result should look
1696 	 * like opening the file succeeded but it was immediately closed.
1697 	 */
1698 	FILEDESC_LOCK(fdp);
1699 	FILE_LOCK(fp);
1700 	if (fp->f_count == 1) {
1701 		KASSERT(fdp->fd_ofiles[indx] != fp,
1702 		    ("Open file descriptor lost all refs"));
1703 		FILEDESC_UNLOCK(fdp);
1704 		FILE_UNLOCK(fp);
1705 		VOP_UNLOCK(vp, 0, td);
1706 		vn_close(vp, flags & FMASK, fp->f_cred, td);
1707 		fdrop(fp, td);
1708 		td->td_retval[0] = indx;
1709 		return 0;
1710 	}
1711 
1712 	/* assert that vn_open created a backing object if one is needed */
1713 	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
1714 		("open: vmio vnode has no backing object after vn_open"));
1715 
1716 	fp->f_data = (caddr_t)vp;
1717 	fp->f_flag = flags & FMASK;
1718 	fp->f_ops = &vnops;
1719 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1720 	FILEDESC_UNLOCK(fdp);
1721 	FILE_UNLOCK(fp);
1722 	VOP_UNLOCK(vp, 0, td);
1723 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1724 		lf.l_whence = SEEK_SET;
1725 		lf.l_start = 0;
1726 		lf.l_len = 0;
1727 		if (flags & O_EXLOCK)
1728 			lf.l_type = F_WRLCK;
1729 		else
1730 			lf.l_type = F_RDLCK;
1731 		type = F_FLOCK;
1732 		if ((flags & FNONBLOCK) == 0)
1733 			type |= F_WAIT;
1734 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0)
1735 			goto bad;
1736 		fp->f_flag |= FHASLOCK;
1737 	}
1738 	if (flags & O_TRUNC) {
1739 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1740 			goto bad;
1741 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1742 		VATTR_NULL(&vat);
1743 		vat.va_size = 0;
1744 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1745 		error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1746 		VOP_UNLOCK(vp, 0, td);
1747 		vn_finished_write(mp);
1748 		if (error)
1749 			goto bad;
1750 	}
1751 	/*
1752 	 * Release our private reference, leaving the one associated with
1753 	 * the descriptor table intact.
1754 	 */
1755 	fdrop(fp, td);
1756 	td->td_retval[0] = indx;
1757 	return (0);
1758 bad:
1759 	FILEDESC_LOCK(fdp);
1760 	if (fdp->fd_ofiles[indx] == fp) {
1761 		fdp->fd_ofiles[indx] = NULL;
1762 		FILEDESC_UNLOCK(fdp);
1763 		fdrop(fp, td);
1764 	} else
1765 		FILEDESC_UNLOCK(fdp);
1766 	return (error);
1767 }
1768 
#ifdef COMPAT_43
/*
 * ocreat: old creat(2) entry point.
 *
 * Implemented as open() with O_WRONLY | O_CREAT | O_TRUNC on the same
 * path and mode.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	struct open_args open_uap;

	SCARG(&open_uap, path) = SCARG(uap, path);
	SCARG(&open_uap, flags) = O_WRONLY | O_CREAT | O_TRUNC;
	SCARG(&open_uap, mode) = SCARG(uap, mode);
	return (open(td, &open_uap));
}
#endif /* COMPAT_43 */
1799 
1800 /*
1801  * Create a special file.
1802  */
1803 #ifndef _SYS_SYSPROTO_H_
1804 struct mknod_args {
1805 	char	*path;
1806 	int	mode;
1807 	int	dev;
1808 };
1809 #endif
1810 /* ARGSUSED */
1811 int
1812 mknod(td, uap)
1813 	struct thread *td;
1814 	register struct mknod_args /* {
1815 		syscallarg(char *) path;
1816 		syscallarg(int) mode;
1817 		syscallarg(int) dev;
1818 	} */ *uap;
1819 {
1820 	struct vnode *vp;
1821 	struct mount *mp;
1822 	struct vattr vattr;
1823 	int error;
1824 	int whiteout = 0;
1825 	struct nameidata nd;
1826 
1827 	switch (SCARG(uap, mode) & S_IFMT) {
1828 	case S_IFCHR:
1829 	case S_IFBLK:
1830 		error = suser(td);
1831 		break;
1832 	default:
1833 		error = suser_cred(td->td_ucred, PRISON_ROOT);
1834 		break;
1835 	}
1836 	if (error)
1837 		return (error);
1838 restart:
1839 	bwillwrite();
1840 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1841 	if ((error = namei(&nd)) != 0)
1842 		return (error);
1843 	vp = nd.ni_vp;
1844 	if (vp != NULL) {
1845 		vrele(vp);
1846 		error = EEXIST;
1847 	} else {
1848 		VATTR_NULL(&vattr);
1849 		FILEDESC_LOCK(td->td_proc->p_fd);
1850 		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
1851 		FILEDESC_UNLOCK(td->td_proc->p_fd);
1852 		vattr.va_rdev = SCARG(uap, dev);
1853 		whiteout = 0;
1854 
1855 		switch (SCARG(uap, mode) & S_IFMT) {
1856 		case S_IFMT:	/* used by badsect to flag bad sectors */
1857 			vattr.va_type = VBAD;
1858 			break;
1859 		case S_IFCHR:
1860 			vattr.va_type = VCHR;
1861 			break;
1862 		case S_IFBLK:
1863 			vattr.va_type = VBLK;
1864 			break;
1865 		case S_IFWHT:
1866 			whiteout = 1;
1867 			break;
1868 		default:
1869 			error = EINVAL;
1870 			break;
1871 		}
1872 	}
1873 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1874 		NDFREE(&nd, NDF_ONLY_PNBUF);
1875 		vput(nd.ni_dvp);
1876 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1877 			return (error);
1878 		goto restart;
1879 	}
1880 	if (!error) {
1881 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1882 		if (whiteout)
1883 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1884 		else {
1885 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1886 						&nd.ni_cnd, &vattr);
1887 			if (error == 0)
1888 				vput(nd.ni_vp);
1889 		}
1890 	}
1891 	NDFREE(&nd, NDF_ONLY_PNBUF);
1892 	vput(nd.ni_dvp);
1893 	vn_finished_write(mp);
1894 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1895 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1896 	return (error);
1897 }
1898 
1899 /*
1900  * Create a named pipe.
1901  */
1902 #ifndef _SYS_SYSPROTO_H_
1903 struct mkfifo_args {
1904 	char	*path;
1905 	int	mode;
1906 };
1907 #endif
1908 /* ARGSUSED */
1909 int
1910 mkfifo(td, uap)
1911 	struct thread *td;
1912 	register struct mkfifo_args /* {
1913 		syscallarg(char *) path;
1914 		syscallarg(int) mode;
1915 	} */ *uap;
1916 {
1917 	struct mount *mp;
1918 	struct vattr vattr;
1919 	int error;
1920 	struct nameidata nd;
1921 
1922 restart:
1923 	bwillwrite();
1924 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1925 	if ((error = namei(&nd)) != 0)
1926 		return (error);
1927 	if (nd.ni_vp != NULL) {
1928 		NDFREE(&nd, NDF_ONLY_PNBUF);
1929 		vrele(nd.ni_vp);
1930 		vput(nd.ni_dvp);
1931 		return (EEXIST);
1932 	}
1933 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1934 		NDFREE(&nd, NDF_ONLY_PNBUF);
1935 		vput(nd.ni_dvp);
1936 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1937 			return (error);
1938 		goto restart;
1939 	}
1940 	VATTR_NULL(&vattr);
1941 	vattr.va_type = VFIFO;
1942 	FILEDESC_LOCK(td->td_proc->p_fd);
1943 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
1944 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1945 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1946 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1947 	if (error == 0)
1948 		vput(nd.ni_vp);
1949 	NDFREE(&nd, NDF_ONLY_PNBUF);
1950 	vput(nd.ni_dvp);
1951 	vn_finished_write(mp);
1952 	return (error);
1953 }
1954 
1955 /*
1956  * Make a hard file link.
1957  */
1958 #ifndef _SYS_SYSPROTO_H_
1959 struct link_args {
1960 	char	*path;
1961 	char	*link;
1962 };
1963 #endif
1964 /* ARGSUSED */
1965 int
1966 link(td, uap)
1967 	struct thread *td;
1968 	register struct link_args /* {
1969 		syscallarg(char *) path;
1970 		syscallarg(char *) link;
1971 	} */ *uap;
1972 {
1973 	struct vnode *vp;
1974 	struct mount *mp;
1975 	struct nameidata nd;
1976 	int error;
1977 
1978 	bwillwrite();
1979 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), td);
1980 	if ((error = namei(&nd)) != 0)
1981 		return (error);
1982 	NDFREE(&nd, NDF_ONLY_PNBUF);
1983 	vp = nd.ni_vp;
1984 	if (vp->v_type == VDIR) {
1985 		vrele(vp);
1986 		return (EPERM);		/* POSIX */
1987 	}
1988 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1989 		vrele(vp);
1990 		return (error);
1991 	}
1992 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
1993 	if ((error = namei(&nd)) == 0) {
1994 		if (nd.ni_vp != NULL) {
1995 			vrele(nd.ni_vp);
1996 			error = EEXIST;
1997 		} else {
1998 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1999 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2000 			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2001 		}
2002 		NDFREE(&nd, NDF_ONLY_PNBUF);
2003 		vput(nd.ni_dvp);
2004 	}
2005 	vrele(vp);
2006 	vn_finished_write(mp);
2007 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
2008 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
2009 	return (error);
2010 }
2011 
2012 /*
2013  * Make a symbolic link.
2014  */
2015 #ifndef _SYS_SYSPROTO_H_
2016 struct symlink_args {
2017 	char	*path;
2018 	char	*link;
2019 };
2020 #endif
2021 /* ARGSUSED */
2022 int
2023 symlink(td, uap)
2024 	struct thread *td;
2025 	register struct symlink_args /* {
2026 		syscallarg(char *) path;
2027 		syscallarg(char *) link;
2028 	} */ *uap;
2029 {
2030 	struct mount *mp;
2031 	struct vattr vattr;
2032 	char *path;
2033 	int error;
2034 	struct nameidata nd;
2035 
2036 	path = uma_zalloc(namei_zone, M_WAITOK);
2037 	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
2038 		goto out;
2039 restart:
2040 	bwillwrite();
2041 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
2042 	if ((error = namei(&nd)) != 0)
2043 		goto out;
2044 	if (nd.ni_vp) {
2045 		NDFREE(&nd, NDF_ONLY_PNBUF);
2046 		vrele(nd.ni_vp);
2047 		vput(nd.ni_dvp);
2048 		error = EEXIST;
2049 		goto out;
2050 	}
2051 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2052 		NDFREE(&nd, NDF_ONLY_PNBUF);
2053 		vput(nd.ni_dvp);
2054 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2055 			return (error);
2056 		goto restart;
2057 	}
2058 	VATTR_NULL(&vattr);
2059 	FILEDESC_LOCK(td->td_proc->p_fd);
2060 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
2061 	FILEDESC_UNLOCK(td->td_proc->p_fd);
2062 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2063 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2064 	NDFREE(&nd, NDF_ONLY_PNBUF);
2065 	if (error == 0)
2066 		vput(nd.ni_vp);
2067 	vput(nd.ni_dvp);
2068 	vn_finished_write(mp);
2069 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
2070 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
2071 out:
2072 	uma_zfree(namei_zone, path);
2073 	return (error);
2074 }
2075 
2076 /*
2077  * Delete a whiteout from the filesystem.
2078  */
2079 /* ARGSUSED */
2080 int
2081 undelete(td, uap)
2082 	struct thread *td;
2083 	register struct undelete_args /* {
2084 		syscallarg(char *) path;
2085 	} */ *uap;
2086 {
2087 	int error;
2088 	struct mount *mp;
2089 	struct nameidata nd;
2090 
2091 restart:
2092 	bwillwrite();
2093 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
2094 	    SCARG(uap, path), td);
2095 	error = namei(&nd);
2096 	if (error)
2097 		return (error);
2098 
2099 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2100 		NDFREE(&nd, NDF_ONLY_PNBUF);
2101 		if (nd.ni_vp)
2102 			vrele(nd.ni_vp);
2103 		vput(nd.ni_dvp);
2104 		return (EEXIST);
2105 	}
2106 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2107 		NDFREE(&nd, NDF_ONLY_PNBUF);
2108 		vput(nd.ni_dvp);
2109 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2110 			return (error);
2111 		goto restart;
2112 	}
2113 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2114 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
2115 	NDFREE(&nd, NDF_ONLY_PNBUF);
2116 	vput(nd.ni_dvp);
2117 	vn_finished_write(mp);
2118 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
2119 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
2120 	return (error);
2121 }
2122 
2123 /*
2124  * Delete a name from the filesystem.
2125  */
2126 #ifndef _SYS_SYSPROTO_H_
2127 struct unlink_args {
2128 	char	*path;
2129 };
2130 #endif
2131 /* ARGSUSED */
2132 int
2133 unlink(td, uap)
2134 	struct thread *td;
2135 	struct unlink_args /* {
2136 		syscallarg(char *) path;
2137 	} */ *uap;
2138 {
2139 	struct mount *mp;
2140 	struct vnode *vp;
2141 	int error;
2142 	struct nameidata nd;
2143 
2144 restart:
2145 	bwillwrite();
2146 	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
2147 	if ((error = namei(&nd)) != 0)
2148 		return (error);
2149 	vp = nd.ni_vp;
2150 	if (vp->v_type == VDIR)
2151 		error = EPERM;		/* POSIX */
2152 	else {
2153 		/*
2154 		 * The root of a mounted filesystem cannot be deleted.
2155 		 *
2156 		 * XXX: can this only be a VDIR case?
2157 		 */
2158 		if (vp->v_flag & VROOT)
2159 			error = EBUSY;
2160 	}
2161 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2162 		NDFREE(&nd, NDF_ONLY_PNBUF);
2163 		vrele(vp);
2164 		vput(nd.ni_dvp);
2165 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2166 			return (error);
2167 		goto restart;
2168 	}
2169 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2170 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2171 	if (!error) {
2172 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2173 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
2174 	}
2175 	NDFREE(&nd, NDF_ONLY_PNBUF);
2176 	vput(nd.ni_dvp);
2177 	vput(vp);
2178 	vn_finished_write(mp);
2179 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
2180 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
2181 	return (error);
2182 }
2183 
2184 /*
2185  * Reposition read/write file offset.
2186  */
2187 #ifndef _SYS_SYSPROTO_H_
2188 struct lseek_args {
2189 	int	fd;
2190 	int	pad;
2191 	off_t	offset;
2192 	int	whence;
2193 };
2194 #endif
2195 int
2196 lseek(td, uap)
2197 	struct thread *td;
2198 	register struct lseek_args /* {
2199 		syscallarg(int) fd;
2200 		syscallarg(int) pad;
2201 		syscallarg(off_t) offset;
2202 		syscallarg(int) whence;
2203 	} */ *uap;
2204 {
2205 	struct ucred *cred = td->td_ucred;
2206 	struct file *fp;
2207 	struct vnode *vp;
2208 	struct vattr vattr;
2209 	off_t offset;
2210 	int error, noneg;
2211 
2212 	if ((error = fget(td, uap->fd, &fp)) != 0)
2213 		return (error);
2214 	if (fp->f_type != DTYPE_VNODE) {
2215 		fdrop(fp, td);
2216 		return (ESPIPE);
2217 	}
2218 	vp = (struct vnode *)fp->f_data;
2219 	noneg = (vp->v_type != VCHR);
2220 	offset = SCARG(uap, offset);
2221 	switch (SCARG(uap, whence)) {
2222 	case L_INCR:
2223 		if (noneg &&
2224 		    (fp->f_offset < 0 ||
2225 		     (offset > 0 && fp->f_offset > OFF_MAX - offset)))
2226 			return (EOVERFLOW);
2227 		offset += fp->f_offset;
2228 		break;
2229 	case L_XTND:
2230 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2231 		error = VOP_GETATTR(vp, &vattr, cred, td);
2232 		VOP_UNLOCK(vp, 0, td);
2233 		if (error)
2234 			return (error);
2235 		if (noneg &&
2236 		    (vattr.va_size > OFF_MAX ||
2237 		     (offset > 0 && vattr.va_size > OFF_MAX - offset)))
2238 			return (EOVERFLOW);
2239 		offset += vattr.va_size;
2240 		break;
2241 	case L_SET:
2242 		break;
2243 	default:
2244 		fdrop(fp, td);
2245 		return (EINVAL);
2246 	}
2247 	if (noneg && offset < 0)
2248 		return (EINVAL);
2249 	fp->f_offset = offset;
2250 	*(off_t *)(td->td_retval) = fp->f_offset;
2251 	fdrop(fp, td);
2252 	return (0);
2253 }
2254 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * olseek: old lseek entry point taking a `long' offset.
 *
 * Repackages its arguments as an lseek_args structure and calls lseek().
 */
#ifndef _SYS_SYSPROTO_H_
struct olseek_args {
	int	fd;
	long	offset;
	int	whence;
};
#endif
int
olseek(struct thread *td, struct olseek_args *uap)
{
	struct lseek_args lseek_uap;

	SCARG(&lseek_uap, fd) = SCARG(uap, fd);
	SCARG(&lseek_uap, offset) = SCARG(uap, offset);
	SCARG(&lseek_uap, whence) = SCARG(uap, whence);
	return (lseek(td, &lseek_uap));
}
#endif /* COMPAT_43 */
2290 
2291 /*
2292  * Check access permissions using passed credentials.
2293  */
2294 static int
2295 vn_access(vp, user_flags, cred, td)
2296 	struct vnode	*vp;
2297 	int		user_flags;
2298 	struct ucred	*cred;
2299 	struct thread	*td;
2300 {
2301 	int error, flags;
2302 
2303 	/* Flags == 0 means only check for existence. */
2304 	error = 0;
2305 	if (user_flags) {
2306 		flags = 0;
2307 		if (user_flags & R_OK)
2308 			flags |= VREAD;
2309 		if (user_flags & W_OK)
2310 			flags |= VWRITE;
2311 		if (user_flags & X_OK)
2312 			flags |= VEXEC;
2313 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
2314 			error = VOP_ACCESS(vp, flags, cred, td);
2315 	}
2316 	return (error);
2317 }
2318 
2319 /*
2320  * Check access permissions using "real" credentials.
2321  */
2322 #ifndef _SYS_SYSPROTO_H_
2323 struct access_args {
2324 	char	*path;
2325 	int	flags;
2326 };
2327 #endif
2328 int
2329 access(td, uap)
2330 	struct thread *td;
2331 	register struct access_args /* {
2332 		syscallarg(char *) path;
2333 		syscallarg(int) flags;
2334 	} */ *uap;
2335 {
2336 	struct ucred *cred, *tmpcred;
2337 	register struct vnode *vp;
2338 	int error;
2339 	struct nameidata nd;
2340 
2341 	/*
2342 	 * Create and modify a temporary credential instead of one that
2343 	 * is potentially shared.  This could also mess up socket
2344 	 * buffer accounting which can run in an interrupt context.
2345 	 *
2346 	 * XXX - Depending on how "threads" are finally implemented, it
2347 	 * may be better to explicitly pass the credential to namei()
2348 	 * rather than to modify the potentially shared process structure.
2349 	 */
2350 	cred = td->td_ucred;
2351 	tmpcred = crdup(cred);
2352 	tmpcred->cr_uid = cred->cr_ruid;
2353 	tmpcred->cr_groups[0] = cred->cr_rgid;
2354 	td->td_ucred = tmpcred;
2355 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2356 	    SCARG(uap, path), td);
2357 	if ((error = namei(&nd)) != 0)
2358 		goto out1;
2359 	vp = nd.ni_vp;
2360 
2361 	error = vn_access(vp, SCARG(uap, flags), tmpcred, td);
2362 	NDFREE(&nd, NDF_ONLY_PNBUF);
2363 	vput(vp);
2364 out1:
2365 	td->td_ucred = cred;
2366 	crfree(tmpcred);
2367 	return (error);
2368 }
2369 
2370 /*
2371  * Check access permissions using "effective" credentials.
2372  */
2373 #ifndef _SYS_SYSPROTO_H_
2374 struct eaccess_args {
2375 	char	*path;
2376 	int	flags;
2377 };
2378 #endif
2379 int
2380 eaccess(td, uap)
2381 	struct thread *td;
2382 	register struct eaccess_args /* {
2383 		syscallarg(char *) path;
2384 		syscallarg(int) flags;
2385 	} */ *uap;
2386 {
2387 	struct nameidata nd;
2388 	struct vnode *vp;
2389 	int error;
2390 
2391 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2392 	    SCARG(uap, path), td);
2393 	if ((error = namei(&nd)) != 0)
2394 		return (error);
2395 	vp = nd.ni_vp;
2396 
2397 	error = vn_access(vp, SCARG(uap, flags), td->td_ucred, td);
2398 	NDFREE(&nd, NDF_ONLY_PNBUF);
2399 	vput(vp);
2400 	return (error);
2401 }
2402 
2403 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2404 /*
2405  * Get file status; this version follows links.
2406  */
2407 #ifndef _SYS_SYSPROTO_H_
2408 struct ostat_args {
2409 	char	*path;
2410 	struct ostat *ub;
2411 };
2412 #endif
2413 /* ARGSUSED */
2414 int
2415 ostat(td, uap)
2416 	struct thread *td;
2417 	register struct ostat_args /* {
2418 		syscallarg(char *) path;
2419 		syscallarg(struct ostat *) ub;
2420 	} */ *uap;
2421 {
2422 	struct stat sb;
2423 	struct ostat osb;
2424 	int error;
2425 	struct nameidata nd;
2426 
2427 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2428 	    SCARG(uap, path), td);
2429 	if ((error = namei(&nd)) != 0)
2430 		return (error);
2431 	NDFREE(&nd, NDF_ONLY_PNBUF);
2432 	error = vn_stat(nd.ni_vp, &sb, td);
2433 	vput(nd.ni_vp);
2434 	if (error)
2435 		return (error);
2436 	cvtstat(&sb, &osb);
2437 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
2438 	return (error);
2439 }
2440 
2441 /*
2442  * Get file status; this version does not follow links.
2443  */
2444 #ifndef _SYS_SYSPROTO_H_
2445 struct olstat_args {
2446 	char	*path;
2447 	struct ostat *ub;
2448 };
2449 #endif
2450 /* ARGSUSED */
2451 int
2452 olstat(td, uap)
2453 	struct thread *td;
2454 	register struct olstat_args /* {
2455 		syscallarg(char *) path;
2456 		syscallarg(struct ostat *) ub;
2457 	} */ *uap;
2458 {
2459 	struct vnode *vp;
2460 	struct stat sb;
2461 	struct ostat osb;
2462 	int error;
2463 	struct nameidata nd;
2464 
2465 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2466 	    SCARG(uap, path), td);
2467 	if ((error = namei(&nd)) != 0)
2468 		return (error);
2469 	vp = nd.ni_vp;
2470 	error = vn_stat(vp, &sb, td);
2471 	NDFREE(&nd, NDF_ONLY_PNBUF);
2472 	vput(vp);
2473 	if (error)
2474 		return (error);
2475 	cvtstat(&sb, &osb);
2476 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
2477 	return (error);
2478 }
2479 
2480 /*
2481  * Convert from an old to a new stat structure.
2482  */
2483 void
2484 cvtstat(st, ost)
2485 	struct stat *st;
2486 	struct ostat *ost;
2487 {
2488 
2489 	ost->st_dev = st->st_dev;
2490 	ost->st_ino = st->st_ino;
2491 	ost->st_mode = st->st_mode;
2492 	ost->st_nlink = st->st_nlink;
2493 	ost->st_uid = st->st_uid;
2494 	ost->st_gid = st->st_gid;
2495 	ost->st_rdev = st->st_rdev;
2496 	if (st->st_size < (quad_t)1 << 32)
2497 		ost->st_size = st->st_size;
2498 	else
2499 		ost->st_size = -2;
2500 	ost->st_atime = st->st_atime;
2501 	ost->st_mtime = st->st_mtime;
2502 	ost->st_ctime = st->st_ctime;
2503 	ost->st_blksize = st->st_blksize;
2504 	ost->st_blocks = st->st_blocks;
2505 	ost->st_flags = st->st_flags;
2506 	ost->st_gen = st->st_gen;
2507 }
2508 #endif /* COMPAT_43 || COMPAT_SUNOS */
2509 
2510 /*
2511  * Get file status; this version follows links.
2512  */
2513 #ifndef _SYS_SYSPROTO_H_
2514 struct stat_args {
2515 	char	*path;
2516 	struct stat *ub;
2517 };
2518 #endif
2519 /* ARGSUSED */
2520 int
2521 stat(td, uap)
2522 	struct thread *td;
2523 	register struct stat_args /* {
2524 		syscallarg(char *) path;
2525 		syscallarg(struct stat *) ub;
2526 	} */ *uap;
2527 {
2528 	struct stat sb;
2529 	int error;
2530 	struct nameidata nd;
2531 
2532 #ifdef LOOKUP_SHARED
2533 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
2534 	    UIO_USERSPACE, SCARG(uap, path), td);
2535 #else
2536 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2537 	    SCARG(uap, path), td);
2538 #endif
2539 	if ((error = namei(&nd)) != 0)
2540 		return (error);
2541 	error = vn_stat(nd.ni_vp, &sb, td);
2542 	NDFREE(&nd, NDF_ONLY_PNBUF);
2543 	vput(nd.ni_vp);
2544 	if (error)
2545 		return (error);
2546 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
2547 	return (error);
2548 }
2549 
2550 /*
2551  * Get file status; this version does not follow links.
2552  */
2553 #ifndef _SYS_SYSPROTO_H_
2554 struct lstat_args {
2555 	char	*path;
2556 	struct stat *ub;
2557 };
2558 #endif
2559 /* ARGSUSED */
2560 int
2561 lstat(td, uap)
2562 	struct thread *td;
2563 	register struct lstat_args /* {
2564 		syscallarg(char *) path;
2565 		syscallarg(struct stat *) ub;
2566 	} */ *uap;
2567 {
2568 	int error;
2569 	struct vnode *vp;
2570 	struct stat sb;
2571 	struct nameidata nd;
2572 
2573 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2574 	    SCARG(uap, path), td);
2575 	if ((error = namei(&nd)) != 0)
2576 		return (error);
2577 	vp = nd.ni_vp;
2578 	error = vn_stat(vp, &sb, td);
2579 	NDFREE(&nd, NDF_ONLY_PNBUF);
2580 	vput(vp);
2581 	if (error)
2582 		return (error);
2583 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
2584 	return (error);
2585 }
2586 
2587 /*
2588  * Implementation of the NetBSD stat() function.
2589  * XXX This should probably be collapsed with the FreeBSD version,
2590  * as the differences are only due to vn_stat() clearing spares at
2591  * the end of the structures.  vn_stat could be split to avoid this,
2592  * and thus collapse the following to close to zero code.
2593  */
2594 void
2595 cvtnstat(sb, nsb)
2596 	struct stat *sb;
2597 	struct nstat *nsb;
2598 {
2599 	bzero(nsb, sizeof *nsb);
2600 	nsb->st_dev = sb->st_dev;
2601 	nsb->st_ino = sb->st_ino;
2602 	nsb->st_mode = sb->st_mode;
2603 	nsb->st_nlink = sb->st_nlink;
2604 	nsb->st_uid = sb->st_uid;
2605 	nsb->st_gid = sb->st_gid;
2606 	nsb->st_rdev = sb->st_rdev;
2607 	nsb->st_atimespec = sb->st_atimespec;
2608 	nsb->st_mtimespec = sb->st_mtimespec;
2609 	nsb->st_ctimespec = sb->st_ctimespec;
2610 	nsb->st_size = sb->st_size;
2611 	nsb->st_blocks = sb->st_blocks;
2612 	nsb->st_blksize = sb->st_blksize;
2613 	nsb->st_flags = sb->st_flags;
2614 	nsb->st_gen = sb->st_gen;
2615 	nsb->st_createtimespec = sb->st_createtimespec;
2616 }
2617 
2618 #ifndef _SYS_SYSPROTO_H_
2619 struct nstat_args {
2620 	char	*path;
2621 	struct nstat *ub;
2622 };
2623 #endif
2624 /* ARGSUSED */
2625 int
2626 nstat(td, uap)
2627 	struct thread *td;
2628 	register struct nstat_args /* {
2629 		syscallarg(char *) path;
2630 		syscallarg(struct nstat *) ub;
2631 	} */ *uap;
2632 {
2633 	struct stat sb;
2634 	struct nstat nsb;
2635 	int error;
2636 	struct nameidata nd;
2637 
2638 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2639 	    SCARG(uap, path), td);
2640 	if ((error = namei(&nd)) != 0)
2641 		return (error);
2642 	NDFREE(&nd, NDF_ONLY_PNBUF);
2643 	error = vn_stat(nd.ni_vp, &sb, td);
2644 	vput(nd.ni_vp);
2645 	if (error)
2646 		return (error);
2647 	cvtnstat(&sb, &nsb);
2648 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2649 	return (error);
2650 }
2651 
2652 /*
2653  * NetBSD lstat.  Get file status; this version does not follow links.
2654  */
2655 #ifndef _SYS_SYSPROTO_H_
2656 struct lstat_args {
2657 	char	*path;
2658 	struct stat *ub;
2659 };
2660 #endif
2661 /* ARGSUSED */
2662 int
2663 nlstat(td, uap)
2664 	struct thread *td;
2665 	register struct nlstat_args /* {
2666 		syscallarg(char *) path;
2667 		syscallarg(struct nstat *) ub;
2668 	} */ *uap;
2669 {
2670 	int error;
2671 	struct vnode *vp;
2672 	struct stat sb;
2673 	struct nstat nsb;
2674 	struct nameidata nd;
2675 
2676 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2677 	    SCARG(uap, path), td);
2678 	if ((error = namei(&nd)) != 0)
2679 		return (error);
2680 	vp = nd.ni_vp;
2681 	NDFREE(&nd, NDF_ONLY_PNBUF);
2682 	error = vn_stat(vp, &sb, td);
2683 	vput(vp);
2684 	if (error)
2685 		return (error);
2686 	cvtnstat(&sb, &nsb);
2687 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2688 	return (error);
2689 }
2690 
2691 /*
2692  * Get configurable pathname variables.
2693  */
2694 #ifndef _SYS_SYSPROTO_H_
2695 struct pathconf_args {
2696 	char	*path;
2697 	int	name;
2698 };
2699 #endif
2700 /* ARGSUSED */
2701 int
2702 pathconf(td, uap)
2703 	struct thread *td;
2704 	register struct pathconf_args /* {
2705 		syscallarg(char *) path;
2706 		syscallarg(int) name;
2707 	} */ *uap;
2708 {
2709 	int error;
2710 	struct nameidata nd;
2711 
2712 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2713 	    SCARG(uap, path), td);
2714 	if ((error = namei(&nd)) != 0)
2715 		return (error);
2716 	NDFREE(&nd, NDF_ONLY_PNBUF);
2717 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), td->td_retval);
2718 	vput(nd.ni_vp);
2719 	return (error);
2720 }
2721 
2722 /*
2723  * Return target name of a symbolic link.
2724  */
2725 #ifndef _SYS_SYSPROTO_H_
2726 struct readlink_args {
2727 	char	*path;
2728 	char	*buf;
2729 	int	count;
2730 };
2731 #endif
2732 /* ARGSUSED */
2733 int
2734 readlink(td, uap)
2735 	struct thread *td;
2736 	register struct readlink_args /* {
2737 		syscallarg(char *) path;
2738 		syscallarg(char *) buf;
2739 		syscallarg(int) count;
2740 	} */ *uap;
2741 {
2742 	register struct vnode *vp;
2743 	struct iovec aiov;
2744 	struct uio auio;
2745 	int error;
2746 	struct nameidata nd;
2747 
2748 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2749 	    SCARG(uap, path), td);
2750 	if ((error = namei(&nd)) != 0)
2751 		return (error);
2752 	NDFREE(&nd, NDF_ONLY_PNBUF);
2753 	vp = nd.ni_vp;
2754 	if (vp->v_type != VLNK)
2755 		error = EINVAL;
2756 	else {
2757 		aiov.iov_base = SCARG(uap, buf);
2758 		aiov.iov_len = SCARG(uap, count);
2759 		auio.uio_iov = &aiov;
2760 		auio.uio_iovcnt = 1;
2761 		auio.uio_offset = 0;
2762 		auio.uio_rw = UIO_READ;
2763 		auio.uio_segflg = UIO_USERSPACE;
2764 		auio.uio_td = td;
2765 		auio.uio_resid = SCARG(uap, count);
2766 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2767 	}
2768 	vput(vp);
2769 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
2770 	return (error);
2771 }
2772 
2773 /*
2774  * Common implementation code for chflags() and fchflags().
2775  */
2776 static int
2777 setfflags(td, vp, flags)
2778 	struct thread *td;
2779 	struct vnode *vp;
2780 	int flags;
2781 {
2782 	int error;
2783 	struct mount *mp;
2784 	struct vattr vattr;
2785 
2786 	/*
2787 	 * Prevent non-root users from setting flags on devices.  When
2788 	 * a device is reused, users can retain ownership of the device
2789 	 * if they are allowed to set flags and programs assume that
2790 	 * chown can't fail when done as root.
2791 	 */
2792 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2793 		error = suser_cred(td->td_ucred, PRISON_ROOT);
2794 		if (error)
2795 			return (error);
2796 	}
2797 
2798 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2799 		return (error);
2800 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2801 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2802 	VATTR_NULL(&vattr);
2803 	vattr.va_flags = flags;
2804 	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2805 	VOP_UNLOCK(vp, 0, td);
2806 	vn_finished_write(mp);
2807 	return (error);
2808 }
2809 
2810 /*
2811  * Change flags of a file given a path name.
2812  */
2813 #ifndef _SYS_SYSPROTO_H_
2814 struct chflags_args {
2815 	char	*path;
2816 	int	flags;
2817 };
2818 #endif
2819 /* ARGSUSED */
2820 int
2821 chflags(td, uap)
2822 	struct thread *td;
2823 	register struct chflags_args /* {
2824 		syscallarg(char *) path;
2825 		syscallarg(int) flags;
2826 	} */ *uap;
2827 {
2828 	int error;
2829 	struct nameidata nd;
2830 
2831 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2832 	if ((error = namei(&nd)) != 0)
2833 		return (error);
2834 	NDFREE(&nd, NDF_ONLY_PNBUF);
2835 	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
2836 	vrele(nd.ni_vp);
2837 	return error;
2838 }
2839 
2840 /*
2841  * Same as chflags() but doesn't follow symlinks.
2842  */
2843 int
2844 lchflags(td, uap)
2845 	struct thread *td;
2846 	register struct lchflags_args /* {
2847 		syscallarg(char *) path;
2848 		syscallarg(int) flags;
2849 	} */ *uap;
2850 {
2851 	int error;
2852 	struct nameidata nd;
2853 
2854 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2855 	if ((error = namei(&nd)) != 0)
2856 		return (error);
2857 	NDFREE(&nd, NDF_ONLY_PNBUF);
2858 	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
2859 	vrele(nd.ni_vp);
2860 	return error;
2861 }
2862 
2863 /*
2864  * Change flags of a file given a file descriptor.
2865  */
2866 #ifndef _SYS_SYSPROTO_H_
2867 struct fchflags_args {
2868 	int	fd;
2869 	int	flags;
2870 };
2871 #endif
2872 /* ARGSUSED */
2873 int
2874 fchflags(td, uap)
2875 	struct thread *td;
2876 	register struct fchflags_args /* {
2877 		syscallarg(int) fd;
2878 		syscallarg(int) flags;
2879 	} */ *uap;
2880 {
2881 	struct file *fp;
2882 	int error;
2883 
2884 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2885 		return (error);
2886 	error = setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags));
2887 	fdrop(fp, td);
2888 	return (error);
2889 }
2890 
2891 /*
2892  * Common implementation code for chmod(), lchmod() and fchmod().
2893  */
2894 static int
2895 setfmode(td, vp, mode)
2896 	struct thread *td;
2897 	struct vnode *vp;
2898 	int mode;
2899 {
2900 	int error;
2901 	struct mount *mp;
2902 	struct vattr vattr;
2903 
2904 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2905 		return (error);
2906 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2907 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2908 	VATTR_NULL(&vattr);
2909 	vattr.va_mode = mode & ALLPERMS;
2910 	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2911 	VOP_UNLOCK(vp, 0, td);
2912 	vn_finished_write(mp);
2913 	return error;
2914 }
2915 
2916 /*
2917  * Change mode of a file given path name.
2918  */
2919 #ifndef _SYS_SYSPROTO_H_
2920 struct chmod_args {
2921 	char	*path;
2922 	int	mode;
2923 };
2924 #endif
2925 /* ARGSUSED */
2926 int
2927 chmod(td, uap)
2928 	struct thread *td;
2929 	register struct chmod_args /* {
2930 		syscallarg(char *) path;
2931 		syscallarg(int) mode;
2932 	} */ *uap;
2933 {
2934 	int error;
2935 	struct nameidata nd;
2936 
2937 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2938 	if ((error = namei(&nd)) != 0)
2939 		return (error);
2940 	NDFREE(&nd, NDF_ONLY_PNBUF);
2941 	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
2942 	vrele(nd.ni_vp);
2943 	return error;
2944 }
2945 
2946 /*
2947  * Change mode of a file given path name (don't follow links.)
2948  */
2949 #ifndef _SYS_SYSPROTO_H_
2950 struct lchmod_args {
2951 	char	*path;
2952 	int	mode;
2953 };
2954 #endif
2955 /* ARGSUSED */
2956 int
2957 lchmod(td, uap)
2958 	struct thread *td;
2959 	register struct lchmod_args /* {
2960 		syscallarg(char *) path;
2961 		syscallarg(int) mode;
2962 	} */ *uap;
2963 {
2964 	int error;
2965 	struct nameidata nd;
2966 
2967 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2968 	if ((error = namei(&nd)) != 0)
2969 		return (error);
2970 	NDFREE(&nd, NDF_ONLY_PNBUF);
2971 	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
2972 	vrele(nd.ni_vp);
2973 	return error;
2974 }
2975 
2976 /*
2977  * Change mode of a file given a file descriptor.
2978  */
2979 #ifndef _SYS_SYSPROTO_H_
2980 struct fchmod_args {
2981 	int	fd;
2982 	int	mode;
2983 };
2984 #endif
2985 /* ARGSUSED */
2986 int
2987 fchmod(td, uap)
2988 	struct thread *td;
2989 	register struct fchmod_args /* {
2990 		syscallarg(int) fd;
2991 		syscallarg(int) mode;
2992 	} */ *uap;
2993 {
2994 	struct file *fp;
2995 	struct vnode *vp;
2996 	int error;
2997 
2998 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2999 		return (error);
3000 	vp = (struct vnode *)fp->f_data;
3001 	error = setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode));
3002 	fdrop(fp, td);
3003 	return (error);
3004 }
3005 
3006 /*
3007  * Common implementation for chown(), lchown(), and fchown()
3008  */
3009 static int
3010 setfown(td, vp, uid, gid)
3011 	struct thread *td;
3012 	struct vnode *vp;
3013 	uid_t uid;
3014 	gid_t gid;
3015 {
3016 	int error;
3017 	struct mount *mp;
3018 	struct vattr vattr;
3019 
3020 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3021 		return (error);
3022 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3023 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3024 	VATTR_NULL(&vattr);
3025 	vattr.va_uid = uid;
3026 	vattr.va_gid = gid;
3027 	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3028 	VOP_UNLOCK(vp, 0, td);
3029 	vn_finished_write(mp);
3030 	return error;
3031 }
3032 
3033 /*
3034  * Set ownership given a path name.
3035  */
3036 #ifndef _SYS_SYSPROTO_H_
3037 struct chown_args {
3038 	char	*path;
3039 	int	uid;
3040 	int	gid;
3041 };
3042 #endif
3043 /* ARGSUSED */
3044 int
3045 chown(td, uap)
3046 	struct thread *td;
3047 	register struct chown_args /* {
3048 		syscallarg(char *) path;
3049 		syscallarg(int) uid;
3050 		syscallarg(int) gid;
3051 	} */ *uap;
3052 {
3053 	int error;
3054 	struct nameidata nd;
3055 
3056 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3057 	if ((error = namei(&nd)) != 0)
3058 		return (error);
3059 	NDFREE(&nd, NDF_ONLY_PNBUF);
3060 	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
3061 	vrele(nd.ni_vp);
3062 	return (error);
3063 }
3064 
3065 /*
3066  * Set ownership given a path name, do not cross symlinks.
3067  */
3068 #ifndef _SYS_SYSPROTO_H_
3069 struct lchown_args {
3070 	char	*path;
3071 	int	uid;
3072 	int	gid;
3073 };
3074 #endif
3075 /* ARGSUSED */
3076 int
3077 lchown(td, uap)
3078 	struct thread *td;
3079 	register struct lchown_args /* {
3080 		syscallarg(char *) path;
3081 		syscallarg(int) uid;
3082 		syscallarg(int) gid;
3083 	} */ *uap;
3084 {
3085 	int error;
3086 	struct nameidata nd;
3087 
3088 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3089 	if ((error = namei(&nd)) != 0)
3090 		return (error);
3091 	NDFREE(&nd, NDF_ONLY_PNBUF);
3092 	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
3093 	vrele(nd.ni_vp);
3094 	return (error);
3095 }
3096 
3097 /*
3098  * Set ownership given a file descriptor.
3099  */
3100 #ifndef _SYS_SYSPROTO_H_
3101 struct fchown_args {
3102 	int	fd;
3103 	int	uid;
3104 	int	gid;
3105 };
3106 #endif
3107 /* ARGSUSED */
3108 int
3109 fchown(td, uap)
3110 	struct thread *td;
3111 	register struct fchown_args /* {
3112 		syscallarg(int) fd;
3113 		syscallarg(int) uid;
3114 		syscallarg(int) gid;
3115 	} */ *uap;
3116 {
3117 	struct file *fp;
3118 	struct vnode *vp;
3119 	int error;
3120 
3121 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3122 		return (error);
3123 	vp = (struct vnode *)fp->f_data;
3124 	error = setfown(td, (struct vnode *)fp->f_data,
3125 		SCARG(uap, uid), SCARG(uap, gid));
3126 	fdrop(fp, td);
3127 	return (error);
3128 }
3129 
3130 /*
3131  * Common implementation code for utimes(), lutimes(), and futimes().
3132  */
3133 static int
3134 getutimes(usrtvp, tsp)
3135 	const struct timeval *usrtvp;
3136 	struct timespec *tsp;
3137 {
3138 	struct timeval tv[2];
3139 	int error;
3140 
3141 	if (usrtvp == NULL) {
3142 		microtime(&tv[0]);
3143 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
3144 		tsp[1] = tsp[0];
3145 	} else {
3146 		if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
3147 			return (error);
3148 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
3149 		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
3150 	}
3151 	return 0;
3152 }
3153 
3154 /*
3155  * Common implementation code for utimes(), lutimes(), and futimes().
3156  */
3157 static int
3158 setutimes(td, vp, ts, nullflag)
3159 	struct thread *td;
3160 	struct vnode *vp;
3161 	const struct timespec *ts;
3162 	int nullflag;
3163 {
3164 	int error;
3165 	struct mount *mp;
3166 	struct vattr vattr;
3167 
3168 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3169 		return (error);
3170 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3171 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3172 	VATTR_NULL(&vattr);
3173 	vattr.va_atime = ts[0];
3174 	vattr.va_mtime = ts[1];
3175 	if (nullflag)
3176 		vattr.va_vaflags |= VA_UTIMES_NULL;
3177 	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3178 	VOP_UNLOCK(vp, 0, td);
3179 	vn_finished_write(mp);
3180 	return error;
3181 }
3182 
3183 /*
3184  * Set the access and modification times of a file.
3185  */
3186 #ifndef _SYS_SYSPROTO_H_
3187 struct utimes_args {
3188 	char	*path;
3189 	struct	timeval *tptr;
3190 };
3191 #endif
3192 /* ARGSUSED */
3193 int
3194 utimes(td, uap)
3195 	struct thread *td;
3196 	register struct utimes_args /* {
3197 		syscallarg(char *) path;
3198 		syscallarg(struct timeval *) tptr;
3199 	} */ *uap;
3200 {
3201 	struct timespec ts[2];
3202 	struct timeval *usrtvp;
3203 	int error;
3204 	struct nameidata nd;
3205 
3206 	usrtvp = SCARG(uap, tptr);
3207 	if ((error = getutimes(usrtvp, ts)) != 0)
3208 		return (error);
3209 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3210 	if ((error = namei(&nd)) != 0)
3211 		return (error);
3212 	NDFREE(&nd, NDF_ONLY_PNBUF);
3213 	error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
3214 	vrele(nd.ni_vp);
3215 	return (error);
3216 }
3217 
3218 /*
3219  * Set the access and modification times of a file.
3220  */
3221 #ifndef _SYS_SYSPROTO_H_
3222 struct lutimes_args {
3223 	char	*path;
3224 	struct	timeval *tptr;
3225 };
3226 #endif
3227 /* ARGSUSED */
3228 int
3229 lutimes(td, uap)
3230 	struct thread *td;
3231 	register struct lutimes_args /* {
3232 		syscallarg(char *) path;
3233 		syscallarg(struct timeval *) tptr;
3234 	} */ *uap;
3235 {
3236 	struct timespec ts[2];
3237 	struct timeval *usrtvp;
3238 	int error;
3239 	struct nameidata nd;
3240 
3241 	usrtvp = SCARG(uap, tptr);
3242 	if ((error = getutimes(usrtvp, ts)) != 0)
3243 		return (error);
3244 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3245 	if ((error = namei(&nd)) != 0)
3246 		return (error);
3247 	NDFREE(&nd, NDF_ONLY_PNBUF);
3248 	error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
3249 	vrele(nd.ni_vp);
3250 	return (error);
3251 }
3252 
3253 /*
3254  * Set the access and modification times of a file.
3255  */
3256 #ifndef _SYS_SYSPROTO_H_
3257 struct futimes_args {
3258 	int	fd;
3259 	struct	timeval *tptr;
3260 };
3261 #endif
3262 /* ARGSUSED */
3263 int
3264 futimes(td, uap)
3265 	struct thread *td;
3266 	register struct futimes_args /* {
3267 		syscallarg(int ) fd;
3268 		syscallarg(struct timeval *) tptr;
3269 	} */ *uap;
3270 {
3271 	struct timespec ts[2];
3272 	struct file *fp;
3273 	struct timeval *usrtvp;
3274 	int error;
3275 
3276 	usrtvp = SCARG(uap, tptr);
3277 	if ((error = getutimes(usrtvp, ts)) != 0)
3278 		return (error);
3279 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3280 		return (error);
3281 	error = setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL);
3282 	fdrop(fp, td);
3283 	return (error);
3284 }
3285 
3286 /*
3287  * Truncate a file given its path name.
3288  */
3289 #ifndef _SYS_SYSPROTO_H_
3290 struct truncate_args {
3291 	char	*path;
3292 	int	pad;
3293 	off_t	length;
3294 };
3295 #endif
3296 /* ARGSUSED */
3297 int
3298 truncate(td, uap)
3299 	struct thread *td;
3300 	register struct truncate_args /* {
3301 		syscallarg(char *) path;
3302 		syscallarg(int) pad;
3303 		syscallarg(off_t) length;
3304 	} */ *uap;
3305 {
3306 	struct mount *mp;
3307 	struct vnode *vp;
3308 	struct vattr vattr;
3309 	int error;
3310 	struct nameidata nd;
3311 
3312 	if (uap->length < 0)
3313 		return(EINVAL);
3314 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3315 	if ((error = namei(&nd)) != 0)
3316 		return (error);
3317 	vp = nd.ni_vp;
3318 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3319 		vrele(vp);
3320 		return (error);
3321 	}
3322 	NDFREE(&nd, NDF_ONLY_PNBUF);
3323 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3324 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3325 	if (vp->v_type == VDIR)
3326 		error = EISDIR;
3327 	else if ((error = vn_writechk(vp)) == 0 &&
3328 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3329 		VATTR_NULL(&vattr);
3330 		vattr.va_size = SCARG(uap, length);
3331 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3332 	}
3333 	vput(vp);
3334 	vn_finished_write(mp);
3335 	return (error);
3336 }
3337 
3338 /*
3339  * Truncate a file given a file descriptor.
3340  */
3341 #ifndef _SYS_SYSPROTO_H_
3342 struct ftruncate_args {
3343 	int	fd;
3344 	int	pad;
3345 	off_t	length;
3346 };
3347 #endif
3348 /* ARGSUSED */
3349 int
3350 ftruncate(td, uap)
3351 	struct thread *td;
3352 	register struct ftruncate_args /* {
3353 		syscallarg(int) fd;
3354 		syscallarg(int) pad;
3355 		syscallarg(off_t) length;
3356 	} */ *uap;
3357 {
3358 	struct mount *mp;
3359 	struct vattr vattr;
3360 	struct vnode *vp;
3361 	struct file *fp;
3362 	int error;
3363 
3364 	if (uap->length < 0)
3365 		return(EINVAL);
3366 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3367 		return (error);
3368 	if ((fp->f_flag & FWRITE) == 0) {
3369 		fdrop(fp, td);
3370 		return (EINVAL);
3371 	}
3372 	vp = (struct vnode *)fp->f_data;
3373 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3374 		fdrop(fp, td);
3375 		return (error);
3376 	}
3377 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3378 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3379 	if (vp->v_type == VDIR)
3380 		error = EISDIR;
3381 	else if ((error = vn_writechk(vp)) == 0) {
3382 		VATTR_NULL(&vattr);
3383 		vattr.va_size = SCARG(uap, length);
3384 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3385 	}
3386 	VOP_UNLOCK(vp, 0, td);
3387 	vn_finished_write(mp);
3388 	fdrop(fp, td);
3389 	return (error);
3390 }
3391 
3392 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
3393 /*
3394  * Truncate a file given its path name.
3395  */
3396 #ifndef _SYS_SYSPROTO_H_
3397 struct otruncate_args {
3398 	char	*path;
3399 	long	length;
3400 };
3401 #endif
3402 /* ARGSUSED */
3403 int
3404 otruncate(td, uap)
3405 	struct thread *td;
3406 	register struct otruncate_args /* {
3407 		syscallarg(char *) path;
3408 		syscallarg(long) length;
3409 	} */ *uap;
3410 {
3411 	struct truncate_args /* {
3412 		syscallarg(char *) path;
3413 		syscallarg(int) pad;
3414 		syscallarg(off_t) length;
3415 	} */ nuap;
3416 
3417 	SCARG(&nuap, path) = SCARG(uap, path);
3418 	SCARG(&nuap, length) = SCARG(uap, length);
3419 	return (truncate(td, &nuap));
3420 }
3421 
3422 /*
3423  * Truncate a file given a file descriptor.
3424  */
3425 #ifndef _SYS_SYSPROTO_H_
3426 struct oftruncate_args {
3427 	int	fd;
3428 	long	length;
3429 };
3430 #endif
3431 /* ARGSUSED */
3432 int
3433 oftruncate(td, uap)
3434 	struct thread *td;
3435 	register struct oftruncate_args /* {
3436 		syscallarg(int) fd;
3437 		syscallarg(long) length;
3438 	} */ *uap;
3439 {
3440 	struct ftruncate_args /* {
3441 		syscallarg(int) fd;
3442 		syscallarg(int) pad;
3443 		syscallarg(off_t) length;
3444 	} */ nuap;
3445 
3446 	SCARG(&nuap, fd) = SCARG(uap, fd);
3447 	SCARG(&nuap, length) = SCARG(uap, length);
3448 	return (ftruncate(td, &nuap));
3449 }
3450 #endif /* COMPAT_43 || COMPAT_SUNOS */
3451 
3452 /*
3453  * Sync an open file.
3454  */
3455 #ifndef _SYS_SYSPROTO_H_
3456 struct fsync_args {
3457 	int	fd;
3458 };
3459 #endif
3460 /* ARGSUSED */
3461 int
3462 fsync(td, uap)
3463 	struct thread *td;
3464 	struct fsync_args /* {
3465 		syscallarg(int) fd;
3466 	} */ *uap;
3467 {
3468 	struct vnode *vp;
3469 	struct mount *mp;
3470 	struct file *fp;
3471 	vm_object_t obj;
3472 	int error;
3473 
3474 	GIANT_REQUIRED;
3475 
3476 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3477 		return (error);
3478 	vp = (struct vnode *)fp->f_data;
3479 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3480 		fdrop(fp, td);
3481 		return (error);
3482 	}
3483 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3484 	if (VOP_GETVOBJECT(vp, &obj) == 0) {
3485 		vm_object_page_clean(obj, 0, 0, 0);
3486 	}
3487 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
3488 #ifdef SOFTUPDATES
3489 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3490 	    error = softdep_fsync(vp);
3491 #endif
3492 
3493 	VOP_UNLOCK(vp, 0, td);
3494 	vn_finished_write(mp);
3495 	fdrop(fp, td);
3496 	return (error);
3497 }
3498 
3499 /*
3500  * Rename files.  Source and destination must either both be directories,
3501  * or both not be directories.  If target is a directory, it must be empty.
3502  */
3503 #ifndef _SYS_SYSPROTO_H_
3504 struct rename_args {
3505 	char	*from;
3506 	char	*to;
3507 };
3508 #endif
3509 /* ARGSUSED */
3510 int
3511 rename(td, uap)
3512 	struct thread *td;
3513 	register struct rename_args /* {
3514 		syscallarg(char *) from;
3515 		syscallarg(char *) to;
3516 	} */ *uap;
3517 {
3518 	struct mount *mp;
3519 	struct vnode *tvp, *fvp, *tdvp;
3520 	struct nameidata fromnd, tond;
3521 	int error;
3522 
3523 	bwillwrite();
3524 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
3525 	    SCARG(uap, from), td);
3526 	if ((error = namei(&fromnd)) != 0)
3527 		return (error);
3528 	fvp = fromnd.ni_vp;
3529 	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
3530 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3531 		vrele(fromnd.ni_dvp);
3532 		vrele(fvp);
3533 		goto out1;
3534 	}
3535 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
3536 	    UIO_USERSPACE, SCARG(uap, to), td);
3537 	if (fromnd.ni_vp->v_type == VDIR)
3538 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3539 	if ((error = namei(&tond)) != 0) {
3540 		/* Translate error code for rename("dir1", "dir2/."). */
3541 		if (error == EISDIR && fvp->v_type == VDIR)
3542 			error = EINVAL;
3543 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3544 		vrele(fromnd.ni_dvp);
3545 		vrele(fvp);
3546 		goto out1;
3547 	}
3548 	tdvp = tond.ni_dvp;
3549 	tvp = tond.ni_vp;
3550 	if (tvp != NULL) {
3551 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3552 			error = ENOTDIR;
3553 			goto out;
3554 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3555 			error = EISDIR;
3556 			goto out;
3557 		}
3558 	}
3559 	if (fvp == tdvp)
3560 		error = EINVAL;
3561 	/*
3562 	 * If source is the same as the destination (that is the
3563 	 * same inode number with the same name in the same directory),
3564 	 * then there is nothing to do.
3565 	 */
3566 	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
3567 	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3568 	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
3569 	      fromnd.ni_cnd.cn_namelen))
3570 		error = -1;
3571 out:
3572 	if (!error) {
3573 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3574 		if (fromnd.ni_dvp != tdvp) {
3575 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3576 		}
3577 		if (tvp) {
3578 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3579 		}
3580 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3581 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3582 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3583 		NDFREE(&tond, NDF_ONLY_PNBUF);
3584 	} else {
3585 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3586 		NDFREE(&tond, NDF_ONLY_PNBUF);
3587 		if (tdvp == tvp)
3588 			vrele(tdvp);
3589 		else
3590 			vput(tdvp);
3591 		if (tvp)
3592 			vput(tvp);
3593 		vrele(fromnd.ni_dvp);
3594 		vrele(fvp);
3595 	}
3596 	vrele(tond.ni_startdir);
3597 	vn_finished_write(mp);
3598 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
3599 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
3600 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
3601 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
3602 out1:
3603 	if (fromnd.ni_startdir)
3604 		vrele(fromnd.ni_startdir);
3605 	if (error == -1)
3606 		return (0);
3607 	return (error);
3608 }
3609 
3610 /*
3611  * Make a directory file.
3612  */
3613 #ifndef _SYS_SYSPROTO_H_
3614 struct mkdir_args {
3615 	char	*path;
3616 	int	mode;
3617 };
3618 #endif
3619 /* ARGSUSED */
3620 int
3621 mkdir(td, uap)
3622 	struct thread *td;
3623 	register struct mkdir_args /* {
3624 		syscallarg(char *) path;
3625 		syscallarg(int) mode;
3626 	} */ *uap;
3627 {
3628 
3629 	return vn_mkdir(uap->path, uap->mode, UIO_USERSPACE, td);
3630 }
3631 
3632 int
3633 vn_mkdir(path, mode, segflg, td)
3634 	char *path;
3635 	int mode;
3636 	enum uio_seg segflg;
3637 	struct thread *td;
3638 {
3639 	struct mount *mp;
3640 	struct vnode *vp;
3641 	struct vattr vattr;
3642 	int error;
3643 	struct nameidata nd;
3644 
3645 restart:
3646 	bwillwrite();
3647 	NDINIT(&nd, CREATE, LOCKPARENT, segflg, path, td);
3648 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3649 	if ((error = namei(&nd)) != 0)
3650 		return (error);
3651 	vp = nd.ni_vp;
3652 	if (vp != NULL) {
3653 		NDFREE(&nd, NDF_ONLY_PNBUF);
3654 		vrele(vp);
3655 		vput(nd.ni_dvp);
3656 		return (EEXIST);
3657 	}
3658 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3659 		NDFREE(&nd, NDF_ONLY_PNBUF);
3660 		vput(nd.ni_dvp);
3661 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3662 			return (error);
3663 		goto restart;
3664 	}
3665 	VATTR_NULL(&vattr);
3666 	vattr.va_type = VDIR;
3667 	FILEDESC_LOCK(td->td_proc->p_fd);
3668 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3669 	FILEDESC_UNLOCK(td->td_proc->p_fd);
3670 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3671 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3672 	NDFREE(&nd, NDF_ONLY_PNBUF);
3673 	vput(nd.ni_dvp);
3674 	if (!error)
3675 		vput(nd.ni_vp);
3676 	vn_finished_write(mp);
3677 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
3678 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
3679 	return (error);
3680 }
3681 
3682 /*
3683  * Remove a directory file.
3684  */
3685 #ifndef _SYS_SYSPROTO_H_
3686 struct rmdir_args {
3687 	char	*path;
3688 };
3689 #endif
3690 /* ARGSUSED */
3691 int
3692 rmdir(td, uap)
3693 	struct thread *td;
3694 	struct rmdir_args /* {
3695 		syscallarg(char *) path;
3696 	} */ *uap;
3697 {
3698 	struct mount *mp;
3699 	struct vnode *vp;
3700 	int error;
3701 	struct nameidata nd;
3702 
3703 restart:
3704 	bwillwrite();
3705 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3706 	    SCARG(uap, path), td);
3707 	if ((error = namei(&nd)) != 0)
3708 		return (error);
3709 	vp = nd.ni_vp;
3710 	if (vp->v_type != VDIR) {
3711 		error = ENOTDIR;
3712 		goto out;
3713 	}
3714 	/*
3715 	 * No rmdir "." please.
3716 	 */
3717 	if (nd.ni_dvp == vp) {
3718 		error = EINVAL;
3719 		goto out;
3720 	}
3721 	/*
3722 	 * The root of a mounted filesystem cannot be deleted.
3723 	 */
3724 	if (vp->v_flag & VROOT) {
3725 		error = EBUSY;
3726 		goto out;
3727 	}
3728 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3729 		NDFREE(&nd, NDF_ONLY_PNBUF);
3730 		if (nd.ni_dvp == vp)
3731 			vrele(nd.ni_dvp);
3732 		else
3733 			vput(nd.ni_dvp);
3734 		vput(vp);
3735 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3736 			return (error);
3737 		goto restart;
3738 	}
3739 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3740 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3741 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3742 	vn_finished_write(mp);
3743 out:
3744 	NDFREE(&nd, NDF_ONLY_PNBUF);
3745 	if (nd.ni_dvp == vp)
3746 		vrele(nd.ni_dvp);
3747 	else
3748 		vput(nd.ni_dvp);
3749 	vput(vp);
3750 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3751 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3752 	return (error);
3753 }
3754 
3755 #ifdef COMPAT_43
3756 /*
3757  * Read a block of directory entries in a filesystem independent format.
3758  */
3759 #ifndef _SYS_SYSPROTO_H_
3760 struct ogetdirentries_args {
3761 	int	fd;
3762 	char	*buf;
3763 	u_int	count;
3764 	long	*basep;
3765 };
3766 #endif
3767 int
3768 ogetdirentries(td, uap)
3769 	struct thread *td;
3770 	register struct ogetdirentries_args /* {
3771 		syscallarg(int) fd;
3772 		syscallarg(char *) buf;
3773 		syscallarg(u_int) count;
3774 		syscallarg(long *) basep;
3775 	} */ *uap;
3776 {
3777 	struct vnode *vp;
3778 	struct file *fp;
3779 	struct uio auio, kuio;
3780 	struct iovec aiov, kiov;
3781 	struct dirent *dp, *edp;
3782 	caddr_t dirbuf;
3783 	int error, eofflag, readcnt;
3784 	long loff;
3785 
3786 	/* XXX arbitrary sanity limit on `count'. */
3787 	if (SCARG(uap, count) > 64 * 1024)
3788 		return (EINVAL);
3789 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3790 		return (error);
3791 	if ((fp->f_flag & FREAD) == 0) {
3792 		fdrop(fp, td);
3793 		return (EBADF);
3794 	}
3795 	vp = (struct vnode *)fp->f_data;
3796 unionread:
3797 	if (vp->v_type != VDIR) {
3798 		fdrop(fp, td);
3799 		return (EINVAL);
3800 	}
3801 	aiov.iov_base = SCARG(uap, buf);
3802 	aiov.iov_len = SCARG(uap, count);
3803 	auio.uio_iov = &aiov;
3804 	auio.uio_iovcnt = 1;
3805 	auio.uio_rw = UIO_READ;
3806 	auio.uio_segflg = UIO_USERSPACE;
3807 	auio.uio_td = td;
3808 	auio.uio_resid = SCARG(uap, count);
3809 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3810 	loff = auio.uio_offset = fp->f_offset;
3811 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3812 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3813 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3814 			    NULL, NULL);
3815 			fp->f_offset = auio.uio_offset;
3816 		} else
3817 #	endif
3818 	{
3819 		kuio = auio;
3820 		kuio.uio_iov = &kiov;
3821 		kuio.uio_segflg = UIO_SYSSPACE;
3822 		kiov.iov_len = SCARG(uap, count);
3823 		MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
3824 		kiov.iov_base = dirbuf;
3825 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3826 			    NULL, NULL);
3827 		fp->f_offset = kuio.uio_offset;
3828 		if (error == 0) {
3829 			readcnt = SCARG(uap, count) - kuio.uio_resid;
3830 			edp = (struct dirent *)&dirbuf[readcnt];
3831 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3832 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3833 					/*
3834 					 * The expected low byte of
3835 					 * dp->d_namlen is our dp->d_type.
3836 					 * The high MBZ byte of dp->d_namlen
3837 					 * is our dp->d_namlen.
3838 					 */
3839 					dp->d_type = dp->d_namlen;
3840 					dp->d_namlen = 0;
3841 #				else
3842 					/*
3843 					 * The dp->d_type is the high byte
3844 					 * of the expected dp->d_namlen,
3845 					 * so must be zero'ed.
3846 					 */
3847 					dp->d_type = 0;
3848 #				endif
3849 				if (dp->d_reclen > 0) {
3850 					dp = (struct dirent *)
3851 					    ((char *)dp + dp->d_reclen);
3852 				} else {
3853 					error = EIO;
3854 					break;
3855 				}
3856 			}
3857 			if (dp >= edp)
3858 				error = uiomove(dirbuf, readcnt, &auio);
3859 		}
3860 		FREE(dirbuf, M_TEMP);
3861 	}
3862 	VOP_UNLOCK(vp, 0, td);
3863 	if (error) {
3864 		fdrop(fp, td);
3865 		return (error);
3866 	}
3867 	if (SCARG(uap, count) == auio.uio_resid) {
3868 		if (union_dircheckp) {
3869 			error = union_dircheckp(td, &vp, fp);
3870 			if (error == -1)
3871 				goto unionread;
3872 			if (error) {
3873 				fdrop(fp, td);
3874 				return (error);
3875 			}
3876 		}
3877 		if ((vp->v_flag & VROOT) &&
3878 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3879 			struct vnode *tvp = vp;
3880 			vp = vp->v_mount->mnt_vnodecovered;
3881 			VREF(vp);
3882 			fp->f_data = (caddr_t) vp;
3883 			fp->f_offset = 0;
3884 			vrele(tvp);
3885 			goto unionread;
3886 		}
3887 	}
3888 	error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3889 	    sizeof(long));
3890 	fdrop(fp, td);
3891 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
3892 	return (error);
3893 }
3894 #endif /* COMPAT_43 */
3895 
3896 /*
3897  * Read a block of directory entries in a filesystem independent format.
3898  */
3899 #ifndef _SYS_SYSPROTO_H_
3900 struct getdirentries_args {
3901 	int	fd;
3902 	char	*buf;
3903 	u_int	count;
3904 	long	*basep;
3905 };
3906 #endif
3907 int
3908 getdirentries(td, uap)
3909 	struct thread *td;
3910 	register struct getdirentries_args /* {
3911 		syscallarg(int) fd;
3912 		syscallarg(char *) buf;
3913 		syscallarg(u_int) count;
3914 		syscallarg(long *) basep;
3915 	} */ *uap;
3916 {
3917 	struct vnode *vp;
3918 	struct file *fp;
3919 	struct uio auio;
3920 	struct iovec aiov;
3921 	long loff;
3922 	int error, eofflag;
3923 
3924 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3925 		return (error);
3926 	if ((fp->f_flag & FREAD) == 0) {
3927 		fdrop(fp, td);
3928 		return (EBADF);
3929 	}
3930 	vp = (struct vnode *)fp->f_data;
3931 unionread:
3932 	if (vp->v_type != VDIR) {
3933 		fdrop(fp, td);
3934 		return (EINVAL);
3935 	}
3936 	aiov.iov_base = SCARG(uap, buf);
3937 	aiov.iov_len = SCARG(uap, count);
3938 	auio.uio_iov = &aiov;
3939 	auio.uio_iovcnt = 1;
3940 	auio.uio_rw = UIO_READ;
3941 	auio.uio_segflg = UIO_USERSPACE;
3942 	auio.uio_td = td;
3943 	auio.uio_resid = SCARG(uap, count);
3944 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3945 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3946 	loff = auio.uio_offset = fp->f_offset;
3947 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
3948 	fp->f_offset = auio.uio_offset;
3949 	VOP_UNLOCK(vp, 0, td);
3950 	if (error) {
3951 		fdrop(fp, td);
3952 		return (error);
3953 	}
3954 	if (SCARG(uap, count) == auio.uio_resid) {
3955 		if (union_dircheckp) {
3956 			error = union_dircheckp(td, &vp, fp);
3957 			if (error == -1)
3958 				goto unionread;
3959 			if (error) {
3960 				fdrop(fp, td);
3961 				return (error);
3962 			}
3963 		}
3964 		if ((vp->v_flag & VROOT) &&
3965 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3966 			struct vnode *tvp = vp;
3967 			vp = vp->v_mount->mnt_vnodecovered;
3968 			VREF(vp);
3969 			fp->f_data = (caddr_t) vp;
3970 			fp->f_offset = 0;
3971 			vrele(tvp);
3972 			goto unionread;
3973 		}
3974 	}
3975 	if (SCARG(uap, basep) != NULL) {
3976 		error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3977 		    sizeof(long));
3978 	}
3979 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
3980 	fdrop(fp, td);
3981 	return (error);
3982 }
3983 #ifndef _SYS_SYSPROTO_H_
3984 struct getdents_args {
3985 	int fd;
3986 	char *buf;
3987 	size_t count;
3988 };
3989 #endif
3990 int
3991 getdents(td, uap)
3992 	struct thread *td;
3993 	register struct getdents_args /* {
3994 		syscallarg(int) fd;
3995 		syscallarg(char *) buf;
3996 		syscallarg(u_int) count;
3997 	} */ *uap;
3998 {
3999 	struct getdirentries_args ap;
4000 	ap.fd = uap->fd;
4001 	ap.buf = uap->buf;
4002 	ap.count = uap->count;
4003 	ap.basep = NULL;
4004 	return getdirentries(td, &ap);
4005 }
4006 
4007 /*
4008  * Set the mode mask for creation of filesystem nodes.
4009  *
4010  * MP SAFE
4011  */
4012 #ifndef _SYS_SYSPROTO_H_
4013 struct umask_args {
4014 	int	newmask;
4015 };
4016 #endif
4017 int
4018 umask(td, uap)
4019 	struct thread *td;
4020 	struct umask_args /* {
4021 		syscallarg(int) newmask;
4022 	} */ *uap;
4023 {
4024 	register struct filedesc *fdp;
4025 
4026 	FILEDESC_LOCK(td->td_proc->p_fd);
4027 	fdp = td->td_proc->p_fd;
4028 	td->td_retval[0] = fdp->fd_cmask;
4029 	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
4030 	FILEDESC_UNLOCK(td->td_proc->p_fd);
4031 	return (0);
4032 }
4033 
4034 /*
4035  * Void all references to file by ripping underlying filesystem
4036  * away from vnode.
4037  */
4038 #ifndef _SYS_SYSPROTO_H_
4039 struct revoke_args {
4040 	char	*path;
4041 };
4042 #endif
4043 /* ARGSUSED */
4044 int
4045 revoke(td, uap)
4046 	struct thread *td;
4047 	register struct revoke_args /* {
4048 		syscallarg(char *) path;
4049 	} */ *uap;
4050 {
4051 	struct mount *mp;
4052 	struct vnode *vp;
4053 	struct vattr vattr;
4054 	int error;
4055 	struct nameidata nd;
4056 
4057 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path),
4058 	    td);
4059 	if ((error = namei(&nd)) != 0)
4060 		return (error);
4061 	vp = nd.ni_vp;
4062 	NDFREE(&nd, NDF_ONLY_PNBUF);
4063 	if (vp->v_type != VCHR) {
4064 		vput(vp);
4065 		return (EINVAL);
4066 	}
4067 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
4068 	if (error) {
4069 		vput(vp);
4070 		return (error);
4071 	}
4072 	VOP_UNLOCK(vp, 0, td);
4073 	if (td->td_ucred->cr_uid != vattr.va_uid) {
4074 		error = suser_cred(td->td_ucred, PRISON_ROOT);
4075 		if (error)
4076 			goto out;
4077 	}
4078 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4079 		goto out;
4080 	if (vcount(vp) > 1)
4081 		VOP_REVOKE(vp, REVOKEALL);
4082 	vn_finished_write(mp);
4083 out:
4084 	vrele(vp);
4085 	return (error);
4086 }
4087 
4088 /*
4089  * Convert a user file descriptor to a kernel file entry.
4090  * The file entry is locked upon returning.
4091  */
4092 int
4093 getvnode(fdp, fd, fpp)
4094 	struct filedesc *fdp;
4095 	int fd;
4096 	struct file **fpp;
4097 {
4098 	int error;
4099 	struct file *fp;
4100 
4101 	fp = NULL;
4102 	if (fdp == NULL)
4103 		error = EBADF;
4104 	else {
4105 		FILEDESC_LOCK(fdp);
4106 		if ((u_int)fd >= fdp->fd_nfiles ||
4107 		    (fp = fdp->fd_ofiles[fd]) == NULL)
4108 			error = EBADF;
4109 		else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
4110 			fp = NULL;
4111 			error = EINVAL;
4112 		} else {
4113 			fhold(fp);
4114 			error = 0;
4115 		}
4116 		FILEDESC_UNLOCK(fdp);
4117 	}
4118 	*fpp = fp;
4119 	return (error);
4120 }
4121 /*
4122  * Get (NFS) file handle
4123  */
4124 #ifndef _SYS_SYSPROTO_H_
4125 struct getfh_args {
4126 	char	*fname;
4127 	fhandle_t *fhp;
4128 };
4129 #endif
4130 int
4131 getfh(td, uap)
4132 	struct thread *td;
4133 	register struct getfh_args *uap;
4134 {
4135 	struct nameidata nd;
4136 	fhandle_t fh;
4137 	register struct vnode *vp;
4138 	int error;
4139 
4140 	/*
4141 	 * Must be super user
4142 	 */
4143 	error = suser(td);
4144 	if (error)
4145 		return (error);
4146 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
4147 	error = namei(&nd);
4148 	if (error)
4149 		return (error);
4150 	NDFREE(&nd, NDF_ONLY_PNBUF);
4151 	vp = nd.ni_vp;
4152 	bzero(&fh, sizeof(fh));
4153 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4154 	error = VFS_VPTOFH(vp, &fh.fh_fid);
4155 	vput(vp);
4156 	if (error)
4157 		return (error);
4158 	error = copyout(&fh, uap->fhp, sizeof (fh));
4159 	return (error);
4160 }
4161 
4162 /*
4163  * syscall for the rpc.lockd to use to translate a NFS file handle into
4164  * an open descriptor.
4165  *
4166  * warning: do not remove the suser() call or this becomes one giant
4167  * security hole.
4168  */
4169 #ifndef _SYS_SYSPROTO_H_
4170 struct fhopen_args {
4171 	const struct fhandle *u_fhp;
4172 	int flags;
4173 };
4174 #endif
4175 int
4176 fhopen(td, uap)
4177 	struct thread *td;
4178 	struct fhopen_args /* {
4179 		syscallarg(const struct fhandle *) u_fhp;
4180 		syscallarg(int) flags;
4181 	} */ *uap;
4182 {
4183 	struct proc *p = td->td_proc;
4184 	struct mount *mp;
4185 	struct vnode *vp;
4186 	struct fhandle fhp;
4187 	struct vattr vat;
4188 	struct vattr *vap = &vat;
4189 	struct flock lf;
4190 	struct file *fp;
4191 	register struct filedesc *fdp = p->p_fd;
4192 	int fmode, mode, error, type;
4193 	struct file *nfp;
4194 	int indx;
4195 
4196 	/*
4197 	 * Must be super user
4198 	 */
4199 	error = suser(td);
4200 	if (error)
4201 		return (error);
4202 
4203 	fmode = FFLAGS(SCARG(uap, flags));
4204 	/* why not allow a non-read/write open for our lockd? */
4205 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4206 		return (EINVAL);
4207 	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
4208 	if (error)
4209 		return(error);
4210 	/* find the mount point */
4211 	mp = vfs_getvfs(&fhp.fh_fsid);
4212 	if (mp == NULL)
4213 		return (ESTALE);
4214 	/* now give me my vnode, it gets returned to me locked */
4215 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4216 	if (error)
4217 		return (error);
4218  	/*
4219 	 * from now on we have to make sure not
4220 	 * to forget about the vnode
4221 	 * any error that causes an abort must vput(vp)
4222 	 * just set error = err and 'goto bad;'.
4223 	 */
4224 
4225 	/*
4226 	 * from vn_open
4227 	 */
4228 	if (vp->v_type == VLNK) {
4229 		error = EMLINK;
4230 		goto bad;
4231 	}
4232 	if (vp->v_type == VSOCK) {
4233 		error = EOPNOTSUPP;
4234 		goto bad;
4235 	}
4236 	mode = 0;
4237 	if (fmode & (FWRITE | O_TRUNC)) {
4238 		if (vp->v_type == VDIR) {
4239 			error = EISDIR;
4240 			goto bad;
4241 		}
4242 		error = vn_writechk(vp);
4243 		if (error)
4244 			goto bad;
4245 		mode |= VWRITE;
4246 	}
4247 	if (fmode & FREAD)
4248 		mode |= VREAD;
4249 	if (mode) {
4250 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4251 		if (error)
4252 			goto bad;
4253 	}
4254 	if (fmode & O_TRUNC) {
4255 		VOP_UNLOCK(vp, 0, td);				/* XXX */
4256 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4257 			vrele(vp);
4258 			return (error);
4259 		}
4260 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4261 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4262 		VATTR_NULL(vap);
4263 		vap->va_size = 0;
4264 		error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4265 		vn_finished_write(mp);
4266 		if (error)
4267 			goto bad;
4268 	}
4269 	error = VOP_OPEN(vp, fmode, td->td_ucred, td);
4270 	if (error)
4271 		goto bad;
4272 	/*
4273 	 * Make sure that a VM object is created for VMIO support.
4274 	 */
4275 	if (vn_canvmio(vp) == TRUE) {
4276 		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
4277 			goto bad;
4278 	}
4279 	if (fmode & FWRITE)
4280 		vp->v_writecount++;
4281 
4282 	/*
4283 	 * end of vn_open code
4284 	 */
4285 
4286 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4287 		if (fmode & FWRITE)
4288 			vp->v_writecount--;
4289 		goto bad;
4290 	}
4291 	fp = nfp;
4292 
4293 	/*
4294 	 * Hold an extra reference to avoid having fp ripped out
4295 	 * from under us while we block in the lock op
4296 	 */
4297 	fhold(fp);
4298 	nfp->f_data = (caddr_t)vp;
4299 	nfp->f_flag = fmode & FMASK;
4300 	nfp->f_ops = &vnops;
4301 	nfp->f_type = DTYPE_VNODE;
4302 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4303 		lf.l_whence = SEEK_SET;
4304 		lf.l_start = 0;
4305 		lf.l_len = 0;
4306 		if (fmode & O_EXLOCK)
4307 			lf.l_type = F_WRLCK;
4308 		else
4309 			lf.l_type = F_RDLCK;
4310 		type = F_FLOCK;
4311 		if ((fmode & FNONBLOCK) == 0)
4312 			type |= F_WAIT;
4313 		VOP_UNLOCK(vp, 0, td);
4314 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
4315 			/*
4316 			 * The lock request failed.  Normally close the
4317 			 * descriptor but handle the case where someone might
4318 			 * have dup()d or close()d it when we weren't looking.
4319 			 */
4320 			FILEDESC_LOCK(fdp);
4321 			if (fdp->fd_ofiles[indx] == fp) {
4322 				fdp->fd_ofiles[indx] = NULL;
4323 				FILEDESC_UNLOCK(fdp);
4324 				fdrop(fp, td);
4325 			} else
4326 				FILEDESC_UNLOCK(fdp);
4327 			/*
4328 			 * release our private reference
4329 			 */
4330 			fdrop(fp, td);
4331 			return(error);
4332 		}
4333 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4334 		fp->f_flag |= FHASLOCK;
4335 	}
4336 	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
4337 		vfs_object_create(vp, td, td->td_ucred);
4338 
4339 	VOP_UNLOCK(vp, 0, td);
4340 	fdrop(fp, td);
4341 	td->td_retval[0] = indx;
4342 	return (0);
4343 
4344 bad:
4345 	vput(vp);
4346 	return (error);
4347 }
4348 
4349 /*
4350  * Stat an (NFS) file handle.
4351  */
4352 #ifndef _SYS_SYSPROTO_H_
4353 struct fhstat_args {
4354 	struct fhandle *u_fhp;
4355 	struct stat *sb;
4356 };
4357 #endif
4358 int
4359 fhstat(td, uap)
4360 	struct thread *td;
4361 	register struct fhstat_args /* {
4362 		syscallarg(struct fhandle *) u_fhp;
4363 		syscallarg(struct stat *) sb;
4364 	} */ *uap;
4365 {
4366 	struct stat sb;
4367 	fhandle_t fh;
4368 	struct mount *mp;
4369 	struct vnode *vp;
4370 	int error;
4371 
4372 	/*
4373 	 * Must be super user
4374 	 */
4375 	error = suser(td);
4376 	if (error)
4377 		return (error);
4378 
4379 	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
4380 	if (error)
4381 		return (error);
4382 
4383 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4384 		return (ESTALE);
4385 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4386 		return (error);
4387 	error = vn_stat(vp, &sb, td);
4388 	vput(vp);
4389 	if (error)
4390 		return (error);
4391 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
4392 	return (error);
4393 }
4394 
4395 /*
4396  * Implement fstatfs() for (NFS) file handles.
4397  */
4398 #ifndef _SYS_SYSPROTO_H_
4399 struct fhstatfs_args {
4400 	struct fhandle *u_fhp;
4401 	struct statfs *buf;
4402 };
4403 #endif
4404 int
4405 fhstatfs(td, uap)
4406 	struct thread *td;
4407 	struct fhstatfs_args /* {
4408 		syscallarg(struct fhandle) *u_fhp;
4409 		syscallarg(struct statfs) *buf;
4410 	} */ *uap;
4411 {
4412 	struct statfs *sp;
4413 	struct mount *mp;
4414 	struct vnode *vp;
4415 	struct statfs sb;
4416 	fhandle_t fh;
4417 	int error;
4418 
4419 	/*
4420 	 * Must be super user
4421 	 */
4422 	error = suser(td);
4423 	if (error)
4424 		return (error);
4425 
4426 	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
4427 		return (error);
4428 
4429 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4430 		return (ESTALE);
4431 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4432 		return (error);
4433 	mp = vp->v_mount;
4434 	sp = &mp->mnt_stat;
4435 	vput(vp);
4436 	if ((error = VFS_STATFS(mp, sp, td)) != 0)
4437 		return (error);
4438 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4439 	if (suser(td)) {
4440 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
4441 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
4442 		sp = &sb;
4443 	}
4444 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
4445 }
4446 
4447 /*
4448  * Syscall to push extended attribute configuration information into the
4449  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4450  * a command (int cmd), and attribute name and misc data.  For now, the
4451  * attribute name is left in userspace for consumption by the VFS_op.
4452  * It will probably be changed to be copied into sysspace by the
4453  * syscall in the future, once issues with various consumers of the
4454  * attribute code have raised their hands.
4455  *
4456  * Currently this is used only by UFS Extended Attributes.
4457  */
4458 int
4459 extattrctl(td, uap)
4460 	struct thread *td;
4461 	struct extattrctl_args /* {
4462 		syscallarg(const char *) path;
4463 		syscallarg(int) cmd;
4464 		syscallarg(const char *) filename;
4465 		syscallarg(int) attrnamespace;
4466 		syscallarg(const char *) attrname;
4467 	} */ *uap;
4468 {
4469 	struct vnode *filename_vp;
4470 	struct nameidata nd;
4471 	struct mount *mp, *mp_writable;
4472 	char attrname[EXTATTR_MAXNAMELEN];
4473 	int error;
4474 
4475 	/*
4476 	 * uap->attrname is not always defined.  We check again later when we
4477 	 * invoke the VFS call so as to pass in NULL there if needed.
4478 	 */
4479 	if (uap->attrname != NULL) {
4480 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4481 		    NULL);
4482 		if (error)
4483 			return (error);
4484 	}
4485 
4486 	/*
4487 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4488 	 * which VFS_EXTATTRCTL() will later release.
4489 	 */
4490 	filename_vp = NULL;
4491 	if (uap->filename != NULL) {
4492 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
4493 		    uap->filename, td);
4494 		if ((error = namei(&nd)) != 0)
4495 			return (error);
4496 		filename_vp = nd.ni_vp;
4497 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4498 	}
4499 
4500 	/* uap->path is always defined. */
4501 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4502 	if ((error = namei(&nd)) != 0) {
4503 		if (filename_vp != NULL)
4504 			vput(filename_vp);
4505 		return (error);
4506 	}
4507 	mp = nd.ni_vp->v_mount;
4508 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4509 	NDFREE(&nd, 0);
4510 	if (error) {
4511 		if (filename_vp != NULL)
4512 			vput(filename_vp);
4513 		return (error);
4514 	}
4515 
4516 	if (uap->attrname != NULL) {
4517 		error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp,
4518 		    uap->attrnamespace, attrname, td);
4519 	} else {
4520 		error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp,
4521 		    uap->attrnamespace, NULL, td);
4522 	}
4523 
4524 	vn_finished_write(mp_writable);
4525 	/*
4526 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4527 	 * filename_vp, so vrele it if it is defined.
4528 	 */
4529 	if (filename_vp != NULL)
4530 		vrele(filename_vp);
4531 
4532 	return (error);
4533 }
4534 
4535 /*-
4536  * Set a named extended attribute on a file or directory
4537  *
4538  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4539  *            kernelspace string pointer "attrname", userspace buffer
4540  *            pointer "data", buffer length "nbytes", thread "td".
4541  * Returns: 0 on success, an error number otherwise
4542  * Locks: none
4543  * References: vp must be a valid reference for the duration of the call
4544  */
4545 static int
4546 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4547     void *data, size_t nbytes, struct thread *td)
4548 {
4549 	struct mount *mp;
4550 	struct uio auio;
4551 	struct iovec aiov;
4552 	ssize_t cnt;
4553 	int error;
4554 
4555 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4556 		return (error);
4557 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4558 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4559 
4560 	aiov.iov_base = data;
4561 	aiov.iov_len = nbytes;
4562 	auio.uio_iov = &aiov;
4563 	auio.uio_iovcnt = 1;
4564 	auio.uio_offset = 0;
4565 	if (nbytes > INT_MAX) {
4566 		error = EINVAL;
4567 		goto done;
4568 	}
4569 	auio.uio_resid = nbytes;
4570 	auio.uio_rw = UIO_WRITE;
4571 	auio.uio_segflg = UIO_USERSPACE;
4572 	auio.uio_td = td;
4573 	cnt = nbytes;
4574 
4575 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4576 	    td->td_ucred, td);
4577 	cnt -= auio.uio_resid;
4578 	td->td_retval[0] = cnt;
4579 
4580 done:
4581 	VOP_UNLOCK(vp, 0, td);
4582 	vn_finished_write(mp);
4583 	return (error);
4584 }
4585 
4586 int
4587 extattr_set_file(td, uap)
4588 	struct thread *td;
4589 	struct extattr_set_file_args /* {
4590 		syscallarg(const char *) path;
4591 		syscallarg(int) attrnamespace;
4592 		syscallarg(const char *) attrname;
4593 		syscallarg(void *) data;
4594 		syscallarg(size_t) nbytes;
4595 	} */ *uap;
4596 {
4597 	struct nameidata nd;
4598 	char attrname[EXTATTR_MAXNAMELEN];
4599 	int error;
4600 
4601 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4602 	if (error)
4603 		return (error);
4604 
4605 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4606 	if ((error = namei(&nd)) != 0)
4607 		return (error);
4608 	NDFREE(&nd, NDF_ONLY_PNBUF);
4609 
4610 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4611 	    uap->data, uap->nbytes, td);
4612 
4613 	vrele(nd.ni_vp);
4614 	return (error);
4615 }
4616 
4617 int
4618 extattr_set_fd(td, uap)
4619 	struct thread *td;
4620 	struct extattr_set_fd_args /* {
4621 		syscallarg(int) fd;
4622 		syscallarg(int) attrnamespace;
4623 		syscallarg(const char *) attrname;
4624 		syscallarg(void *) data;
4625 		syscallarg(size_t) nbytes;
4626 	} */ *uap;
4627 {
4628 	struct file *fp;
4629 	char attrname[EXTATTR_MAXNAMELEN];
4630 	int error;
4631 
4632 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4633 	if (error)
4634 		return (error);
4635 
4636 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
4637 		return (error);
4638 
4639 	error = extattr_set_vp((struct vnode *)fp->f_data, uap->attrnamespace,
4640 	    attrname, uap->data, uap->nbytes, td);
4641 	fdrop(fp, td);
4642 
4643 	return (error);
4644 }
4645 
4646 /*-
4647  * Get a named extended attribute on a file or directory
4648  *
4649  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4650  *            kernelspace string pointer "attrname", userspace buffer
4651  *            pointer "data", buffer length "nbytes", thread "td".
4652  * Returns: 0 on success, an error number otherwise
4653  * Locks: none
4654  * References: vp must be a valid reference for the duration of the call
4655  */
4656 static int
4657 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4658     void *data, size_t nbytes, struct thread *td)
4659 {
4660 	struct uio auio, *auiop;
4661 	struct iovec aiov;
4662 	ssize_t cnt;
4663 	size_t size, *sizep;
4664 	int error;
4665 
4666 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4667 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4668 
4669 	/*
4670 	 * Slightly unusual semantics: if the user provides a NULL data
4671 	 * pointer, they don't want to receive the data, just the
4672 	 * maximum read length.
4673 	 */
4674 	auiop = NULL;
4675 	sizep = NULL;
4676 	cnt = 0;
4677 	if (data != NULL) {
4678 		aiov.iov_base = data;
4679 		aiov.iov_len = nbytes;
4680 		auio.uio_iov = &aiov;
4681 		auio.uio_offset = 0;
4682 		if (nbytes > INT_MAX) {
4683 			error = EINVAL;
4684 			goto done;
4685 		}
4686 		auio.uio_resid = nbytes;
4687 		auio.uio_rw = UIO_READ;
4688 		auio.uio_segflg = UIO_USERSPACE;
4689 		auio.uio_td = td;
4690 		auiop = &auio;
4691 		cnt = nbytes;
4692 	} else
4693 		sizep = &size;
4694 
4695 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4696 	    td->td_ucred, td);
4697 
4698 	if (auiop != NULL) {
4699 		cnt -= auio.uio_resid;
4700 		td->td_retval[0] = cnt;
4701 	} else
4702 		td->td_retval[0] = size;
4703 
4704 done:
4705 	VOP_UNLOCK(vp, 0, td);
4706 	return (error);
4707 }
4708 
4709 int
4710 extattr_get_file(td, uap)
4711 	struct thread *td;
4712 	struct extattr_get_file_args /* {
4713 		syscallarg(const char *) path;
4714 		syscallarg(int) attrnamespace;
4715 		syscallarg(const char *) attrname;
4716 		syscallarg(void *) data;
4717 		syscallarg(size_t) nbytes;
4718 	} */ *uap;
4719 {
4720 	struct nameidata nd;
4721 	char attrname[EXTATTR_MAXNAMELEN];
4722 	int error;
4723 
4724 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4725 	if (error)
4726 		return (error);
4727 
4728 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4729 	if ((error = namei(&nd)) != 0)
4730 		return (error);
4731 	NDFREE(&nd, NDF_ONLY_PNBUF);
4732 
4733 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4734 	    uap->data, uap->nbytes, td);
4735 
4736 	vrele(nd.ni_vp);
4737 	return (error);
4738 }
4739 
4740 int
4741 extattr_get_fd(td, uap)
4742 	struct thread *td;
4743 	struct extattr_get_fd_args /* {
4744 		syscallarg(int) fd;
4745 		syscallarg(int) attrnamespace;
4746 		syscallarg(const char *) attrname;
4747 		syscallarg(void *) data;
4748 		syscallarg(size_t) nbytes;
4749 	} */ *uap;
4750 {
4751 	struct file *fp;
4752 	char attrname[EXTATTR_MAXNAMELEN];
4753 	int error;
4754 
4755 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4756 	if (error)
4757 		return (error);
4758 
4759 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4760 		return (error);
4761 
4762 	error = extattr_get_vp((struct vnode *)fp->f_data, uap->attrnamespace,
4763 	    attrname, uap->data, uap->nbytes, td);
4764 
4765 	fdrop(fp, td);
4766 	return (error);
4767 }
4768 
4769 /*
4770  * extattr_delete_vp(): Delete a named extended attribute on a file or
4771  *                      directory
4772  *
4773  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4774  *            kernelspace string pointer "attrname", proc "p"
4775  * Returns: 0 on success, an error number otherwise
4776  * Locks: none
4777  * References: vp must be a valid reference for the duration of the call
4778  */
4779 static int
4780 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4781     struct thread *td)
4782 {
4783 	struct mount *mp;
4784 	int error;
4785 
4786 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4787 		return (error);
4788 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4789 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4790 
4791 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred,
4792 	    td);
4793 
4794 	VOP_UNLOCK(vp, 0, td);
4795 	vn_finished_write(mp);
4796 	return (error);
4797 }
4798 
4799 int
4800 extattr_delete_file(td, uap)
4801 	struct thread *td;
4802 	struct extattr_delete_file_args /* {
4803 		syscallarg(const char *) path;
4804 		syscallarg(int) attrnamespace;
4805 		syscallarg(const char *) attrname;
4806 	} */ *uap;
4807 {
4808 	struct nameidata nd;
4809 	char attrname[EXTATTR_MAXNAMELEN];
4810 	int error;
4811 
4812 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4813 	if (error)
4814 		return(error);
4815 
4816 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4817 	if ((error = namei(&nd)) != 0)
4818 		return(error);
4819 	NDFREE(&nd, NDF_ONLY_PNBUF);
4820 
4821 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4822 
4823 	vrele(nd.ni_vp);
4824 	return(error);
4825 }
4826 
4827 int
4828 extattr_delete_fd(td, uap)
4829 	struct thread *td;
4830 	struct extattr_delete_fd_args /* {
4831 		syscallarg(int) fd;
4832 		syscallarg(int) attrnamespace;
4833 		syscallarg(const char *) attrname;
4834 	} */ *uap;
4835 {
4836 	struct file *fp;
4837 	struct vnode *vp;
4838 	char attrname[EXTATTR_MAXNAMELEN];
4839 	int error;
4840 
4841 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4842 	if (error)
4843 		return (error);
4844 
4845 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
4846 		return (error);
4847 	vp = (struct vnode *)fp->f_data;
4848 
4849 	error = extattr_delete_vp((struct vnode *)fp->f_data,
4850 	    uap->attrnamespace, attrname, td);
4851 
4852 	fdrop(fp, td);
4853 	return (error);
4854 }
4855