xref: /freebsd/sys/kern/vfs_extattr.c (revision ee2ea5ceafed78a5bd9810beb9e3ca927180c226)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  * $FreeBSD$
40  */
41 
42 /* For 4.3 integer FS ID compatibility */
43 #include "opt_compat.h"
44 #include "opt_ffs.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/sysent.h>
51 #include <sys/malloc.h>
52 #include <sys/mount.h>
53 #include <sys/mutex.h>
54 #include <sys/sysproto.h>
55 #include <sys/namei.h>
56 #include <sys/filedesc.h>
57 #include <sys/kernel.h>
58 #include <sys/fcntl.h>
59 #include <sys/file.h>
60 #include <sys/linker.h>
61 #include <sys/stat.h>
62 #include <sys/sx.h>
63 #include <sys/unistd.h>
64 #include <sys/vnode.h>
65 #include <sys/proc.h>
66 #include <sys/dirent.h>
67 #include <sys/extattr.h>
68 #include <sys/jail.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/limits.h>
72 #include <machine/stdarg.h>
73 
74 #include <vm/vm.h>
75 #include <vm/vm_object.h>
76 #include <vm/vm_page.h>
77 #include <vm/uma.h>
78 
79 static int change_dir(struct nameidata *ndp, struct thread *td);
80 static void checkdirs(struct vnode *olddp, struct vnode *newdp);
81 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
82 static int getutimes(const struct timeval *, struct timespec *);
83 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
84 static int setfmode(struct thread *td, struct vnode *, int);
85 static int setfflags(struct thread *td, struct vnode *, int);
86 static int setutimes(struct thread *td, struct vnode *,
87     const struct timespec *, int);
88 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
89     struct thread *td);
90 static void vfs_freeopts(struct vfsoptlist *opt);
91 static int vfs_nmount(struct thread *td, int, struct uio *);
92 
93 static int	usermount = 0;	/* if 1, non-root can mount fs. */
94 
95 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
96 
97 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
98 
99 /*
100  * Virtual File System System Calls
101  */
102 
103 #ifndef _SYS_SYSPROTO_H_
104 struct nmount_args {
105 	struct iovec    *iovp;
106 	unsigned int    iovcnt;
107 	int             flags;
108 };
109 #endif
110 /* ARGSUSED */
111 int
112 nmount(td, uap)
113 	struct thread *td;
114 	struct nmount_args /* {
115 		syscallarg(struct iovec *) iovp;
116 		syscallarg(unsigned int) iovcnt;
117 		syscallarg(int) flags;
118 	} */ *uap;
119 {
120 	struct uio auio;
121 	struct iovec *iov, *needfree;
122 	struct iovec aiov[UIO_SMALLIOV];
123 	long error, i;
124 	u_int iovlen, iovcnt;
125 
126 	iovcnt = SCARG(uap, iovcnt);
127 	iovlen = iovcnt * sizeof (struct iovec);
128 	/*
129 	 * Check that we have an even number of iovec's
130 	 * and that we have at least two options.
131 	 */
132 	if ((iovcnt & 1) || (iovcnt < 4) || (iovcnt > UIO_MAXIOV))
133 		return (EINVAL);
134 
135 	if (iovcnt > UIO_SMALLIOV) {
136 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
137 		needfree = iov;
138 	} else {
139 		iov = aiov;
140 		needfree = NULL;
141 	}
142 	auio.uio_iov = iov;
143 	auio.uio_iovcnt = iovcnt;
144 	auio.uio_rw = UIO_WRITE;
145 	auio.uio_segflg = UIO_USERSPACE;
146 	auio.uio_td = td;
147 	auio.uio_offset = 0;
148 	auio.uio_resid = 0;
149 	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
150 		goto finish;
151 	for (i = 0; i < iovcnt; i++) {
152 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
153 			error = EINVAL;
154 			goto finish;
155 		}
156 		auio.uio_resid += iov->iov_len;
157 		iov++;
158 	}
159 	error = vfs_nmount(td, SCARG(uap, flags), &auio);
160 finish:
161 	if (needfree != NULL)
162 		free(needfree, M_TEMP);
163 	return (error);
164 }
165 
166 /*
167  * Release all resources related to the
168  * mount options.
169  */
170 static void
171 vfs_freeopts(struct vfsoptlist *opt)
172 {
173 	free(opt->opt, M_MOUNT);
174 	free(opt->optbuf, M_MOUNT);
175 	free(opt, M_MOUNT);
176 }
177 
178 int
179 kernel_mount(iovp, iovcnt, flags)
180 	struct iovec *iovp;
181 	unsigned int iovcnt;
182 	int flags;
183 {
184 	struct uio auio;
185 	struct iovec *iov;
186 	int error, i;
187 
188 	/*
189 	 * Check that we have an even number of iovec's
190 	 * and that we have at least two options.
191 	 */
192 	if ((iovcnt & 1) || (iovcnt < 4))
193 		return (EINVAL);
194 
195 	auio.uio_iov = iovp;
196 	auio.uio_iovcnt = iovcnt;
197 	auio.uio_rw = UIO_WRITE;
198 	auio.uio_segflg = UIO_SYSSPACE;
199 	auio.uio_offset = 0;
200 	auio.uio_td = NULL;
201 	auio.uio_resid = 0;
202 	iov = iovp;
203 	for (i = 0; i < iovcnt; i++) {
204 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
205 			return (EINVAL);
206 		}
207 		auio.uio_resid += iov->iov_len;
208 		iov++;
209 	}
210 
211 	error = vfs_nmount(curthread, flags, &auio);
212 	return (error);
213 }
214 
215 int
216 kernel_vmount(int flags, ...)
217 {
218 	struct iovec *iovp;
219 	struct uio auio;
220 	va_list ap;
221 	unsigned int iovcnt, iovlen, len;
222 	const char *cp;
223 	char *buf, *pos;
224 	size_t n;
225 	int error, i;
226 
227 	len = 0;
228 	va_start(ap, flags);
229 	for (iovcnt = 0; (cp = va_arg(ap, const char *)) != NULL; iovcnt++)
230 		len += strlen(cp) + 1;
231 	va_end(ap);
232 
233 	if (iovcnt < 4 || iovcnt & 1)
234 		return (EINVAL);
235 
236 	iovlen = iovcnt * sizeof (struct iovec);
237 	MALLOC(iovp, struct iovec *, iovlen, M_MOUNT, M_WAITOK);
238 	MALLOC(buf, char *, len, M_MOUNT, M_WAITOK);
239 	pos = buf;
240 	va_start(ap, flags);
241 	for (i = 0; i < iovcnt; i++) {
242 		cp = va_arg(ap, const char *);
243 		copystr(cp, pos, len - (pos - buf), &n);
244 		iovp[i].iov_base = pos;
245 		iovp[i].iov_len = n;
246 		pos += n;
247 	}
248 	va_end(ap);
249 
250 	auio.uio_iov = iovp;
251 	auio.uio_iovcnt = iovcnt;
252 	auio.uio_rw = UIO_WRITE;
253 	auio.uio_segflg = UIO_SYSSPACE;
254 	auio.uio_offset = 0;
255 	auio.uio_td = NULL;
256 	auio.uio_resid = len;
257 
258 	error = vfs_nmount(curthread, flags, &auio);
259 	FREE(iovp, M_MOUNT);
260 	FREE(buf, M_MOUNT);
261 	return (error);
262 }
263 
264 /*
265  * vfs_nmount(): actually attempt a filesystem mount.
266  */
267 static int
268 vfs_nmount(td, fsflags, fsoptions)
269 	struct thread *td;
270 	int fsflags;		/* Flags common to all filesystems. */
271 	struct uio *fsoptions;	/* Options local to the filesystem. */
272 {
273 	linker_file_t lf;
274 	struct vnode *vp;
275 	struct mount *mp;
276 	struct vfsconf *vfsp;
277 	struct iovec *cur;
278 	struct vfsoptlist *optlist;
279 	struct vfsopt *opt;
280 	char *buf, *fstype, *fspath;
281 	int error, flag = 0, kern_flag = 0, i, len, optcnt;
282 	int offset, iovcnt, fstypelen, fspathlen;
283 	struct vattr va;
284 	struct nameidata nd;
285 
286 	/*
287 	 * Allocate memory to hold the vfsopt structures.
288 	 */
289 	iovcnt = fsoptions->uio_iovcnt;
290 	optcnt = iovcnt >> 1;
291 	opt = malloc(sizeof (struct vfsopt) * optcnt,
292 	    M_MOUNT, M_WAITOK | M_ZERO);
293 
294 	/*
295 	 * Count the size of the buffer for options,
296 	 * allocate it, and fill in the vfsopt structures.
297 	 */
298 	cur = fsoptions->uio_iov;
299 	len = fsoptions->uio_resid;
300 	buf = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
301 
302 	optlist = malloc(sizeof (struct vfsoptlist), M_MOUNT, M_WAITOK);
303 	optlist->opt = opt;
304 	optlist->optbuf = buf;
305 	optlist->optcnt = optcnt;
306 
307 	offset = i = 0;
308 	cur = fsoptions->uio_iov;
309 	while (i < optcnt) {
310 		opt[i].name = buf + offset;
311 		/* Ensure the name of an option is a string. */
312 		if (opt[i].name[cur->iov_len - 1] != '\0') {
313 			error = EINVAL;
314 			goto bad;
315 		}
316 		offset += cur->iov_len;
317 		cur++;
318 		opt[i].len = cur->iov_len;
319 		/*
320 		 * Prevent consumers from trying to
321 		 * read the value of a 0 length option
322 		 * by setting it to NULL.
323 		 */
324 		if (opt[i].len == 0)
325 			opt[i].value = NULL;
326 		else
327 			opt[i].value = buf + offset;
328 		offset += cur->iov_len;
329 		cur++; i++;
330 	}
331 
332 	if ((error = uiomove(buf, len, fsoptions)) != 0)
333 		goto bad;
334 
335 	/*
336 	 * We need these two options before the others,
337 	 * and they are mandatory for any filesystem.
338 	 * Ensure they are NUL terminated as well.
339 	 */
340 	fstypelen = 0;
341 	error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
342 	if (error || fstype[fstypelen - 1] != '\0') {
343 		error = EINVAL;
344 		goto bad;
345 	}
346 	fspathlen = 0;
347 	error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
348 	if (error || fspath[fspathlen - 1] != '\0') {
349 		error = EINVAL;
350 		goto bad;
351 	}
352 
353 	/*
354 	 * Be ultra-paranoid about making sure the type and fspath
355 	 * variables will fit in our mp buffers, including the
356 	 * terminating NUL.
357 	 */
358 	if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
359 		error = ENAMETOOLONG;
360 		goto bad;
361 	}
362 
363 	if (usermount == 0) {
364 	       	error = suser(td);
365 		if (error)
366 			goto bad;
367 	}
368 	/*
369 	 * Do not allow NFS export by non-root users.
370 	 */
371 	if (fsflags & MNT_EXPORTED) {
372 		error = suser(td);
373 		if (error)
374 			goto bad;
375 	}
376 	/*
377 	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
378 	 */
379 	if (suser(td))
380 		fsflags |= MNT_NOSUID | MNT_NODEV;
381 	/*
382 	 * Get vnode to be covered
383 	 */
384 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
385 	if ((error = namei(&nd)) != 0)
386 		goto bad;
387 	NDFREE(&nd, NDF_ONLY_PNBUF);
388 	vp = nd.ni_vp;
389 	if (fsflags & MNT_UPDATE) {
390 		if ((vp->v_flag & VROOT) == 0) {
391 			vput(vp);
392 			error = EINVAL;
393 			goto bad;
394 		}
395 		mp = vp->v_mount;
396 		flag = mp->mnt_flag;
397 		kern_flag = mp->mnt_kern_flag;
398 		/*
399 		 * We only allow the filesystem to be reloaded if it
400 		 * is currently mounted read-only.
401 		 */
402 		if ((fsflags & MNT_RELOAD) &&
403 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
404 			vput(vp);
405 			error = EOPNOTSUPP;	/* Needs translation */
406 			goto bad;
407 		}
408 		/*
409 		 * Only root, or the user that did the original mount is
410 		 * permitted to update it.
411 		 */
412 		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
413 			error = suser(td);
414 			if (error) {
415 				vput(vp);
416 				goto bad;
417 			}
418 		}
419 		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
420 			vput(vp);
421 			error = EBUSY;
422 			goto bad;
423 		}
424 		mtx_lock(&vp->v_interlock);
425 		if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
426 			mtx_unlock(&vp->v_interlock);
427 			vfs_unbusy(mp, td);
428 			vput(vp);
429 			error = EBUSY;
430 			goto bad;
431 		}
432 		vp->v_flag |= VMOUNT;
433 		mtx_unlock(&vp->v_interlock);
434 		mp->mnt_flag |= fsflags &
435 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
436 		VOP_UNLOCK(vp, 0, td);
437 		mp->mnt_optnew = optlist;
438 		goto update;
439 	}
440 	/*
441 	 * If the user is not root, ensure that they own the directory
442 	 * onto which we are attempting to mount.
443 	 */
444 	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
445 	if (error) {
446 		vput(vp);
447 		goto bad;
448 	}
449 	if (va.va_uid != td->td_ucred->cr_uid) {
450 		error = suser(td);
451 		if (error) {
452 			vput(vp);
453 			goto bad;
454 		}
455 	}
456 	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
457 		vput(vp);
458 		goto bad;
459 	}
460 	if (vp->v_type != VDIR) {
461 		vput(vp);
462 		error = ENOTDIR;
463 		goto bad;
464 	}
465 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
466 		if (!strcmp(vfsp->vfc_name, fstype))
467 			break;
468 	if (vfsp == NULL) {
469 		/* Only load modules for root (very important!). */
470 		error = suser(td);
471 		if (error) {
472 			vput(vp);
473 			goto bad;
474 		}
475 		error = securelevel_gt(td->td_ucred, 0);
476 		if (error) {
477 			vput(vp);
478 			goto bad;
479 		}
480 		error = linker_load_file(fstype, &lf);
481 		if (error || lf == NULL) {
482 			vput(vp);
483 			if (lf == NULL)
484 				error = ENODEV;
485 			goto bad;
486 		}
487 		lf->userrefs++;
488 		/* Look up again to see if the VFS was loaded. */
489 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
490 			if (!strcmp(vfsp->vfc_name, fstype))
491 				break;
492 		if (vfsp == NULL) {
493 			lf->userrefs--;
494 			linker_file_unload(lf);
495 			vput(vp);
496 			error = ENODEV;
497 			goto bad;
498 		}
499 	}
500 	mtx_lock(&vp->v_interlock);
501 	if ((vp->v_flag & VMOUNT) != 0 ||
502 	    vp->v_mountedhere != NULL) {
503 		mtx_unlock(&vp->v_interlock);
504 		vput(vp);
505 		error = EBUSY;
506 		goto bad;
507 	}
508 	vp->v_flag |= VMOUNT;
509 	mtx_unlock(&vp->v_interlock);
510 
511 	/*
512 	 * Allocate and initialize the filesystem.
513 	 */
514 	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
515 	TAILQ_INIT(&mp->mnt_nvnodelist);
516 	TAILQ_INIT(&mp->mnt_reservedvnlist);
517 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
518 	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
519 	mp->mnt_op = vfsp->vfc_vfsops;
520 	mp->mnt_vfc = vfsp;
521 	vfsp->vfc_refcount++;
522 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
523 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
524 	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
525 	mp->mnt_vnodecovered = vp;
526 	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
527 	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
528 	mp->mnt_iosize_max = DFLTPHYS;
529 	VOP_UNLOCK(vp, 0, td);
530 
531 	mp->mnt_opt = optlist;
532 update:
533 	/*
534 	 * Check if the fs implements the new VFS_NMOUNT()
535 	 * function, since the new system call was used.
536 	 */
537 	if (mp->mnt_op->vfs_mount != NULL) {
538 		printf("%s doesn't support the new mount syscall\n",
539 		    mp->mnt_vfc->vfc_name);
540 		mtx_lock(&vp->v_interlock);
541 		vp->v_flag &= ~VMOUNT;
542 		mtx_unlock(&vp->v_interlock);
543 		if (mp->mnt_flag & MNT_UPDATE)
544 			vfs_unbusy(mp, td);
545 		else {
546 			mp->mnt_vfc->vfc_refcount--;
547 			vfs_unbusy(mp, td);
548 			free((caddr_t)mp, M_MOUNT);
549 		}
550 		vput(vp);
551 		error = EOPNOTSUPP;
552 		goto bad;
553 	}
554 
555 	/*
556 	 * Set the mount level flags.
557 	 */
558 	if (fsflags & MNT_RDONLY)
559 		mp->mnt_flag |= MNT_RDONLY;
560 	else if (mp->mnt_flag & MNT_RDONLY)
561 		mp->mnt_kern_flag |= MNTK_WANTRDWR;
562 	mp->mnt_flag &=~ MNT_UPDATEMASK;
563 	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
564 	/*
565 	 * Mount the filesystem.
566 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
567 	 * get.  No freeing of cn_pnbuf.
568 	 */
569 	error = VFS_NMOUNT(mp, &nd, td);
570 	if (mp->mnt_flag & MNT_UPDATE) {
571 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
572 			mp->mnt_flag &= ~MNT_RDONLY;
573 		mp->mnt_flag &=~
574 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
575 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
576 		if (error) {
577 			mp->mnt_flag = flag;
578 			mp->mnt_kern_flag = kern_flag;
579 			vfs_freeopts(mp->mnt_optnew);
580 		} else {
581 			vfs_freeopts(mp->mnt_opt);
582 			mp->mnt_opt = mp->mnt_optnew;
583 		}
584 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
585 			if (mp->mnt_syncer == NULL)
586 				error = vfs_allocate_syncvnode(mp);
587 		} else {
588 			if (mp->mnt_syncer != NULL)
589 				vrele(mp->mnt_syncer);
590 			mp->mnt_syncer = NULL;
591 		}
592 		vfs_unbusy(mp, td);
593 		mtx_lock(&vp->v_interlock);
594 		vp->v_flag &= ~VMOUNT;
595 		mtx_unlock(&vp->v_interlock);
596 		vrele(vp);
597 		return (error);
598 	}
599 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
600 	/*
601 	 * Put the new filesystem on the mount list after root.
602 	 */
603 	cache_purge(vp);
604 	if (!error) {
605 		struct vnode *newdp;
606 
607 		mtx_lock(&vp->v_interlock);
608 		vp->v_flag &= ~VMOUNT;
609 		vp->v_mountedhere = mp;
610 		mtx_unlock(&vp->v_interlock);
611 		mtx_lock(&mountlist_mtx);
612 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
613 		mtx_unlock(&mountlist_mtx);
614 		if (VFS_ROOT(mp, &newdp))
615 			panic("mount: lost mount");
616 		checkdirs(vp, newdp);
617 		vput(newdp);
618 		VOP_UNLOCK(vp, 0, td);
619 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
620 			error = vfs_allocate_syncvnode(mp);
621 		vfs_unbusy(mp, td);
622 		if ((error = VFS_START(mp, 0, td)) != 0) {
623 			vrele(vp);
624 			goto bad;
625 		}
626 	} else {
627 		mtx_lock(&vp->v_interlock);
628 		vp->v_flag &= ~VMOUNT;
629 		mtx_unlock(&vp->v_interlock);
630 		mp->mnt_vfc->vfc_refcount--;
631 		vfs_unbusy(mp, td);
632 		free((caddr_t)mp, M_MOUNT);
633 		vput(vp);
634 		goto bad;
635 	}
636 	return (0);
637 bad:
638 	vfs_freeopts(optlist);
639 	return (error);
640 }
641 
642 /*
643  * Old Mount API.
644  */
645 #ifndef _SYS_SYSPROTO_H_
646 struct mount_args {
647 	char	*type;
648 	char	*path;
649 	int	flags;
650 	caddr_t	data;
651 };
652 #endif
653 /* ARGSUSED */
654 int
655 mount(td, uap)
656 	struct thread *td;
657 	struct mount_args /* {
658 		syscallarg(char *) type;
659 		syscallarg(char *) path;
660 		syscallarg(int) flags;
661 		syscallarg(caddr_t) data;
662 	} */ *uap;
663 {
664 	char *fstype;
665 	char *fspath;
666 	int error;
667 
668 	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
669 	fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK);
670 
671 	/*
672 	 * vfs_mount() actually takes a kernel string for `type' and
673 	 * `path' now, so extract them.
674 	 */
675 	error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL);
676 	if (error)
677 		goto finish;
678 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
679 	if (error)
680 		goto finish;
681 	error = vfs_mount(td, fstype, fspath, SCARG(uap, flags),
682 	    SCARG(uap, data));
683 finish:
684 	free(fstype, M_TEMP);
685 	free(fspath, M_TEMP);
686 	return (error);
687 }
688 
689 /*
690  * vfs_mount(): actually attempt a filesystem mount.
691  *
692  * This routine is designed to be a "generic" entry point for routines
693  * that wish to mount a filesystem. All parameters except `fsdata' are
694  * pointers into kernel space. `fsdata' is currently still a pointer
695  * into userspace.
696  */
697 int
698 vfs_mount(td, fstype, fspath, fsflags, fsdata)
699 	struct thread *td;
700 	const char *fstype;
701 	char *fspath;
702 	int fsflags;
703 	void *fsdata;
704 {
705 	linker_file_t lf;
706 	struct vnode *vp;
707 	struct mount *mp;
708 	struct vfsconf *vfsp;
709 	int error, flag = 0, kern_flag = 0;
710 	struct vattr va;
711 	struct nameidata nd;
712 
713 	/*
714 	 * Be ultra-paranoid about making sure the type and fspath
715 	 * variables will fit in our mp buffers, including the
716 	 * terminating NUL.
717 	 */
718 	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
719 		return (ENAMETOOLONG);
720 
721 	if (usermount == 0) {
722 		error = suser(td);
723 		if (error)
724 			return (error);
725 	}
726 	/*
727 	 * Do not allow NFS export by non-root users.
728 	 */
729 	if (fsflags & MNT_EXPORTED) {
730 		error = suser(td);
731 		if (error)
732 			return (error);
733 	}
734 	/*
735 	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
736 	 */
737 	if (suser(td))
738 		fsflags |= MNT_NOSUID | MNT_NODEV;
739 	/*
740 	 * Get vnode to be covered
741 	 */
742 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
743 	if ((error = namei(&nd)) != 0)
744 		return (error);
745 	NDFREE(&nd, NDF_ONLY_PNBUF);
746 	vp = nd.ni_vp;
747 	if (fsflags & MNT_UPDATE) {
748 		if ((vp->v_flag & VROOT) == 0) {
749 			vput(vp);
750 			return (EINVAL);
751 		}
752 		mp = vp->v_mount;
753 		flag = mp->mnt_flag;
754 		kern_flag = mp->mnt_kern_flag;
755 		/*
756 		 * We only allow the filesystem to be reloaded if it
757 		 * is currently mounted read-only.
758 		 */
759 		if ((fsflags & MNT_RELOAD) &&
760 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
761 			vput(vp);
762 			return (EOPNOTSUPP);	/* Needs translation */
763 		}
764 		/*
765 		 * Only root, or the user that did the original mount is
766 		 * permitted to update it.
767 		 */
768 		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
769 			error = suser(td);
770 			if (error) {
771 				vput(vp);
772 				return (error);
773 			}
774 		}
775 		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
776 			vput(vp);
777 			return (EBUSY);
778 		}
779 		mtx_lock(&vp->v_interlock);
780 		if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
781 			mtx_unlock(&vp->v_interlock);
782 			vfs_unbusy(mp, td);
783 			vput(vp);
784 			return (EBUSY);
785 		}
786 		vp->v_flag |= VMOUNT;
787 		mtx_unlock(&vp->v_interlock);
788 		mp->mnt_flag |= fsflags &
789 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
790 		VOP_UNLOCK(vp, 0, td);
791 		goto update;
792 	}
793 	/*
794 	 * If the user is not root, ensure that they own the directory
795 	 * onto which we are attempting to mount.
796 	 */
797 	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
798 	if (error) {
799 		vput(vp);
800 		return (error);
801 	}
802 	if (va.va_uid != td->td_ucred->cr_uid) {
803 		error = suser(td);
804 		if (error) {
805 			vput(vp);
806 			return (error);
807 		}
808 	}
809 	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
810 		vput(vp);
811 		return (error);
812 	}
813 	if (vp->v_type != VDIR) {
814 		vput(vp);
815 		return (ENOTDIR);
816 	}
817 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
818 		if (!strcmp(vfsp->vfc_name, fstype))
819 			break;
820 	if (vfsp == NULL) {
821 		/* Only load modules for root (very important!). */
822 		error = suser(td);
823 		if (error) {
824 			vput(vp);
825 			return (error);
826 		}
827 		error = securelevel_gt(td->td_ucred, 0);
828 		if (error) {
829 			vput(vp);
830 			return (error);
831 		}
832 		error = linker_load_file(fstype, &lf);
833 		if (error || lf == NULL) {
834 			vput(vp);
835 			if (lf == NULL)
836 				error = ENODEV;
837 			return (error);
838 		}
839 		lf->userrefs++;
840 		/* Look up again to see if the VFS was loaded. */
841 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
842 			if (!strcmp(vfsp->vfc_name, fstype))
843 				break;
844 		if (vfsp == NULL) {
845 			lf->userrefs--;
846 			linker_file_unload(lf);
847 			vput(vp);
848 			return (ENODEV);
849 		}
850 	}
851 	mtx_lock(&vp->v_interlock);
852 	if ((vp->v_flag & VMOUNT) != 0 ||
853 	    vp->v_mountedhere != NULL) {
854 		mtx_unlock(&vp->v_interlock);
855 		vput(vp);
856 		return (EBUSY);
857 	}
858 	vp->v_flag |= VMOUNT;
859 	mtx_unlock(&vp->v_interlock);
860 
861 	/*
862 	 * Allocate and initialize the filesystem.
863 	 */
864 	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
865 	TAILQ_INIT(&mp->mnt_nvnodelist);
866 	TAILQ_INIT(&mp->mnt_reservedvnlist);
867 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
868 	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
869 	mp->mnt_op = vfsp->vfc_vfsops;
870 	mp->mnt_vfc = vfsp;
871 	vfsp->vfc_refcount++;
872 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
873 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
874 	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
875 	mp->mnt_vnodecovered = vp;
876 	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
877 	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
878 	mp->mnt_iosize_max = DFLTPHYS;
879 	VOP_UNLOCK(vp, 0, td);
880 update:
881 	/*
882 	 * Check if the fs implements the old VFS_MOUNT()
883 	 * function, since the old system call was used.
884 	 */
885 	if (mp->mnt_op->vfs_mount == NULL) {
886 		printf("%s doesn't support the old mount syscall\n",
887 		    mp->mnt_vfc->vfc_name);
888 		mtx_lock(&vp->v_interlock);
889 		vp->v_flag &= ~VMOUNT;
890 		mtx_unlock(&vp->v_interlock);
891 		if (mp->mnt_flag & MNT_UPDATE)
892 			vfs_unbusy(mp, td);
893 		else {
894 			mp->mnt_vfc->vfc_refcount--;
895 			vfs_unbusy(mp, td);
896 			free((caddr_t)mp, M_MOUNT);
897 		}
898 		vput(vp);
899 		return (EOPNOTSUPP);
900 	}
901 
902 	/*
903 	 * Set the mount level flags.
904 	 */
905 	if (fsflags & MNT_RDONLY)
906 		mp->mnt_flag |= MNT_RDONLY;
907 	else if (mp->mnt_flag & MNT_RDONLY)
908 		mp->mnt_kern_flag |= MNTK_WANTRDWR;
909 	mp->mnt_flag &=~ MNT_UPDATEMASK;
910 	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
911 	/*
912 	 * Mount the filesystem.
913 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
914 	 * get.  No freeing of cn_pnbuf.
915 	 */
916 	error = VFS_MOUNT(mp, fspath, fsdata, &nd, td);
917 	if (mp->mnt_flag & MNT_UPDATE) {
918 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
919 			mp->mnt_flag &= ~MNT_RDONLY;
920 		mp->mnt_flag &=~
921 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
922 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
923 		if (error) {
924 			mp->mnt_flag = flag;
925 			mp->mnt_kern_flag = kern_flag;
926 		}
927 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
928 			if (mp->mnt_syncer == NULL)
929 				error = vfs_allocate_syncvnode(mp);
930 		} else {
931 			if (mp->mnt_syncer != NULL)
932 				vrele(mp->mnt_syncer);
933 			mp->mnt_syncer = NULL;
934 		}
935 		vfs_unbusy(mp, td);
936 		mtx_lock(&vp->v_interlock);
937 		vp->v_flag &= ~VMOUNT;
938 		mtx_unlock(&vp->v_interlock);
939 		vrele(vp);
940 		return (error);
941 	}
942 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
943 	/*
944 	 * Put the new filesystem on the mount list after root.
945 	 */
946 	cache_purge(vp);
947 	if (!error) {
948 		struct vnode *newdp;
949 
950 		mtx_lock(&vp->v_interlock);
951 		vp->v_flag &= ~VMOUNT;
952 		vp->v_mountedhere = mp;
953 		mtx_unlock(&vp->v_interlock);
954 		mtx_lock(&mountlist_mtx);
955 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
956 		mtx_unlock(&mountlist_mtx);
957 		if (VFS_ROOT(mp, &newdp))
958 			panic("mount: lost mount");
959 		checkdirs(vp, newdp);
960 		vput(newdp);
961 		VOP_UNLOCK(vp, 0, td);
962 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
963 			error = vfs_allocate_syncvnode(mp);
964 		vfs_unbusy(mp, td);
965 		if ((error = VFS_START(mp, 0, td)) != 0)
966 			vrele(vp);
967 	} else {
968 		mtx_lock(&vp->v_interlock);
969 		vp->v_flag &= ~VMOUNT;
970 		mtx_unlock(&vp->v_interlock);
971 		mp->mnt_vfc->vfc_refcount--;
972 		vfs_unbusy(mp, td);
973 		free((caddr_t)mp, M_MOUNT);
974 		vput(vp);
975 	}
976 	return (error);
977 }
978 
979 /*
980  * Scan all active processes to see if any of them have a current
981  * or root directory of `olddp'. If so, replace them with the new
982  * mount point.
983  */
984 static void
985 checkdirs(olddp, newdp)
986 	struct vnode *olddp, *newdp;
987 {
988 	struct filedesc *fdp;
989 	struct proc *p;
990 	int nrele;
991 
992 	if (olddp->v_usecount == 1)
993 		return;
994 	sx_slock(&allproc_lock);
995 	LIST_FOREACH(p, &allproc, p_list) {
996 		PROC_LOCK(p);
997 		fdp = p->p_fd;
998 		if (fdp == NULL) {
999 			PROC_UNLOCK(p);
1000 			continue;
1001 		}
1002 		nrele = 0;
1003 		FILEDESC_LOCK(fdp);
1004 		if (fdp->fd_cdir == olddp) {
1005 			VREF(newdp);
1006 			fdp->fd_cdir = newdp;
1007 			nrele++;
1008 		}
1009 		if (fdp->fd_rdir == olddp) {
1010 			VREF(newdp);
1011 			fdp->fd_rdir = newdp;
1012 			nrele++;
1013 		}
1014 		FILEDESC_UNLOCK(fdp);
1015 		PROC_UNLOCK(p);
1016 		while (nrele--)
1017 			vrele(olddp);
1018 	}
1019 	sx_sunlock(&allproc_lock);
1020 	if (rootvnode == olddp) {
1021 		vrele(rootvnode);
1022 		VREF(newdp);
1023 		rootvnode = newdp;
1024 	}
1025 }
1026 
1027 /*
1028  * Unmount a file system.
1029  *
1030  * Note: unmount takes a path to the vnode mounted on as argument,
1031  * not special file (as before).
1032  */
1033 #ifndef _SYS_SYSPROTO_H_
1034 struct unmount_args {
1035 	char	*path;
1036 	int	flags;
1037 };
1038 #endif
1039 /* ARGSUSED */
1040 int
1041 unmount(td, uap)
1042 	struct thread *td;
1043 	register struct unmount_args /* {
1044 		syscallarg(char *) path;
1045 		syscallarg(int) flags;
1046 	} */ *uap;
1047 {
1048 	register struct vnode *vp;
1049 	struct mount *mp;
1050 	int error;
1051 	struct nameidata nd;
1052 
1053 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1054 	    SCARG(uap, path), td);
1055 	if ((error = namei(&nd)) != 0)
1056 		return (error);
1057 	vp = nd.ni_vp;
1058 	NDFREE(&nd, NDF_ONLY_PNBUF);
1059 	mp = vp->v_mount;
1060 
1061 	/*
1062 	 * Only root, or the user that did the original mount is
1063 	 * permitted to unmount this filesystem.
1064 	 */
1065 	if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
1066 		error = suser(td);
1067 		if (error) {
1068 			vput(vp);
1069 			return (error);
1070 		}
1071 	}
1072 
1073 	/*
1074 	 * Don't allow unmounting the root file system.
1075 	 */
1076 	if (mp->mnt_flag & MNT_ROOTFS) {
1077 		vput(vp);
1078 		return (EINVAL);
1079 	}
1080 
1081 	/*
1082 	 * Must be the root of the filesystem
1083 	 */
1084 	if ((vp->v_flag & VROOT) == 0) {
1085 		vput(vp);
1086 		return (EINVAL);
1087 	}
1088 	vput(vp);
1089 	return (dounmount(mp, SCARG(uap, flags), td));
1090 }
1091 
1092 /*
1093  * Do the actual file system unmount.
1094  */
1095 int
1096 dounmount(mp, flags, td)
1097 	struct mount *mp;
1098 	int flags;
1099 	struct thread *td;
1100 {
1101 	struct vnode *coveredvp, *fsrootvp;
1102 	int error;
1103 	int async_flag;
1104 
1105 	mtx_lock(&mountlist_mtx);
1106 	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
1107 		mtx_unlock(&mountlist_mtx);
1108 		return (EBUSY);
1109 	}
1110 	mp->mnt_kern_flag |= MNTK_UNMOUNT;
1111 	/* Allow filesystems to detect that a forced unmount is in progress. */
1112 	if (flags & MNT_FORCE)
1113 		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
1114 	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
1115 	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
1116 	if (error) {
1117 		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1118 		if (mp->mnt_kern_flag & MNTK_MWAIT)
1119 			wakeup((caddr_t)mp);
1120 		return (error);
1121 	}
1122 	vn_start_write(NULL, &mp, V_WAIT);
1123 
1124 	if (mp->mnt_flag & MNT_EXPUBLIC)
1125 		vfs_setpublicfs(NULL, NULL, NULL);
1126 
1127 	vfs_msync(mp, MNT_WAIT);
1128 	async_flag = mp->mnt_flag & MNT_ASYNC;
1129 	mp->mnt_flag &=~ MNT_ASYNC;
1130 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
1131 	if (mp->mnt_syncer != NULL)
1132 		vrele(mp->mnt_syncer);
1133 	/* Move process cdir/rdir refs on fs root to underlying vnode. */
1134 	if (VFS_ROOT(mp, &fsrootvp) == 0) {
1135 		if (mp->mnt_vnodecovered != NULL)
1136 			checkdirs(fsrootvp, mp->mnt_vnodecovered);
1137 		if (fsrootvp == rootvnode) {
1138 			vrele(rootvnode);
1139 			rootvnode = NULL;
1140 		}
1141 		vput(fsrootvp);
1142 	}
1143 	if (((mp->mnt_flag & MNT_RDONLY) ||
1144 	     (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) ||
1145 	    (flags & MNT_FORCE)) {
1146 		error = VFS_UNMOUNT(mp, flags, td);
1147 	}
1148 	vn_finished_write(mp);
1149 	if (error) {
1150 		/* Undo cdir/rdir and rootvnode changes made above. */
1151 		if (VFS_ROOT(mp, &fsrootvp) == 0) {
1152 			if (mp->mnt_vnodecovered != NULL)
1153 				checkdirs(mp->mnt_vnodecovered, fsrootvp);
1154 			if (rootvnode == NULL) {
1155 				rootvnode = fsrootvp;
1156 				vref(rootvnode);
1157 			}
1158 			vput(fsrootvp);
1159 		}
1160 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
1161 			(void) vfs_allocate_syncvnode(mp);
1162 		mtx_lock(&mountlist_mtx);
1163 		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1164 		mp->mnt_flag |= async_flag;
1165 		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
1166 		    &mountlist_mtx, td);
1167 		if (mp->mnt_kern_flag & MNTK_MWAIT)
1168 			wakeup((caddr_t)mp);
1169 		return (error);
1170 	}
1171 	mtx_lock(&mountlist_mtx);
1172 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
1173 	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
1174 		coveredvp->v_mountedhere = NULL;
1175 	mp->mnt_vfc->vfc_refcount--;
1176 	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
1177 		panic("unmount: dangling vnode");
1178 	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td);
1179 	lockdestroy(&mp->mnt_lock);
1180 	if (coveredvp != NULL)
1181 		vrele(coveredvp);
1182 	if (mp->mnt_kern_flag & MNTK_MWAIT)
1183 		wakeup((caddr_t)mp);
1184 	if (mp->mnt_op->vfs_mount == NULL)
1185 		vfs_freeopts(mp->mnt_opt);
1186 	free((caddr_t)mp, M_MOUNT);
1187 	return (0);
1188 }
1189 
1190 /*
1191  * Sync each mounted filesystem.
1192  */
1193 #ifndef _SYS_SYSPROTO_H_
1194 struct sync_args {
1195         int     dummy;
1196 };
1197 #endif
1198 
1199 #ifdef DEBUG
1200 static int syncprt = 0;
1201 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
1202 #endif
1203 
1204 /* ARGSUSED */
1205 int
1206 sync(td, uap)
1207 	struct thread *td;
1208 	struct sync_args *uap;
1209 {
1210 	struct mount *mp, *nmp;
1211 	int asyncflag;
1212 
1213 	mtx_lock(&mountlist_mtx);
1214 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
1215 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
1216 			nmp = TAILQ_NEXT(mp, mnt_list);
1217 			continue;
1218 		}
1219 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
1220 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
1221 			asyncflag = mp->mnt_flag & MNT_ASYNC;
1222 			mp->mnt_flag &= ~MNT_ASYNC;
1223 			vfs_msync(mp, MNT_NOWAIT);
1224 			VFS_SYNC(mp, MNT_NOWAIT,
1225 			    ((td != NULL) ? td->td_ucred : NOCRED), td);
1226 			mp->mnt_flag |= asyncflag;
1227 			vn_finished_write(mp);
1228 		}
1229 		mtx_lock(&mountlist_mtx);
1230 		nmp = TAILQ_NEXT(mp, mnt_list);
1231 		vfs_unbusy(mp, td);
1232 	}
1233 	mtx_unlock(&mountlist_mtx);
1234 #if 0
1235 /*
1236  * XXX don't call vfs_bufstats() yet because that routine
1237  * was not imported in the Lite2 merge.
1238  */
1239 #ifdef DIAGNOSTIC
1240 	if (syncprt)
1241 		vfs_bufstats();
1242 #endif /* DIAGNOSTIC */
1243 #endif
1244 	return (0);
1245 }
1246 
1247 /* XXX PRISON: could be per prison flag */
1248 static int prison_quotas;
1249 #if 0
1250 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
1251 #endif
1252 
1253 /*
1254  * Change filesystem quotas.
1255  */
1256 #ifndef _SYS_SYSPROTO_H_
1257 struct quotactl_args {
1258 	char *path;
1259 	int cmd;
1260 	int uid;
1261 	caddr_t arg;
1262 };
1263 #endif
1264 /* ARGSUSED */
1265 int
1266 quotactl(td, uap)
1267 	struct thread *td;
1268 	register struct quotactl_args /* {
1269 		syscallarg(char *) path;
1270 		syscallarg(int) cmd;
1271 		syscallarg(int) uid;
1272 		syscallarg(caddr_t) arg;
1273 	} */ *uap;
1274 {
1275 	struct mount *mp;
1276 	int error;
1277 	struct nameidata nd;
1278 
1279 	if (jailed(td->td_ucred) && !prison_quotas)
1280 		return (EPERM);
1281 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1282 	if ((error = namei(&nd)) != 0)
1283 		return (error);
1284 	NDFREE(&nd, NDF_ONLY_PNBUF);
1285 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
1286 	vrele(nd.ni_vp);
1287 	if (error)
1288 		return (error);
1289 	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
1290 	    SCARG(uap, arg), td);
1291 	vn_finished_write(mp);
1292 	return (error);
1293 }
1294 
1295 /*
1296  * Get filesystem statistics.
1297  */
1298 #ifndef _SYS_SYSPROTO_H_
1299 struct statfs_args {
1300 	char *path;
1301 	struct statfs *buf;
1302 };
1303 #endif
1304 /* ARGSUSED */
1305 int
1306 statfs(td, uap)
1307 	struct thread *td;
1308 	register struct statfs_args /* {
1309 		syscallarg(char *) path;
1310 		syscallarg(struct statfs *) buf;
1311 	} */ *uap;
1312 {
1313 	register struct mount *mp;
1314 	register struct statfs *sp;
1315 	int error;
1316 	struct nameidata nd;
1317 	struct statfs sb;
1318 
1319 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1320 	if ((error = namei(&nd)) != 0)
1321 		return (error);
1322 	mp = nd.ni_vp->v_mount;
1323 	sp = &mp->mnt_stat;
1324 	NDFREE(&nd, NDF_ONLY_PNBUF);
1325 	vrele(nd.ni_vp);
1326 	error = VFS_STATFS(mp, sp, td);
1327 	if (error)
1328 		return (error);
1329 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1330 	if (suser(td)) {
1331 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
1332 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
1333 		sp = &sb;
1334 	}
1335 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
1336 }
1337 
1338 /*
1339  * Get filesystem statistics.
1340  */
1341 #ifndef _SYS_SYSPROTO_H_
1342 struct fstatfs_args {
1343 	int fd;
1344 	struct statfs *buf;
1345 };
1346 #endif
1347 /* ARGSUSED */
1348 int
1349 fstatfs(td, uap)
1350 	struct thread *td;
1351 	register struct fstatfs_args /* {
1352 		syscallarg(int) fd;
1353 		syscallarg(struct statfs *) buf;
1354 	} */ *uap;
1355 {
1356 	struct file *fp;
1357 	struct mount *mp;
1358 	register struct statfs *sp;
1359 	int error;
1360 	struct statfs sb;
1361 
1362 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
1363 		return (error);
1364 	mp = ((struct vnode *)fp->f_data)->v_mount;
1365 	fdrop(fp, td);
1366 	if (mp == NULL)
1367 		return (EBADF);
1368 	sp = &mp->mnt_stat;
1369 	error = VFS_STATFS(mp, sp, td);
1370 	if (error)
1371 		return (error);
1372 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1373 	if (suser(td)) {
1374 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
1375 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
1376 		sp = &sb;
1377 	}
1378 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
1379 }
1380 
1381 /*
1382  * Get statistics on all filesystems.
1383  */
1384 #ifndef _SYS_SYSPROTO_H_
1385 struct getfsstat_args {
1386 	struct statfs *buf;
1387 	long bufsize;
1388 	int flags;
1389 };
1390 #endif
1391 int
1392 getfsstat(td, uap)
1393 	struct thread *td;
1394 	register struct getfsstat_args /* {
1395 		syscallarg(struct statfs *) buf;
1396 		syscallarg(long) bufsize;
1397 		syscallarg(int) flags;
1398 	} */ *uap;
1399 {
1400 	register struct mount *mp, *nmp;
1401 	register struct statfs *sp;
1402 	caddr_t sfsp;
1403 	long count, maxcount, error;
1404 
1405 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
1406 	sfsp = (caddr_t)SCARG(uap, buf);
1407 	count = 0;
1408 	mtx_lock(&mountlist_mtx);
1409 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
1410 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
1411 			nmp = TAILQ_NEXT(mp, mnt_list);
1412 			continue;
1413 		}
1414 		if (sfsp && count < maxcount) {
1415 			sp = &mp->mnt_stat;
1416 			/*
1417 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1418 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
1419 			 * overrides MNT_WAIT.
1420 			 */
1421 			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
1422 			    (SCARG(uap, flags) & MNT_WAIT)) &&
1423 			    (error = VFS_STATFS(mp, sp, td))) {
1424 				mtx_lock(&mountlist_mtx);
1425 				nmp = TAILQ_NEXT(mp, mnt_list);
1426 				vfs_unbusy(mp, td);
1427 				continue;
1428 			}
1429 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1430 			error = copyout((caddr_t)sp, sfsp, sizeof(*sp));
1431 			if (error) {
1432 				vfs_unbusy(mp, td);
1433 				return (error);
1434 			}
1435 			sfsp += sizeof(*sp);
1436 		}
1437 		count++;
1438 		mtx_lock(&mountlist_mtx);
1439 		nmp = TAILQ_NEXT(mp, mnt_list);
1440 		vfs_unbusy(mp, td);
1441 	}
1442 	mtx_unlock(&mountlist_mtx);
1443 	if (sfsp && count > maxcount)
1444 		td->td_retval[0] = maxcount;
1445 	else
1446 		td->td_retval[0] = count;
1447 	return (0);
1448 }
1449 
1450 /*
1451  * Change current working directory to a given file descriptor.
1452  */
1453 #ifndef _SYS_SYSPROTO_H_
1454 struct fchdir_args {
1455 	int	fd;
1456 };
1457 #endif
1458 /* ARGSUSED */
1459 int
1460 fchdir(td, uap)
1461 	struct thread *td;
1462 	struct fchdir_args /* {
1463 		syscallarg(int) fd;
1464 	} */ *uap;
1465 {
1466 	register struct filedesc *fdp = td->td_proc->p_fd;
1467 	struct vnode *vp, *tdp, *vpold;
1468 	struct mount *mp;
1469 	struct file *fp;
1470 	int error;
1471 
1472 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
1473 		return (error);
1474 	vp = (struct vnode *)fp->f_data;
1475 	VREF(vp);
1476 	fdrop(fp, td);
1477 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1478 	if (vp->v_type != VDIR)
1479 		error = ENOTDIR;
1480 	else
1481 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
1482 	while (!error && (mp = vp->v_mountedhere) != NULL) {
1483 		if (vfs_busy(mp, 0, 0, td))
1484 			continue;
1485 		error = VFS_ROOT(mp, &tdp);
1486 		vfs_unbusy(mp, td);
1487 		if (error)
1488 			break;
1489 		vput(vp);
1490 		vp = tdp;
1491 	}
1492 	if (error) {
1493 		vput(vp);
1494 		return (error);
1495 	}
1496 	VOP_UNLOCK(vp, 0, td);
1497 	FILEDESC_LOCK(fdp);
1498 	vpold = fdp->fd_cdir;
1499 	fdp->fd_cdir = vp;
1500 	FILEDESC_UNLOCK(fdp);
1501 	vrele(vpold);
1502 	return (0);
1503 }
1504 
1505 /*
1506  * Change current working directory (``.'').
1507  */
1508 #ifndef _SYS_SYSPROTO_H_
1509 struct chdir_args {
1510 	char	*path;
1511 };
1512 #endif
1513 /* ARGSUSED */
1514 int
1515 chdir(td, uap)
1516 	struct thread *td;
1517 	struct chdir_args /* {
1518 		syscallarg(char *) path;
1519 	} */ *uap;
1520 {
1521 	register struct filedesc *fdp = td->td_proc->p_fd;
1522 	int error;
1523 	struct nameidata nd;
1524 	struct vnode *vp;
1525 
1526 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1527 	    SCARG(uap, path), td);
1528 	if ((error = change_dir(&nd, td)) != 0)
1529 		return (error);
1530 	NDFREE(&nd, NDF_ONLY_PNBUF);
1531 	FILEDESC_LOCK(fdp);
1532 	vp = fdp->fd_cdir;
1533 	fdp->fd_cdir = nd.ni_vp;
1534 	FILEDESC_UNLOCK(fdp);
1535 	vrele(vp);
1536 	return (0);
1537 }
1538 
1539 /*
1540  * Helper function for raised chroot(2) security function:  Refuse if
1541  * any filedescriptors are open directories.
1542  */
1543 static int
1544 chroot_refuse_vdir_fds(fdp)
1545 	struct filedesc *fdp;
1546 {
1547 	struct vnode *vp;
1548 	struct file *fp;
1549 	int fd;
1550 
1551 	FILEDESC_LOCK(fdp);
1552 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
1553 		fp = fget_locked(fdp, fd);
1554 		if (fp == NULL)
1555 			continue;
1556 		if (fp->f_type == DTYPE_VNODE) {
1557 			vp = (struct vnode *)fp->f_data;
1558 			if (vp->v_type == VDIR) {
1559 				FILEDESC_UNLOCK(fdp);
1560 				return (EPERM);
1561 			}
1562 		}
1563 	}
1564 	FILEDESC_UNLOCK(fdp);
1565 	return (0);
1566 }
1567 
1568 /*
1569  * This sysctl determines if we will allow a process to chroot(2) if it
1570  * has a directory open:
1571  *	0: disallowed for all processes.
1572  *	1: allowed for processes that were not already chroot(2)'ed.
1573  *	2: allowed for all processes.
1574  */
1575 
1576 static int chroot_allow_open_directories = 1;
1577 
1578 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
1579      &chroot_allow_open_directories, 0, "");
1580 
1581 /*
1582  * Change notion of root (``/'') directory.
1583  */
1584 #ifndef _SYS_SYSPROTO_H_
1585 struct chroot_args {
1586 	char	*path;
1587 };
1588 #endif
1589 /* ARGSUSED */
1590 int
1591 chroot(td, uap)
1592 	struct thread *td;
1593 	struct chroot_args /* {
1594 		syscallarg(char *) path;
1595 	} */ *uap;
1596 {
1597 	register struct filedesc *fdp = td->td_proc->p_fd;
1598 	int error;
1599 	struct nameidata nd;
1600 	struct vnode *vp;
1601 
1602 	error = suser_cred(td->td_ucred, PRISON_ROOT);
1603 	if (error)
1604 		return (error);
1605 	FILEDESC_LOCK(fdp);
1606 	if (chroot_allow_open_directories == 0 ||
1607 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
1608 		FILEDESC_UNLOCK(fdp);
1609 		error = chroot_refuse_vdir_fds(fdp);
1610 	} else
1611 		FILEDESC_UNLOCK(fdp);
1612 	if (error)
1613 		return (error);
1614 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1615 	    SCARG(uap, path), td);
1616 	if ((error = change_dir(&nd, td)) != 0)
1617 		return (error);
1618 	NDFREE(&nd, NDF_ONLY_PNBUF);
1619 	FILEDESC_LOCK(fdp);
1620 	vp = fdp->fd_rdir;
1621 	fdp->fd_rdir = nd.ni_vp;
1622 	if (!fdp->fd_jdir) {
1623 		fdp->fd_jdir = nd.ni_vp;
1624                 VREF(fdp->fd_jdir);
1625 	}
1626 	FILEDESC_UNLOCK(fdp);
1627 	vrele(vp);
1628 	return (0);
1629 }
1630 
1631 /*
1632  * Common routine for chroot and chdir.
1633  */
1634 static int
1635 change_dir(ndp, td)
1636 	register struct nameidata *ndp;
1637 	struct thread *td;
1638 {
1639 	struct vnode *vp;
1640 	int error;
1641 
1642 	error = namei(ndp);
1643 	if (error)
1644 		return (error);
1645 	vp = ndp->ni_vp;
1646 	if (vp->v_type != VDIR)
1647 		error = ENOTDIR;
1648 	else
1649 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
1650 	if (error)
1651 		vput(vp);
1652 	else
1653 		VOP_UNLOCK(vp, 0, td);
1654 	return (error);
1655 }
1656 
1657 /*
1658  * Check permissions, allocate an open file structure,
1659  * and call the device open routine if any.
1660  */
1661 #ifndef _SYS_SYSPROTO_H_
1662 struct open_args {
1663 	char	*path;
1664 	int	flags;
1665 	int	mode;
1666 };
1667 #endif
1668 int
1669 open(td, uap)
1670 	struct thread *td;
1671 	register struct open_args /* {
1672 		syscallarg(char *) path;
1673 		syscallarg(int) flags;
1674 		syscallarg(int) mode;
1675 	} */ *uap;
1676 {
1677 	struct proc *p = td->td_proc;
1678 	struct filedesc *fdp = p->p_fd;
1679 	struct file *fp;
1680 	struct vnode *vp;
1681 	struct vattr vat;
1682 	struct mount *mp;
1683 	int cmode, flags, oflags;
1684 	struct file *nfp;
1685 	int type, indx, error;
1686 	struct flock lf;
1687 	struct nameidata nd;
1688 
1689 	oflags = SCARG(uap, flags);
1690 	if ((oflags & O_ACCMODE) == O_ACCMODE)
1691 		return (EINVAL);
1692 	flags = FFLAGS(oflags);
1693 	error = falloc(td, &nfp, &indx);
1694 	if (error)
1695 		return (error);
1696 	fp = nfp;
1697 	FILEDESC_LOCK(fdp);
1698 	cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1699 	FILEDESC_UNLOCK(fdp);
1700 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1701 	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
1702 	/*
1703 	 * Bump the ref count to prevent another process from closing
1704 	 * the descriptor while we are blocked in vn_open()
1705 	 */
1706 	fhold(fp);
1707 	error = vn_open(&nd, &flags, cmode);
1708 	if (error) {
1709 		/*
1710 		 * release our own reference
1711 		 */
1712 		fdrop(fp, td);
1713 
1714 		/*
1715 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1716 		 * responsible for dropping the old contents of ofiles[indx]
1717 		 * if it succeeds.
1718 		 */
1719 		if ((error == ENODEV || error == ENXIO) &&
1720 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1721 		    (error =
1722 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1723 			td->td_retval[0] = indx;
1724 			return (0);
1725 		}
1726 		/*
1727 		 * Clean up the descriptor, but only if another thread hadn't
1728 		 * replaced or closed it.
1729 		 */
1730 		FILEDESC_LOCK(fdp);
1731 		if (fdp->fd_ofiles[indx] == fp) {
1732 			fdp->fd_ofiles[indx] = NULL;
1733 			FILEDESC_UNLOCK(fdp);
1734 			fdrop(fp, td);
1735 		} else
1736 			FILEDESC_UNLOCK(fdp);
1737 
1738 		if (error == ERESTART)
1739 			error = EINTR;
1740 		return (error);
1741 	}
1742 	td->td_dupfd = 0;
1743 	NDFREE(&nd, NDF_ONLY_PNBUF);
1744 	vp = nd.ni_vp;
1745 
1746 	/*
1747 	 * There should be 2 references on the file, one from the descriptor
1748 	 * table, and one for us.
1749 	 *
1750 	 * Handle the case where someone closed the file (via its file
1751 	 * descriptor) while we were blocked.  The end result should look
1752 	 * like opening the file succeeded but it was immediately closed.
1753 	 */
1754 	FILEDESC_LOCK(fdp);
1755 	FILE_LOCK(fp);
1756 	if (fp->f_count == 1) {
1757 		KASSERT(fdp->fd_ofiles[indx] != fp,
1758 		    ("Open file descriptor lost all refs"));
1759 		FILEDESC_UNLOCK(fdp);
1760 		FILE_UNLOCK(fp);
1761 		VOP_UNLOCK(vp, 0, td);
1762 		vn_close(vp, flags & FMASK, fp->f_cred, td);
1763 		fdrop(fp, td);
1764 		td->td_retval[0] = indx;
1765 		return 0;
1766 	}
1767 
1768 	/* assert that vn_open created a backing object if one is needed */
1769 	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
1770 		("open: vmio vnode has no backing object after vn_open"));
1771 
1772 	fp->f_data = (caddr_t)vp;
1773 	fp->f_flag = flags & FMASK;
1774 	fp->f_ops = &vnops;
1775 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1776 	FILEDESC_UNLOCK(fdp);
1777 	FILE_UNLOCK(fp);
1778 	VOP_UNLOCK(vp, 0, td);
1779 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1780 		lf.l_whence = SEEK_SET;
1781 		lf.l_start = 0;
1782 		lf.l_len = 0;
1783 		if (flags & O_EXLOCK)
1784 			lf.l_type = F_WRLCK;
1785 		else
1786 			lf.l_type = F_RDLCK;
1787 		type = F_FLOCK;
1788 		if ((flags & FNONBLOCK) == 0)
1789 			type |= F_WAIT;
1790 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0)
1791 			goto bad;
1792 		fp->f_flag |= FHASLOCK;
1793 	}
1794 	if (flags & O_TRUNC) {
1795 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1796 			goto bad;
1797 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1798 		VATTR_NULL(&vat);
1799 		vat.va_size = 0;
1800 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1801 		error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1802 		VOP_UNLOCK(vp, 0, td);
1803 		vn_finished_write(mp);
1804 		if (error)
1805 			goto bad;
1806 	}
1807 	/*
1808 	 * Release our private reference, leaving the one associated with
1809 	 * the descriptor table intact.
1810 	 */
1811 	fdrop(fp, td);
1812 	td->td_retval[0] = indx;
1813 	return (0);
1814 bad:
1815 	FILEDESC_LOCK(fdp);
1816 	if (fdp->fd_ofiles[indx] == fp) {
1817 		fdp->fd_ofiles[indx] = NULL;
1818 		FILEDESC_UNLOCK(fdp);
1819 		fdrop(fp, td);
1820 	} else
1821 		FILEDESC_UNLOCK(fdp);
1822 	return (error);
1823 }
1824 
#ifdef COMPAT_43
/*
 * Create a file.
 *
 * Compatibility entry point implemented as open() with the flags
 * O_WRONLY | O_CREAT | O_TRUNC and the caller's path and mode.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(td, uap)
	struct thread *td;
	register struct ocreat_args /* {
		syscallarg(char *) path;
		syscallarg(int) mode;
	} */ *uap;
{
	struct open_args /* {
		syscallarg(char *) path;
		syscallarg(int) flags;
		syscallarg(int) mode;
	} */ oargs;

	SCARG(&oargs, flags) = O_WRONLY | O_CREAT | O_TRUNC;
	SCARG(&oargs, mode) = SCARG(uap, mode);
	SCARG(&oargs, path) = SCARG(uap, path);
	return (open(td, &oargs));
}
#endif /* COMPAT_43 */
1855 
1856 /*
1857  * Create a special file.
1858  */
1859 #ifndef _SYS_SYSPROTO_H_
1860 struct mknod_args {
1861 	char	*path;
1862 	int	mode;
1863 	int	dev;
1864 };
1865 #endif
1866 /* ARGSUSED */
1867 int
1868 mknod(td, uap)
1869 	struct thread *td;
1870 	register struct mknod_args /* {
1871 		syscallarg(char *) path;
1872 		syscallarg(int) mode;
1873 		syscallarg(int) dev;
1874 	} */ *uap;
1875 {
1876 	struct vnode *vp;
1877 	struct mount *mp;
1878 	struct vattr vattr;
1879 	int error;
1880 	int whiteout = 0;
1881 	struct nameidata nd;
1882 
1883 	switch (SCARG(uap, mode) & S_IFMT) {
1884 	case S_IFCHR:
1885 	case S_IFBLK:
1886 		error = suser(td);
1887 		break;
1888 	default:
1889 		error = suser_cred(td->td_ucred, PRISON_ROOT);
1890 		break;
1891 	}
1892 	if (error)
1893 		return (error);
1894 restart:
1895 	bwillwrite();
1896 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1897 	if ((error = namei(&nd)) != 0)
1898 		return (error);
1899 	vp = nd.ni_vp;
1900 	if (vp != NULL) {
1901 		vrele(vp);
1902 		error = EEXIST;
1903 	} else {
1904 		VATTR_NULL(&vattr);
1905 		FILEDESC_LOCK(td->td_proc->p_fd);
1906 		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
1907 		FILEDESC_UNLOCK(td->td_proc->p_fd);
1908 		vattr.va_rdev = SCARG(uap, dev);
1909 		whiteout = 0;
1910 
1911 		switch (SCARG(uap, mode) & S_IFMT) {
1912 		case S_IFMT:	/* used by badsect to flag bad sectors */
1913 			vattr.va_type = VBAD;
1914 			break;
1915 		case S_IFCHR:
1916 			vattr.va_type = VCHR;
1917 			break;
1918 		case S_IFBLK:
1919 			vattr.va_type = VBLK;
1920 			break;
1921 		case S_IFWHT:
1922 			whiteout = 1;
1923 			break;
1924 		default:
1925 			error = EINVAL;
1926 			break;
1927 		}
1928 	}
1929 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1930 		NDFREE(&nd, NDF_ONLY_PNBUF);
1931 		vput(nd.ni_dvp);
1932 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1933 			return (error);
1934 		goto restart;
1935 	}
1936 	if (!error) {
1937 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1938 		if (whiteout)
1939 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1940 		else {
1941 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1942 						&nd.ni_cnd, &vattr);
1943 			if (error == 0)
1944 				vput(nd.ni_vp);
1945 		}
1946 	}
1947 	NDFREE(&nd, NDF_ONLY_PNBUF);
1948 	vput(nd.ni_dvp);
1949 	vn_finished_write(mp);
1950 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1951 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1952 	return (error);
1953 }
1954 
1955 /*
1956  * Create a named pipe.
1957  */
1958 #ifndef _SYS_SYSPROTO_H_
1959 struct mkfifo_args {
1960 	char	*path;
1961 	int	mode;
1962 };
1963 #endif
1964 /* ARGSUSED */
1965 int
1966 mkfifo(td, uap)
1967 	struct thread *td;
1968 	register struct mkfifo_args /* {
1969 		syscallarg(char *) path;
1970 		syscallarg(int) mode;
1971 	} */ *uap;
1972 {
1973 	struct mount *mp;
1974 	struct vattr vattr;
1975 	int error;
1976 	struct nameidata nd;
1977 
1978 restart:
1979 	bwillwrite();
1980 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1981 	if ((error = namei(&nd)) != 0)
1982 		return (error);
1983 	if (nd.ni_vp != NULL) {
1984 		NDFREE(&nd, NDF_ONLY_PNBUF);
1985 		vrele(nd.ni_vp);
1986 		vput(nd.ni_dvp);
1987 		return (EEXIST);
1988 	}
1989 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1990 		NDFREE(&nd, NDF_ONLY_PNBUF);
1991 		vput(nd.ni_dvp);
1992 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1993 			return (error);
1994 		goto restart;
1995 	}
1996 	VATTR_NULL(&vattr);
1997 	vattr.va_type = VFIFO;
1998 	FILEDESC_LOCK(td->td_proc->p_fd);
1999 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
2000 	FILEDESC_UNLOCK(td->td_proc->p_fd);
2001 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2002 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2003 	if (error == 0)
2004 		vput(nd.ni_vp);
2005 	NDFREE(&nd, NDF_ONLY_PNBUF);
2006 	vput(nd.ni_dvp);
2007 	vn_finished_write(mp);
2008 	return (error);
2009 }
2010 
2011 /*
2012  * Make a hard file link.
2013  */
2014 #ifndef _SYS_SYSPROTO_H_
2015 struct link_args {
2016 	char	*path;
2017 	char	*link;
2018 };
2019 #endif
2020 /* ARGSUSED */
2021 int
2022 link(td, uap)
2023 	struct thread *td;
2024 	register struct link_args /* {
2025 		syscallarg(char *) path;
2026 		syscallarg(char *) link;
2027 	} */ *uap;
2028 {
2029 	struct vnode *vp;
2030 	struct mount *mp;
2031 	struct nameidata nd;
2032 	int error;
2033 
2034 	bwillwrite();
2035 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), td);
2036 	if ((error = namei(&nd)) != 0)
2037 		return (error);
2038 	NDFREE(&nd, NDF_ONLY_PNBUF);
2039 	vp = nd.ni_vp;
2040 	if (vp->v_type == VDIR) {
2041 		vrele(vp);
2042 		return (EPERM);		/* POSIX */
2043 	}
2044 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2045 		vrele(vp);
2046 		return (error);
2047 	}
2048 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
2049 	if ((error = namei(&nd)) == 0) {
2050 		if (nd.ni_vp != NULL) {
2051 			vrele(nd.ni_vp);
2052 			error = EEXIST;
2053 		} else {
2054 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2055 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2056 			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2057 		}
2058 		NDFREE(&nd, NDF_ONLY_PNBUF);
2059 		vput(nd.ni_dvp);
2060 	}
2061 	vrele(vp);
2062 	vn_finished_write(mp);
2063 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
2064 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
2065 	return (error);
2066 }
2067 
2068 /*
2069  * Make a symbolic link.
2070  */
2071 #ifndef _SYS_SYSPROTO_H_
2072 struct symlink_args {
2073 	char	*path;
2074 	char	*link;
2075 };
2076 #endif
2077 /* ARGSUSED */
2078 int
2079 symlink(td, uap)
2080 	struct thread *td;
2081 	register struct symlink_args /* {
2082 		syscallarg(char *) path;
2083 		syscallarg(char *) link;
2084 	} */ *uap;
2085 {
2086 	struct mount *mp;
2087 	struct vattr vattr;
2088 	char *path;
2089 	int error;
2090 	struct nameidata nd;
2091 
2092 	path = uma_zalloc(namei_zone, M_WAITOK);
2093 	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
2094 		goto out;
2095 restart:
2096 	bwillwrite();
2097 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
2098 	if ((error = namei(&nd)) != 0)
2099 		goto out;
2100 	if (nd.ni_vp) {
2101 		NDFREE(&nd, NDF_ONLY_PNBUF);
2102 		vrele(nd.ni_vp);
2103 		vput(nd.ni_dvp);
2104 		error = EEXIST;
2105 		goto out;
2106 	}
2107 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2108 		NDFREE(&nd, NDF_ONLY_PNBUF);
2109 		vput(nd.ni_dvp);
2110 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2111 			return (error);
2112 		goto restart;
2113 	}
2114 	VATTR_NULL(&vattr);
2115 	FILEDESC_LOCK(td->td_proc->p_fd);
2116 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
2117 	FILEDESC_UNLOCK(td->td_proc->p_fd);
2118 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2119 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2120 	NDFREE(&nd, NDF_ONLY_PNBUF);
2121 	if (error == 0)
2122 		vput(nd.ni_vp);
2123 	vput(nd.ni_dvp);
2124 	vn_finished_write(mp);
2125 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
2126 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
2127 out:
2128 	uma_zfree(namei_zone, path);
2129 	return (error);
2130 }
2131 
2132 /*
2133  * Delete a whiteout from the filesystem.
2134  */
2135 /* ARGSUSED */
2136 int
2137 undelete(td, uap)
2138 	struct thread *td;
2139 	register struct undelete_args /* {
2140 		syscallarg(char *) path;
2141 	} */ *uap;
2142 {
2143 	int error;
2144 	struct mount *mp;
2145 	struct nameidata nd;
2146 
2147 restart:
2148 	bwillwrite();
2149 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
2150 	    SCARG(uap, path), td);
2151 	error = namei(&nd);
2152 	if (error)
2153 		return (error);
2154 
2155 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2156 		NDFREE(&nd, NDF_ONLY_PNBUF);
2157 		if (nd.ni_vp)
2158 			vrele(nd.ni_vp);
2159 		vput(nd.ni_dvp);
2160 		return (EEXIST);
2161 	}
2162 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2163 		NDFREE(&nd, NDF_ONLY_PNBUF);
2164 		vput(nd.ni_dvp);
2165 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2166 			return (error);
2167 		goto restart;
2168 	}
2169 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2170 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
2171 	NDFREE(&nd, NDF_ONLY_PNBUF);
2172 	vput(nd.ni_dvp);
2173 	vn_finished_write(mp);
2174 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
2175 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
2176 	return (error);
2177 }
2178 
2179 /*
2180  * Delete a name from the filesystem.
2181  */
2182 #ifndef _SYS_SYSPROTO_H_
2183 struct unlink_args {
2184 	char	*path;
2185 };
2186 #endif
2187 /* ARGSUSED */
2188 int
2189 unlink(td, uap)
2190 	struct thread *td;
2191 	struct unlink_args /* {
2192 		syscallarg(char *) path;
2193 	} */ *uap;
2194 {
2195 	struct mount *mp;
2196 	struct vnode *vp;
2197 	int error;
2198 	struct nameidata nd;
2199 
2200 restart:
2201 	bwillwrite();
2202 	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
2203 	if ((error = namei(&nd)) != 0)
2204 		return (error);
2205 	vp = nd.ni_vp;
2206 	if (vp->v_type == VDIR)
2207 		error = EPERM;		/* POSIX */
2208 	else {
2209 		/*
2210 		 * The root of a mounted filesystem cannot be deleted.
2211 		 *
2212 		 * XXX: can this only be a VDIR case?
2213 		 */
2214 		if (vp->v_flag & VROOT)
2215 			error = EBUSY;
2216 	}
2217 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2218 		NDFREE(&nd, NDF_ONLY_PNBUF);
2219 		vrele(vp);
2220 		vput(nd.ni_dvp);
2221 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2222 			return (error);
2223 		goto restart;
2224 	}
2225 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2226 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2227 	if (!error) {
2228 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2229 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
2230 	}
2231 	NDFREE(&nd, NDF_ONLY_PNBUF);
2232 	vput(nd.ni_dvp);
2233 	vput(vp);
2234 	vn_finished_write(mp);
2235 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
2236 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
2237 	return (error);
2238 }
2239 
2240 /*
2241  * Reposition read/write file offset.
2242  */
2243 #ifndef _SYS_SYSPROTO_H_
2244 struct lseek_args {
2245 	int	fd;
2246 	int	pad;
2247 	off_t	offset;
2248 	int	whence;
2249 };
2250 #endif
2251 int
2252 lseek(td, uap)
2253 	struct thread *td;
2254 	register struct lseek_args /* {
2255 		syscallarg(int) fd;
2256 		syscallarg(int) pad;
2257 		syscallarg(off_t) offset;
2258 		syscallarg(int) whence;
2259 	} */ *uap;
2260 {
2261 	struct ucred *cred = td->td_ucred;
2262 	struct file *fp;
2263 	struct vnode *vp;
2264 	struct vattr vattr;
2265 	off_t offset;
2266 	int error, noneg;
2267 
2268 	if ((error = fget(td, uap->fd, &fp)) != 0)
2269 		return (error);
2270 	if (fp->f_type != DTYPE_VNODE) {
2271 		fdrop(fp, td);
2272 		return (ESPIPE);
2273 	}
2274 	vp = (struct vnode *)fp->f_data;
2275 	noneg = (vp->v_type != VCHR);
2276 	offset = SCARG(uap, offset);
2277 	switch (SCARG(uap, whence)) {
2278 	case L_INCR:
2279 		if (noneg &&
2280 		    (fp->f_offset < 0 ||
2281 		     (offset > 0 && fp->f_offset > OFF_MAX - offset)))
2282 			return (EOVERFLOW);
2283 		offset += fp->f_offset;
2284 		break;
2285 	case L_XTND:
2286 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2287 		error = VOP_GETATTR(vp, &vattr, cred, td);
2288 		VOP_UNLOCK(vp, 0, td);
2289 		if (error)
2290 			return (error);
2291 		if (noneg &&
2292 		    (vattr.va_size > OFF_MAX ||
2293 		     (offset > 0 && vattr.va_size > OFF_MAX - offset)))
2294 			return (EOVERFLOW);
2295 		offset += vattr.va_size;
2296 		break;
2297 	case L_SET:
2298 		break;
2299 	default:
2300 		fdrop(fp, td);
2301 		return (EINVAL);
2302 	}
2303 	if (noneg && offset < 0)
2304 		return (EINVAL);
2305 	fp->f_offset = offset;
2306 	*(off_t *)(td->td_retval) = fp->f_offset;
2307 	fdrop(fp, td);
2308 	return (0);
2309 }
2310 
2311 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2312 /*
2313  * Reposition read/write file offset.
2314  */
2315 #ifndef _SYS_SYSPROTO_H_
2316 struct olseek_args {
2317 	int	fd;
2318 	long	offset;
2319 	int	whence;
2320 };
2321 #endif
2322 int
2323 olseek(td, uap)
2324 	struct thread *td;
2325 	register struct olseek_args /* {
2326 		syscallarg(int) fd;
2327 		syscallarg(long) offset;
2328 		syscallarg(int) whence;
2329 	} */ *uap;
2330 {
2331 	struct lseek_args /* {
2332 		syscallarg(int) fd;
2333 		syscallarg(int) pad;
2334 		syscallarg(off_t) offset;
2335 		syscallarg(int) whence;
2336 	} */ nuap;
2337 	int error;
2338 
2339 	SCARG(&nuap, fd) = SCARG(uap, fd);
2340 	SCARG(&nuap, offset) = SCARG(uap, offset);
2341 	SCARG(&nuap, whence) = SCARG(uap, whence);
2342 	error = lseek(td, &nuap);
2343 	return (error);
2344 }
2345 #endif /* COMPAT_43 */
2346 
2347 /*
2348  * Check access permissions using passed credentials.
2349  */
2350 static int
2351 vn_access(vp, user_flags, cred, td)
2352 	struct vnode	*vp;
2353 	int		user_flags;
2354 	struct ucred	*cred;
2355 	struct thread	*td;
2356 {
2357 	int error, flags;
2358 
2359 	/* Flags == 0 means only check for existence. */
2360 	error = 0;
2361 	if (user_flags) {
2362 		flags = 0;
2363 		if (user_flags & R_OK)
2364 			flags |= VREAD;
2365 		if (user_flags & W_OK)
2366 			flags |= VWRITE;
2367 		if (user_flags & X_OK)
2368 			flags |= VEXEC;
2369 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
2370 			error = VOP_ACCESS(vp, flags, cred, td);
2371 	}
2372 	return (error);
2373 }
2374 
2375 /*
2376  * Check access permissions using "real" credentials.
2377  */
2378 #ifndef _SYS_SYSPROTO_H_
2379 struct access_args {
2380 	char	*path;
2381 	int	flags;
2382 };
2383 #endif
2384 int
2385 access(td, uap)
2386 	struct thread *td;
2387 	register struct access_args /* {
2388 		syscallarg(char *) path;
2389 		syscallarg(int) flags;
2390 	} */ *uap;
2391 {
2392 	struct ucred *cred, *tmpcred;
2393 	register struct vnode *vp;
2394 	int error;
2395 	struct nameidata nd;
2396 
2397 	/*
2398 	 * Create and modify a temporary credential instead of one that
2399 	 * is potentially shared.  This could also mess up socket
2400 	 * buffer accounting which can run in an interrupt context.
2401 	 *
2402 	 * XXX - Depending on how "threads" are finally implemented, it
2403 	 * may be better to explicitly pass the credential to namei()
2404 	 * rather than to modify the potentially shared process structure.
2405 	 */
2406 	cred = td->td_ucred;
2407 	tmpcred = crdup(cred);
2408 	tmpcred->cr_uid = cred->cr_ruid;
2409 	tmpcred->cr_groups[0] = cred->cr_rgid;
2410 	td->td_ucred = tmpcred;
2411 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2412 	    SCARG(uap, path), td);
2413 	if ((error = namei(&nd)) != 0)
2414 		goto out1;
2415 	vp = nd.ni_vp;
2416 
2417 	error = vn_access(vp, SCARG(uap, flags), tmpcred, td);
2418 	NDFREE(&nd, NDF_ONLY_PNBUF);
2419 	vput(vp);
2420 out1:
2421 	td->td_ucred = cred;
2422 	crfree(tmpcred);
2423 	return (error);
2424 }
2425 
2426 /*
2427  * Check access permissions using "effective" credentials.
2428  */
2429 #ifndef _SYS_SYSPROTO_H_
2430 struct eaccess_args {
2431 	char	*path;
2432 	int	flags;
2433 };
2434 #endif
2435 int
2436 eaccess(td, uap)
2437 	struct thread *td;
2438 	register struct eaccess_args /* {
2439 		syscallarg(char *) path;
2440 		syscallarg(int) flags;
2441 	} */ *uap;
2442 {
2443 	struct nameidata nd;
2444 	struct vnode *vp;
2445 	int error;
2446 
2447 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2448 	    SCARG(uap, path), td);
2449 	if ((error = namei(&nd)) != 0)
2450 		return (error);
2451 	vp = nd.ni_vp;
2452 
2453 	error = vn_access(vp, SCARG(uap, flags), td->td_ucred, td);
2454 	NDFREE(&nd, NDF_ONLY_PNBUF);
2455 	vput(vp);
2456 	return (error);
2457 }
2458 
2459 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2460 /*
2461  * Get file status; this version follows links.
2462  */
2463 #ifndef _SYS_SYSPROTO_H_
2464 struct ostat_args {
2465 	char	*path;
2466 	struct ostat *ub;
2467 };
2468 #endif
2469 /* ARGSUSED */
2470 int
2471 ostat(td, uap)
2472 	struct thread *td;
2473 	register struct ostat_args /* {
2474 		syscallarg(char *) path;
2475 		syscallarg(struct ostat *) ub;
2476 	} */ *uap;
2477 {
2478 	struct stat sb;
2479 	struct ostat osb;
2480 	int error;
2481 	struct nameidata nd;
2482 
2483 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2484 	    SCARG(uap, path), td);
2485 	if ((error = namei(&nd)) != 0)
2486 		return (error);
2487 	NDFREE(&nd, NDF_ONLY_PNBUF);
2488 	error = vn_stat(nd.ni_vp, &sb, td);
2489 	vput(nd.ni_vp);
2490 	if (error)
2491 		return (error);
2492 	cvtstat(&sb, &osb);
2493 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
2494 	return (error);
2495 }
2496 
2497 /*
2498  * Get file status; this version does not follow links.
2499  */
2500 #ifndef _SYS_SYSPROTO_H_
2501 struct olstat_args {
2502 	char	*path;
2503 	struct ostat *ub;
2504 };
2505 #endif
2506 /* ARGSUSED */
2507 int
2508 olstat(td, uap)
2509 	struct thread *td;
2510 	register struct olstat_args /* {
2511 		syscallarg(char *) path;
2512 		syscallarg(struct ostat *) ub;
2513 	} */ *uap;
2514 {
2515 	struct vnode *vp;
2516 	struct stat sb;
2517 	struct ostat osb;
2518 	int error;
2519 	struct nameidata nd;
2520 
2521 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2522 	    SCARG(uap, path), td);
2523 	if ((error = namei(&nd)) != 0)
2524 		return (error);
2525 	vp = nd.ni_vp;
2526 	error = vn_stat(vp, &sb, td);
2527 	NDFREE(&nd, NDF_ONLY_PNBUF);
2528 	vput(vp);
2529 	if (error)
2530 		return (error);
2531 	cvtstat(&sb, &osb);
2532 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
2533 	return (error);
2534 }
2535 
2536 /*
2537  * Convert from an old to a new stat structure.
2538  */
2539 void
2540 cvtstat(st, ost)
2541 	struct stat *st;
2542 	struct ostat *ost;
2543 {
2544 
2545 	ost->st_dev = st->st_dev;
2546 	ost->st_ino = st->st_ino;
2547 	ost->st_mode = st->st_mode;
2548 	ost->st_nlink = st->st_nlink;
2549 	ost->st_uid = st->st_uid;
2550 	ost->st_gid = st->st_gid;
2551 	ost->st_rdev = st->st_rdev;
2552 	if (st->st_size < (quad_t)1 << 32)
2553 		ost->st_size = st->st_size;
2554 	else
2555 		ost->st_size = -2;
2556 	ost->st_atime = st->st_atime;
2557 	ost->st_mtime = st->st_mtime;
2558 	ost->st_ctime = st->st_ctime;
2559 	ost->st_blksize = st->st_blksize;
2560 	ost->st_blocks = st->st_blocks;
2561 	ost->st_flags = st->st_flags;
2562 	ost->st_gen = st->st_gen;
2563 }
2564 #endif /* COMPAT_43 || COMPAT_SUNOS */
2565 
2566 /*
2567  * Get file status; this version follows links.
2568  */
2569 #ifndef _SYS_SYSPROTO_H_
2570 struct stat_args {
2571 	char	*path;
2572 	struct stat *ub;
2573 };
2574 #endif
2575 /* ARGSUSED */
2576 int
2577 stat(td, uap)
2578 	struct thread *td;
2579 	register struct stat_args /* {
2580 		syscallarg(char *) path;
2581 		syscallarg(struct stat *) ub;
2582 	} */ *uap;
2583 {
2584 	struct stat sb;
2585 	int error;
2586 	struct nameidata nd;
2587 
2588 #ifndef LOOKUP_EXCLUSIVE
2589 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
2590 	    UIO_USERSPACE, SCARG(uap, path), td);
2591 #else
2592 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2593 	    SCARG(uap, path), td);
2594 #endif
2595 	if ((error = namei(&nd)) != 0)
2596 		return (error);
2597 	error = vn_stat(nd.ni_vp, &sb, td);
2598 	NDFREE(&nd, NDF_ONLY_PNBUF);
2599 	vput(nd.ni_vp);
2600 	if (error)
2601 		return (error);
2602 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
2603 	return (error);
2604 }
2605 
2606 /*
2607  * Get file status; this version does not follow links.
2608  */
2609 #ifndef _SYS_SYSPROTO_H_
2610 struct lstat_args {
2611 	char	*path;
2612 	struct stat *ub;
2613 };
2614 #endif
2615 /* ARGSUSED */
2616 int
2617 lstat(td, uap)
2618 	struct thread *td;
2619 	register struct lstat_args /* {
2620 		syscallarg(char *) path;
2621 		syscallarg(struct stat *) ub;
2622 	} */ *uap;
2623 {
2624 	int error;
2625 	struct vnode *vp;
2626 	struct stat sb;
2627 	struct nameidata nd;
2628 
2629 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2630 	    SCARG(uap, path), td);
2631 	if ((error = namei(&nd)) != 0)
2632 		return (error);
2633 	vp = nd.ni_vp;
2634 	error = vn_stat(vp, &sb, td);
2635 	NDFREE(&nd, NDF_ONLY_PNBUF);
2636 	vput(vp);
2637 	if (error)
2638 		return (error);
2639 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
2640 	return (error);
2641 }
2642 
2643 /*
2644  * Implementation of the NetBSD stat() function.
2645  * XXX This should probably be collapsed with the FreeBSD version,
2646  * as the differences are only due to vn_stat() clearing spares at
2647  * the end of the structures.  vn_stat could be split to avoid this,
2648  * and thus collapse the following to close to zero code.
2649  */
2650 void
2651 cvtnstat(sb, nsb)
2652 	struct stat *sb;
2653 	struct nstat *nsb;
2654 {
2655 	nsb->st_dev = sb->st_dev;
2656 	nsb->st_ino = sb->st_ino;
2657 	nsb->st_mode = sb->st_mode;
2658 	nsb->st_nlink = sb->st_nlink;
2659 	nsb->st_uid = sb->st_uid;
2660 	nsb->st_gid = sb->st_gid;
2661 	nsb->st_rdev = sb->st_rdev;
2662 	nsb->st_atimespec = sb->st_atimespec;
2663 	nsb->st_mtimespec = sb->st_mtimespec;
2664 	nsb->st_ctimespec = sb->st_ctimespec;
2665 	nsb->st_size = sb->st_size;
2666 	nsb->st_blocks = sb->st_blocks;
2667 	nsb->st_blksize = sb->st_blksize;
2668 	nsb->st_flags = sb->st_flags;
2669 	nsb->st_gen = sb->st_gen;
2670 	nsb->st_qspare[0] = sb->st_qspare[0];
2671 	nsb->st_qspare[1] = sb->st_qspare[1];
2672 }
2673 
2674 #ifndef _SYS_SYSPROTO_H_
2675 struct nstat_args {
2676 	char	*path;
2677 	struct nstat *ub;
2678 };
2679 #endif
2680 /* ARGSUSED */
2681 int
2682 nstat(td, uap)
2683 	struct thread *td;
2684 	register struct nstat_args /* {
2685 		syscallarg(char *) path;
2686 		syscallarg(struct nstat *) ub;
2687 	} */ *uap;
2688 {
2689 	struct stat sb;
2690 	struct nstat nsb;
2691 	int error;
2692 	struct nameidata nd;
2693 
2694 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2695 	    SCARG(uap, path), td);
2696 	if ((error = namei(&nd)) != 0)
2697 		return (error);
2698 	NDFREE(&nd, NDF_ONLY_PNBUF);
2699 	error = vn_stat(nd.ni_vp, &sb, td);
2700 	vput(nd.ni_vp);
2701 	if (error)
2702 		return (error);
2703 	cvtnstat(&sb, &nsb);
2704 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2705 	return (error);
2706 }
2707 
2708 /*
2709  * NetBSD lstat.  Get file status; this version does not follow links.
2710  */
2711 #ifndef _SYS_SYSPROTO_H_
2712 struct lstat_args {
2713 	char	*path;
2714 	struct stat *ub;
2715 };
2716 #endif
2717 /* ARGSUSED */
2718 int
2719 nlstat(td, uap)
2720 	struct thread *td;
2721 	register struct nlstat_args /* {
2722 		syscallarg(char *) path;
2723 		syscallarg(struct nstat *) ub;
2724 	} */ *uap;
2725 {
2726 	int error;
2727 	struct vnode *vp;
2728 	struct stat sb;
2729 	struct nstat nsb;
2730 	struct nameidata nd;
2731 
2732 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2733 	    SCARG(uap, path), td);
2734 	if ((error = namei(&nd)) != 0)
2735 		return (error);
2736 	vp = nd.ni_vp;
2737 	NDFREE(&nd, NDF_ONLY_PNBUF);
2738 	error = vn_stat(vp, &sb, td);
2739 	vput(vp);
2740 	if (error)
2741 		return (error);
2742 	cvtnstat(&sb, &nsb);
2743 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2744 	return (error);
2745 }
2746 
2747 /*
2748  * Get configurable pathname variables.
2749  */
2750 #ifndef _SYS_SYSPROTO_H_
2751 struct pathconf_args {
2752 	char	*path;
2753 	int	name;
2754 };
2755 #endif
2756 /* ARGSUSED */
2757 int
2758 pathconf(td, uap)
2759 	struct thread *td;
2760 	register struct pathconf_args /* {
2761 		syscallarg(char *) path;
2762 		syscallarg(int) name;
2763 	} */ *uap;
2764 {
2765 	int error;
2766 	struct nameidata nd;
2767 
2768 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2769 	    SCARG(uap, path), td);
2770 	if ((error = namei(&nd)) != 0)
2771 		return (error);
2772 	NDFREE(&nd, NDF_ONLY_PNBUF);
2773 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), td->td_retval);
2774 	vput(nd.ni_vp);
2775 	return (error);
2776 }
2777 
2778 /*
2779  * Return target name of a symbolic link.
2780  */
2781 #ifndef _SYS_SYSPROTO_H_
2782 struct readlink_args {
2783 	char	*path;
2784 	char	*buf;
2785 	int	count;
2786 };
2787 #endif
2788 /* ARGSUSED */
2789 int
2790 readlink(td, uap)
2791 	struct thread *td;
2792 	register struct readlink_args /* {
2793 		syscallarg(char *) path;
2794 		syscallarg(char *) buf;
2795 		syscallarg(int) count;
2796 	} */ *uap;
2797 {
2798 	register struct vnode *vp;
2799 	struct iovec aiov;
2800 	struct uio auio;
2801 	int error;
2802 	struct nameidata nd;
2803 
2804 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2805 	    SCARG(uap, path), td);
2806 	if ((error = namei(&nd)) != 0)
2807 		return (error);
2808 	NDFREE(&nd, NDF_ONLY_PNBUF);
2809 	vp = nd.ni_vp;
2810 	if (vp->v_type != VLNK)
2811 		error = EINVAL;
2812 	else {
2813 		aiov.iov_base = SCARG(uap, buf);
2814 		aiov.iov_len = SCARG(uap, count);
2815 		auio.uio_iov = &aiov;
2816 		auio.uio_iovcnt = 1;
2817 		auio.uio_offset = 0;
2818 		auio.uio_rw = UIO_READ;
2819 		auio.uio_segflg = UIO_USERSPACE;
2820 		auio.uio_td = td;
2821 		auio.uio_resid = SCARG(uap, count);
2822 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2823 	}
2824 	vput(vp);
2825 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
2826 	return (error);
2827 }
2828 
2829 /*
2830  * Common implementation code for chflags() and fchflags().
2831  */
2832 static int
2833 setfflags(td, vp, flags)
2834 	struct thread *td;
2835 	struct vnode *vp;
2836 	int flags;
2837 {
2838 	int error;
2839 	struct mount *mp;
2840 	struct vattr vattr;
2841 
2842 	/*
2843 	 * Prevent non-root users from setting flags on devices.  When
2844 	 * a device is reused, users can retain ownership of the device
2845 	 * if they are allowed to set flags and programs assume that
2846 	 * chown can't fail when done as root.
2847 	 */
2848 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2849 		error = suser_cred(td->td_ucred, PRISON_ROOT);
2850 		if (error)
2851 			return (error);
2852 	}
2853 
2854 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2855 		return (error);
2856 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2857 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2858 	VATTR_NULL(&vattr);
2859 	vattr.va_flags = flags;
2860 	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2861 	VOP_UNLOCK(vp, 0, td);
2862 	vn_finished_write(mp);
2863 	return (error);
2864 }
2865 
2866 /*
2867  * Change flags of a file given a path name.
2868  */
2869 #ifndef _SYS_SYSPROTO_H_
2870 struct chflags_args {
2871 	char	*path;
2872 	int	flags;
2873 };
2874 #endif
2875 /* ARGSUSED */
2876 int
2877 chflags(td, uap)
2878 	struct thread *td;
2879 	register struct chflags_args /* {
2880 		syscallarg(char *) path;
2881 		syscallarg(int) flags;
2882 	} */ *uap;
2883 {
2884 	int error;
2885 	struct nameidata nd;
2886 
2887 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2888 	if ((error = namei(&nd)) != 0)
2889 		return (error);
2890 	NDFREE(&nd, NDF_ONLY_PNBUF);
2891 	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
2892 	vrele(nd.ni_vp);
2893 	return error;
2894 }
2895 
2896 /*
2897  * Same as chflags() but doesn't follow symlinks.
2898  */
2899 int
2900 lchflags(td, uap)
2901 	struct thread *td;
2902 	register struct lchflags_args /* {
2903 		syscallarg(char *) path;
2904 		syscallarg(int) flags;
2905 	} */ *uap;
2906 {
2907 	int error;
2908 	struct nameidata nd;
2909 
2910 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2911 	if ((error = namei(&nd)) != 0)
2912 		return (error);
2913 	NDFREE(&nd, NDF_ONLY_PNBUF);
2914 	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
2915 	vrele(nd.ni_vp);
2916 	return error;
2917 }
2918 
2919 /*
2920  * Change flags of a file given a file descriptor.
2921  */
2922 #ifndef _SYS_SYSPROTO_H_
2923 struct fchflags_args {
2924 	int	fd;
2925 	int	flags;
2926 };
2927 #endif
2928 /* ARGSUSED */
2929 int
2930 fchflags(td, uap)
2931 	struct thread *td;
2932 	register struct fchflags_args /* {
2933 		syscallarg(int) fd;
2934 		syscallarg(int) flags;
2935 	} */ *uap;
2936 {
2937 	struct file *fp;
2938 	int error;
2939 
2940 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2941 		return (error);
2942 	error = setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags));
2943 	fdrop(fp, td);
2944 	return (error);
2945 }
2946 
2947 /*
2948  * Common implementation code for chmod(), lchmod() and fchmod().
2949  */
2950 static int
2951 setfmode(td, vp, mode)
2952 	struct thread *td;
2953 	struct vnode *vp;
2954 	int mode;
2955 {
2956 	int error;
2957 	struct mount *mp;
2958 	struct vattr vattr;
2959 
2960 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2961 		return (error);
2962 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2963 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2964 	VATTR_NULL(&vattr);
2965 	vattr.va_mode = mode & ALLPERMS;
2966 	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2967 	VOP_UNLOCK(vp, 0, td);
2968 	vn_finished_write(mp);
2969 	return error;
2970 }
2971 
2972 /*
2973  * Change mode of a file given path name.
2974  */
2975 #ifndef _SYS_SYSPROTO_H_
2976 struct chmod_args {
2977 	char	*path;
2978 	int	mode;
2979 };
2980 #endif
2981 /* ARGSUSED */
2982 int
2983 chmod(td, uap)
2984 	struct thread *td;
2985 	register struct chmod_args /* {
2986 		syscallarg(char *) path;
2987 		syscallarg(int) mode;
2988 	} */ *uap;
2989 {
2990 	int error;
2991 	struct nameidata nd;
2992 
2993 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2994 	if ((error = namei(&nd)) != 0)
2995 		return (error);
2996 	NDFREE(&nd, NDF_ONLY_PNBUF);
2997 	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
2998 	vrele(nd.ni_vp);
2999 	return error;
3000 }
3001 
3002 /*
3003  * Change mode of a file given path name (don't follow links.)
3004  */
3005 #ifndef _SYS_SYSPROTO_H_
3006 struct lchmod_args {
3007 	char	*path;
3008 	int	mode;
3009 };
3010 #endif
3011 /* ARGSUSED */
3012 int
3013 lchmod(td, uap)
3014 	struct thread *td;
3015 	register struct lchmod_args /* {
3016 		syscallarg(char *) path;
3017 		syscallarg(int) mode;
3018 	} */ *uap;
3019 {
3020 	int error;
3021 	struct nameidata nd;
3022 
3023 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3024 	if ((error = namei(&nd)) != 0)
3025 		return (error);
3026 	NDFREE(&nd, NDF_ONLY_PNBUF);
3027 	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
3028 	vrele(nd.ni_vp);
3029 	return error;
3030 }
3031 
3032 /*
3033  * Change mode of a file given a file descriptor.
3034  */
3035 #ifndef _SYS_SYSPROTO_H_
3036 struct fchmod_args {
3037 	int	fd;
3038 	int	mode;
3039 };
3040 #endif
3041 /* ARGSUSED */
3042 int
3043 fchmod(td, uap)
3044 	struct thread *td;
3045 	register struct fchmod_args /* {
3046 		syscallarg(int) fd;
3047 		syscallarg(int) mode;
3048 	} */ *uap;
3049 {
3050 	struct file *fp;
3051 	struct vnode *vp;
3052 	int error;
3053 
3054 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3055 		return (error);
3056 	vp = (struct vnode *)fp->f_data;
3057 	error = setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode));
3058 	fdrop(fp, td);
3059 	return (error);
3060 }
3061 
3062 /*
3063  * Common implementation for chown(), lchown(), and fchown()
3064  */
3065 static int
3066 setfown(td, vp, uid, gid)
3067 	struct thread *td;
3068 	struct vnode *vp;
3069 	uid_t uid;
3070 	gid_t gid;
3071 {
3072 	int error;
3073 	struct mount *mp;
3074 	struct vattr vattr;
3075 
3076 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3077 		return (error);
3078 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3079 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3080 	VATTR_NULL(&vattr);
3081 	vattr.va_uid = uid;
3082 	vattr.va_gid = gid;
3083 	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3084 	VOP_UNLOCK(vp, 0, td);
3085 	vn_finished_write(mp);
3086 	return error;
3087 }
3088 
3089 /*
3090  * Set ownership given a path name.
3091  */
3092 #ifndef _SYS_SYSPROTO_H_
3093 struct chown_args {
3094 	char	*path;
3095 	int	uid;
3096 	int	gid;
3097 };
3098 #endif
3099 /* ARGSUSED */
3100 int
3101 chown(td, uap)
3102 	struct thread *td;
3103 	register struct chown_args /* {
3104 		syscallarg(char *) path;
3105 		syscallarg(int) uid;
3106 		syscallarg(int) gid;
3107 	} */ *uap;
3108 {
3109 	int error;
3110 	struct nameidata nd;
3111 
3112 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3113 	if ((error = namei(&nd)) != 0)
3114 		return (error);
3115 	NDFREE(&nd, NDF_ONLY_PNBUF);
3116 	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
3117 	vrele(nd.ni_vp);
3118 	return (error);
3119 }
3120 
3121 /*
3122  * Set ownership given a path name, do not cross symlinks.
3123  */
3124 #ifndef _SYS_SYSPROTO_H_
3125 struct lchown_args {
3126 	char	*path;
3127 	int	uid;
3128 	int	gid;
3129 };
3130 #endif
3131 /* ARGSUSED */
3132 int
3133 lchown(td, uap)
3134 	struct thread *td;
3135 	register struct lchown_args /* {
3136 		syscallarg(char *) path;
3137 		syscallarg(int) uid;
3138 		syscallarg(int) gid;
3139 	} */ *uap;
3140 {
3141 	int error;
3142 	struct nameidata nd;
3143 
3144 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3145 	if ((error = namei(&nd)) != 0)
3146 		return (error);
3147 	NDFREE(&nd, NDF_ONLY_PNBUF);
3148 	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
3149 	vrele(nd.ni_vp);
3150 	return (error);
3151 }
3152 
3153 /*
3154  * Set ownership given a file descriptor.
3155  */
3156 #ifndef _SYS_SYSPROTO_H_
3157 struct fchown_args {
3158 	int	fd;
3159 	int	uid;
3160 	int	gid;
3161 };
3162 #endif
3163 /* ARGSUSED */
3164 int
3165 fchown(td, uap)
3166 	struct thread *td;
3167 	register struct fchown_args /* {
3168 		syscallarg(int) fd;
3169 		syscallarg(int) uid;
3170 		syscallarg(int) gid;
3171 	} */ *uap;
3172 {
3173 	struct file *fp;
3174 	struct vnode *vp;
3175 	int error;
3176 
3177 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3178 		return (error);
3179 	vp = (struct vnode *)fp->f_data;
3180 	error = setfown(td, (struct vnode *)fp->f_data,
3181 		SCARG(uap, uid), SCARG(uap, gid));
3182 	fdrop(fp, td);
3183 	return (error);
3184 }
3185 
3186 /*
3187  * Common implementation code for utimes(), lutimes(), and futimes().
3188  */
3189 static int
3190 getutimes(usrtvp, tsp)
3191 	const struct timeval *usrtvp;
3192 	struct timespec *tsp;
3193 {
3194 	struct timeval tv[2];
3195 	int error;
3196 
3197 	if (usrtvp == NULL) {
3198 		microtime(&tv[0]);
3199 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
3200 		tsp[1] = tsp[0];
3201 	} else {
3202 		if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
3203 			return (error);
3204 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
3205 		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
3206 	}
3207 	return 0;
3208 }
3209 
3210 /*
3211  * Common implementation code for utimes(), lutimes(), and futimes().
3212  */
3213 static int
3214 setutimes(td, vp, ts, nullflag)
3215 	struct thread *td;
3216 	struct vnode *vp;
3217 	const struct timespec *ts;
3218 	int nullflag;
3219 {
3220 	int error;
3221 	struct mount *mp;
3222 	struct vattr vattr;
3223 
3224 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3225 		return (error);
3226 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3227 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3228 	VATTR_NULL(&vattr);
3229 	vattr.va_atime = ts[0];
3230 	vattr.va_mtime = ts[1];
3231 	if (nullflag)
3232 		vattr.va_vaflags |= VA_UTIMES_NULL;
3233 	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3234 	VOP_UNLOCK(vp, 0, td);
3235 	vn_finished_write(mp);
3236 	return error;
3237 }
3238 
3239 /*
3240  * Set the access and modification times of a file.
3241  */
3242 #ifndef _SYS_SYSPROTO_H_
3243 struct utimes_args {
3244 	char	*path;
3245 	struct	timeval *tptr;
3246 };
3247 #endif
3248 /* ARGSUSED */
3249 int
3250 utimes(td, uap)
3251 	struct thread *td;
3252 	register struct utimes_args /* {
3253 		syscallarg(char *) path;
3254 		syscallarg(struct timeval *) tptr;
3255 	} */ *uap;
3256 {
3257 	struct timespec ts[2];
3258 	struct timeval *usrtvp;
3259 	int error;
3260 	struct nameidata nd;
3261 
3262 	usrtvp = SCARG(uap, tptr);
3263 	if ((error = getutimes(usrtvp, ts)) != 0)
3264 		return (error);
3265 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3266 	if ((error = namei(&nd)) != 0)
3267 		return (error);
3268 	NDFREE(&nd, NDF_ONLY_PNBUF);
3269 	error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
3270 	vrele(nd.ni_vp);
3271 	return (error);
3272 }
3273 
3274 /*
3275  * Set the access and modification times of a file.
3276  */
3277 #ifndef _SYS_SYSPROTO_H_
3278 struct lutimes_args {
3279 	char	*path;
3280 	struct	timeval *tptr;
3281 };
3282 #endif
3283 /* ARGSUSED */
3284 int
3285 lutimes(td, uap)
3286 	struct thread *td;
3287 	register struct lutimes_args /* {
3288 		syscallarg(char *) path;
3289 		syscallarg(struct timeval *) tptr;
3290 	} */ *uap;
3291 {
3292 	struct timespec ts[2];
3293 	struct timeval *usrtvp;
3294 	int error;
3295 	struct nameidata nd;
3296 
3297 	usrtvp = SCARG(uap, tptr);
3298 	if ((error = getutimes(usrtvp, ts)) != 0)
3299 		return (error);
3300 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3301 	if ((error = namei(&nd)) != 0)
3302 		return (error);
3303 	NDFREE(&nd, NDF_ONLY_PNBUF);
3304 	error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
3305 	vrele(nd.ni_vp);
3306 	return (error);
3307 }
3308 
3309 /*
3310  * Set the access and modification times of a file.
3311  */
3312 #ifndef _SYS_SYSPROTO_H_
3313 struct futimes_args {
3314 	int	fd;
3315 	struct	timeval *tptr;
3316 };
3317 #endif
3318 /* ARGSUSED */
3319 int
3320 futimes(td, uap)
3321 	struct thread *td;
3322 	register struct futimes_args /* {
3323 		syscallarg(int ) fd;
3324 		syscallarg(struct timeval *) tptr;
3325 	} */ *uap;
3326 {
3327 	struct timespec ts[2];
3328 	struct file *fp;
3329 	struct timeval *usrtvp;
3330 	int error;
3331 
3332 	usrtvp = SCARG(uap, tptr);
3333 	if ((error = getutimes(usrtvp, ts)) != 0)
3334 		return (error);
3335 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3336 		return (error);
3337 	error = setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL);
3338 	fdrop(fp, td);
3339 	return (error);
3340 }
3341 
3342 /*
3343  * Truncate a file given its path name.
3344  */
3345 #ifndef _SYS_SYSPROTO_H_
3346 struct truncate_args {
3347 	char	*path;
3348 	int	pad;
3349 	off_t	length;
3350 };
3351 #endif
3352 /* ARGSUSED */
3353 int
3354 truncate(td, uap)
3355 	struct thread *td;
3356 	register struct truncate_args /* {
3357 		syscallarg(char *) path;
3358 		syscallarg(int) pad;
3359 		syscallarg(off_t) length;
3360 	} */ *uap;
3361 {
3362 	struct mount *mp;
3363 	struct vnode *vp;
3364 	struct vattr vattr;
3365 	int error;
3366 	struct nameidata nd;
3367 
3368 	if (uap->length < 0)
3369 		return(EINVAL);
3370 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3371 	if ((error = namei(&nd)) != 0)
3372 		return (error);
3373 	vp = nd.ni_vp;
3374 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3375 		vrele(vp);
3376 		return (error);
3377 	}
3378 	NDFREE(&nd, NDF_ONLY_PNBUF);
3379 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3380 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3381 	if (vp->v_type == VDIR)
3382 		error = EISDIR;
3383 	else if ((error = vn_writechk(vp)) == 0 &&
3384 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3385 		VATTR_NULL(&vattr);
3386 		vattr.va_size = SCARG(uap, length);
3387 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3388 	}
3389 	vput(vp);
3390 	vn_finished_write(mp);
3391 	return (error);
3392 }
3393 
3394 /*
3395  * Truncate a file given a file descriptor.
3396  */
3397 #ifndef _SYS_SYSPROTO_H_
3398 struct ftruncate_args {
3399 	int	fd;
3400 	int	pad;
3401 	off_t	length;
3402 };
3403 #endif
3404 /* ARGSUSED */
3405 int
3406 ftruncate(td, uap)
3407 	struct thread *td;
3408 	register struct ftruncate_args /* {
3409 		syscallarg(int) fd;
3410 		syscallarg(int) pad;
3411 		syscallarg(off_t) length;
3412 	} */ *uap;
3413 {
3414 	struct mount *mp;
3415 	struct vattr vattr;
3416 	struct vnode *vp;
3417 	struct file *fp;
3418 	int error;
3419 
3420 	if (uap->length < 0)
3421 		return(EINVAL);
3422 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3423 		return (error);
3424 	if ((fp->f_flag & FWRITE) == 0) {
3425 		fdrop(fp, td);
3426 		return (EINVAL);
3427 	}
3428 	vp = (struct vnode *)fp->f_data;
3429 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3430 		fdrop(fp, td);
3431 		return (error);
3432 	}
3433 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3434 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3435 	if (vp->v_type == VDIR)
3436 		error = EISDIR;
3437 	else if ((error = vn_writechk(vp)) == 0) {
3438 		VATTR_NULL(&vattr);
3439 		vattr.va_size = SCARG(uap, length);
3440 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3441 	}
3442 	VOP_UNLOCK(vp, 0, td);
3443 	vn_finished_write(mp);
3444 	fdrop(fp, td);
3445 	return (error);
3446 }
3447 
3448 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
3449 /*
3450  * Truncate a file given its path name.
3451  */
3452 #ifndef _SYS_SYSPROTO_H_
3453 struct otruncate_args {
3454 	char	*path;
3455 	long	length;
3456 };
3457 #endif
3458 /* ARGSUSED */
3459 int
3460 otruncate(td, uap)
3461 	struct thread *td;
3462 	register struct otruncate_args /* {
3463 		syscallarg(char *) path;
3464 		syscallarg(long) length;
3465 	} */ *uap;
3466 {
3467 	struct truncate_args /* {
3468 		syscallarg(char *) path;
3469 		syscallarg(int) pad;
3470 		syscallarg(off_t) length;
3471 	} */ nuap;
3472 
3473 	SCARG(&nuap, path) = SCARG(uap, path);
3474 	SCARG(&nuap, length) = SCARG(uap, length);
3475 	return (truncate(td, &nuap));
3476 }
3477 
3478 /*
3479  * Truncate a file given a file descriptor.
3480  */
3481 #ifndef _SYS_SYSPROTO_H_
3482 struct oftruncate_args {
3483 	int	fd;
3484 	long	length;
3485 };
3486 #endif
3487 /* ARGSUSED */
3488 int
3489 oftruncate(td, uap)
3490 	struct thread *td;
3491 	register struct oftruncate_args /* {
3492 		syscallarg(int) fd;
3493 		syscallarg(long) length;
3494 	} */ *uap;
3495 {
3496 	struct ftruncate_args /* {
3497 		syscallarg(int) fd;
3498 		syscallarg(int) pad;
3499 		syscallarg(off_t) length;
3500 	} */ nuap;
3501 
3502 	SCARG(&nuap, fd) = SCARG(uap, fd);
3503 	SCARG(&nuap, length) = SCARG(uap, length);
3504 	return (ftruncate(td, &nuap));
3505 }
3506 #endif /* COMPAT_43 || COMPAT_SUNOS */
3507 
3508 /*
3509  * Sync an open file.
3510  */
3511 #ifndef _SYS_SYSPROTO_H_
3512 struct fsync_args {
3513 	int	fd;
3514 };
3515 #endif
3516 /* ARGSUSED */
3517 int
3518 fsync(td, uap)
3519 	struct thread *td;
3520 	struct fsync_args /* {
3521 		syscallarg(int) fd;
3522 	} */ *uap;
3523 {
3524 	struct vnode *vp;
3525 	struct mount *mp;
3526 	struct file *fp;
3527 	vm_object_t obj;
3528 	int error;
3529 
3530 	GIANT_REQUIRED;
3531 
3532 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3533 		return (error);
3534 	vp = (struct vnode *)fp->f_data;
3535 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3536 		fdrop(fp, td);
3537 		return (error);
3538 	}
3539 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3540 	if (VOP_GETVOBJECT(vp, &obj) == 0) {
3541 		vm_object_page_clean(obj, 0, 0, 0);
3542 	}
3543 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
3544 #ifdef SOFTUPDATES
3545 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3546 	    error = softdep_fsync(vp);
3547 #endif
3548 
3549 	VOP_UNLOCK(vp, 0, td);
3550 	vn_finished_write(mp);
3551 	fdrop(fp, td);
3552 	return (error);
3553 }
3554 
3555 /*
3556  * Rename files.  Source and destination must either both be directories,
3557  * or both not be directories.  If target is a directory, it must be empty.
3558  */
3559 #ifndef _SYS_SYSPROTO_H_
3560 struct rename_args {
3561 	char	*from;
3562 	char	*to;
3563 };
3564 #endif
3565 /* ARGSUSED */
3566 int
3567 rename(td, uap)
3568 	struct thread *td;
3569 	register struct rename_args /* {
3570 		syscallarg(char *) from;
3571 		syscallarg(char *) to;
3572 	} */ *uap;
3573 {
3574 	struct mount *mp;
3575 	struct vnode *tvp, *fvp, *tdvp;
3576 	struct nameidata fromnd, tond;
3577 	int error;
3578 
3579 	bwillwrite();
3580 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
3581 	    SCARG(uap, from), td);
3582 	if ((error = namei(&fromnd)) != 0)
3583 		return (error);
3584 	fvp = fromnd.ni_vp;
3585 	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
3586 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3587 		vrele(fromnd.ni_dvp);
3588 		vrele(fvp);
3589 		goto out1;
3590 	}
3591 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
3592 	    UIO_USERSPACE, SCARG(uap, to), td);
3593 	if (fromnd.ni_vp->v_type == VDIR)
3594 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3595 	if ((error = namei(&tond)) != 0) {
3596 		/* Translate error code for rename("dir1", "dir2/."). */
3597 		if (error == EISDIR && fvp->v_type == VDIR)
3598 			error = EINVAL;
3599 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3600 		vrele(fromnd.ni_dvp);
3601 		vrele(fvp);
3602 		goto out1;
3603 	}
3604 	tdvp = tond.ni_dvp;
3605 	tvp = tond.ni_vp;
3606 	if (tvp != NULL) {
3607 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3608 			error = ENOTDIR;
3609 			goto out;
3610 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3611 			error = EISDIR;
3612 			goto out;
3613 		}
3614 	}
3615 	if (fvp == tdvp)
3616 		error = EINVAL;
3617 	/*
3618 	 * If source is the same as the destination (that is the
3619 	 * same inode number with the same name in the same directory),
3620 	 * then there is nothing to do.
3621 	 */
3622 	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
3623 	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3624 	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
3625 	      fromnd.ni_cnd.cn_namelen))
3626 		error = -1;
3627 out:
3628 	if (!error) {
3629 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3630 		if (fromnd.ni_dvp != tdvp) {
3631 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3632 		}
3633 		if (tvp) {
3634 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3635 		}
3636 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3637 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3638 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3639 		NDFREE(&tond, NDF_ONLY_PNBUF);
3640 	} else {
3641 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3642 		NDFREE(&tond, NDF_ONLY_PNBUF);
3643 		if (tdvp == tvp)
3644 			vrele(tdvp);
3645 		else
3646 			vput(tdvp);
3647 		if (tvp)
3648 			vput(tvp);
3649 		vrele(fromnd.ni_dvp);
3650 		vrele(fvp);
3651 	}
3652 	vrele(tond.ni_startdir);
3653 	vn_finished_write(mp);
3654 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
3655 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
3656 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
3657 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
3658 out1:
3659 	if (fromnd.ni_startdir)
3660 		vrele(fromnd.ni_startdir);
3661 	if (error == -1)
3662 		return (0);
3663 	return (error);
3664 }
3665 
3666 /*
3667  * Make a directory file.
3668  */
3669 #ifndef _SYS_SYSPROTO_H_
3670 struct mkdir_args {
3671 	char	*path;
3672 	int	mode;
3673 };
3674 #endif
3675 /* ARGSUSED */
3676 int
3677 mkdir(td, uap)
3678 	struct thread *td;
3679 	register struct mkdir_args /* {
3680 		syscallarg(char *) path;
3681 		syscallarg(int) mode;
3682 	} */ *uap;
3683 {
3684 
3685 	return vn_mkdir(uap->path, uap->mode, UIO_USERSPACE, td);
3686 }
3687 
3688 int
3689 vn_mkdir(path, mode, segflg, td)
3690 	char *path;
3691 	int mode;
3692 	enum uio_seg segflg;
3693 	struct thread *td;
3694 {
3695 	struct mount *mp;
3696 	struct vnode *vp;
3697 	struct vattr vattr;
3698 	int error;
3699 	struct nameidata nd;
3700 
3701 restart:
3702 	bwillwrite();
3703 	NDINIT(&nd, CREATE, LOCKPARENT, segflg, path, td);
3704 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3705 	if ((error = namei(&nd)) != 0)
3706 		return (error);
3707 	vp = nd.ni_vp;
3708 	if (vp != NULL) {
3709 		NDFREE(&nd, NDF_ONLY_PNBUF);
3710 		vrele(vp);
3711 		vput(nd.ni_dvp);
3712 		return (EEXIST);
3713 	}
3714 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3715 		NDFREE(&nd, NDF_ONLY_PNBUF);
3716 		vput(nd.ni_dvp);
3717 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3718 			return (error);
3719 		goto restart;
3720 	}
3721 	VATTR_NULL(&vattr);
3722 	vattr.va_type = VDIR;
3723 	FILEDESC_LOCK(td->td_proc->p_fd);
3724 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3725 	FILEDESC_UNLOCK(td->td_proc->p_fd);
3726 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3727 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3728 	NDFREE(&nd, NDF_ONLY_PNBUF);
3729 	vput(nd.ni_dvp);
3730 	if (!error)
3731 		vput(nd.ni_vp);
3732 	vn_finished_write(mp);
3733 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
3734 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
3735 	return (error);
3736 }
3737 
3738 /*
3739  * Remove a directory file.
3740  */
3741 #ifndef _SYS_SYSPROTO_H_
3742 struct rmdir_args {
3743 	char	*path;
3744 };
3745 #endif
3746 /* ARGSUSED */
3747 int
3748 rmdir(td, uap)
3749 	struct thread *td;
3750 	struct rmdir_args /* {
3751 		syscallarg(char *) path;
3752 	} */ *uap;
3753 {
3754 	struct mount *mp;
3755 	struct vnode *vp;
3756 	int error;
3757 	struct nameidata nd;
3758 
3759 restart:
3760 	bwillwrite();
3761 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3762 	    SCARG(uap, path), td);
3763 	if ((error = namei(&nd)) != 0)
3764 		return (error);
3765 	vp = nd.ni_vp;
3766 	if (vp->v_type != VDIR) {
3767 		error = ENOTDIR;
3768 		goto out;
3769 	}
3770 	/*
3771 	 * No rmdir "." please.
3772 	 */
3773 	if (nd.ni_dvp == vp) {
3774 		error = EINVAL;
3775 		goto out;
3776 	}
3777 	/*
3778 	 * The root of a mounted filesystem cannot be deleted.
3779 	 */
3780 	if (vp->v_flag & VROOT) {
3781 		error = EBUSY;
3782 		goto out;
3783 	}
3784 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3785 		NDFREE(&nd, NDF_ONLY_PNBUF);
3786 		if (nd.ni_dvp == vp)
3787 			vrele(nd.ni_dvp);
3788 		else
3789 			vput(nd.ni_dvp);
3790 		vput(vp);
3791 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3792 			return (error);
3793 		goto restart;
3794 	}
3795 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3796 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3797 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3798 	vn_finished_write(mp);
3799 out:
3800 	NDFREE(&nd, NDF_ONLY_PNBUF);
3801 	if (nd.ni_dvp == vp)
3802 		vrele(nd.ni_dvp);
3803 	else
3804 		vput(nd.ni_dvp);
3805 	vput(vp);
3806 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3807 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3808 	return (error);
3809 }
3810 
#ifdef COMPAT_43
/*
 * Old-format getdirentries: read a block of directory entries and hand
 * them to the caller in the older layout, in which the bytes now used
 * for d_type and d_namlen were read as one 16-bit name length.  The
 * starting offset of the read is stored through "basep".
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(td, uap)
	struct thread *td;
	register struct ogetdirentries_args /* {
		syscallarg(int) fd;
		syscallarg(char *) buf;
		syscallarg(u_int) count;
		syscallarg(long *) basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
	if (error != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = (struct vnode *)fp->f_data;
unionread:
	if (vp->v_type != VDIR) {
		fdrop(fp, td);
		return (EINVAL);
	}
	/* Describe the caller's buffer. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	loff = auio.uio_offset = fp->f_offset;
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		/* This case reads straight into the caller's buffer. */
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/*
		 * Read into a temporary kernel buffer, patch every entry
		 * in place, then copy the result out in one go.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			dp = (struct dirent *)dirbuf;
			while (dp < edp) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				/* A zero record length would never advance. */
				if (dp->d_reclen <= 0) {
					error = EIO;
					break;
				}
				dp = (struct dirent *)
				    ((char *)dp + dp->d_reclen);
			}
			/* Copy out only if the whole buffer was walked. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, td);
	if (error != 0) {
		fdrop(fp, td);
		return (error);
	}
	if (uap->count == auio.uio_resid) {
		/*
		 * Nothing was transferred.  On a union mount, continue with
		 * the next directory and retry the read.
		 */
		if (union_dircheckp) {
			error = union_dircheckp(td, &vp, fp);
			if (error == -1)
				goto unionread;
			if (error != 0) {
				fdrop(fp, td);
				return (error);
			}
		}
		if ((vp->v_flag & VROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			struct vnode *tvp = vp;

			/* Switch the open file to the covered vnode. */
			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->f_data = (caddr_t) vp;
			fp->f_offset = 0;
			vrele(tvp);
			goto unionread;
		}
	}
	error = copyout((caddr_t)&loff, (caddr_t)uap->basep,
	    sizeof(long));
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	return (error);
}
#endif /* COMPAT_43 */
3951 
3952 /*
3953  * Read a block of directory entries in a file system independent format.
3954  */
3955 #ifndef _SYS_SYSPROTO_H_
3956 struct getdirentries_args {
3957 	int	fd;
3958 	char	*buf;
3959 	u_int	count;
3960 	long	*basep;
3961 };
3962 #endif
3963 int
3964 getdirentries(td, uap)
3965 	struct thread *td;
3966 	register struct getdirentries_args /* {
3967 		syscallarg(int) fd;
3968 		syscallarg(char *) buf;
3969 		syscallarg(u_int) count;
3970 		syscallarg(long *) basep;
3971 	} */ *uap;
3972 {
3973 	struct vnode *vp;
3974 	struct file *fp;
3975 	struct uio auio;
3976 	struct iovec aiov;
3977 	long loff;
3978 	int error, eofflag;
3979 
3980 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3981 		return (error);
3982 	if ((fp->f_flag & FREAD) == 0) {
3983 		fdrop(fp, td);
3984 		return (EBADF);
3985 	}
3986 	vp = (struct vnode *)fp->f_data;
3987 unionread:
3988 	if (vp->v_type != VDIR) {
3989 		fdrop(fp, td);
3990 		return (EINVAL);
3991 	}
3992 	aiov.iov_base = SCARG(uap, buf);
3993 	aiov.iov_len = SCARG(uap, count);
3994 	auio.uio_iov = &aiov;
3995 	auio.uio_iovcnt = 1;
3996 	auio.uio_rw = UIO_READ;
3997 	auio.uio_segflg = UIO_USERSPACE;
3998 	auio.uio_td = td;
3999 	auio.uio_resid = SCARG(uap, count);
4000 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
4001 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4002 	loff = auio.uio_offset = fp->f_offset;
4003 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
4004 	fp->f_offset = auio.uio_offset;
4005 	VOP_UNLOCK(vp, 0, td);
4006 	if (error) {
4007 		fdrop(fp, td);
4008 		return (error);
4009 	}
4010 	if (SCARG(uap, count) == auio.uio_resid) {
4011 		if (union_dircheckp) {
4012 			error = union_dircheckp(td, &vp, fp);
4013 			if (error == -1)
4014 				goto unionread;
4015 			if (error) {
4016 				fdrop(fp, td);
4017 				return (error);
4018 			}
4019 		}
4020 		if ((vp->v_flag & VROOT) &&
4021 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
4022 			struct vnode *tvp = vp;
4023 			vp = vp->v_mount->mnt_vnodecovered;
4024 			VREF(vp);
4025 			fp->f_data = (caddr_t) vp;
4026 			fp->f_offset = 0;
4027 			vrele(tvp);
4028 			goto unionread;
4029 		}
4030 	}
4031 	if (SCARG(uap, basep) != NULL) {
4032 		error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
4033 		    sizeof(long));
4034 	}
4035 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
4036 	fdrop(fp, td);
4037 	return (error);
4038 }
4039 #ifndef _SYS_SYSPROTO_H_
4040 struct getdents_args {
4041 	int fd;
4042 	char *buf;
4043 	size_t count;
4044 };
4045 #endif
4046 int
4047 getdents(td, uap)
4048 	struct thread *td;
4049 	register struct getdents_args /* {
4050 		syscallarg(int) fd;
4051 		syscallarg(char *) buf;
4052 		syscallarg(u_int) count;
4053 	} */ *uap;
4054 {
4055 	struct getdirentries_args ap;
4056 	ap.fd = uap->fd;
4057 	ap.buf = uap->buf;
4058 	ap.count = uap->count;
4059 	ap.basep = NULL;
4060 	return getdirentries(td, &ap);
4061 }
4062 
4063 /*
4064  * Set the mode mask for creation of filesystem nodes.
4065  *
4066  * MP SAFE
4067  */
4068 #ifndef _SYS_SYSPROTO_H_
4069 struct umask_args {
4070 	int	newmask;
4071 };
4072 #endif
4073 int
4074 umask(td, uap)
4075 	struct thread *td;
4076 	struct umask_args /* {
4077 		syscallarg(int) newmask;
4078 	} */ *uap;
4079 {
4080 	register struct filedesc *fdp;
4081 
4082 	FILEDESC_LOCK(td->td_proc->p_fd);
4083 	fdp = td->td_proc->p_fd;
4084 	td->td_retval[0] = fdp->fd_cmask;
4085 	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
4086 	FILEDESC_UNLOCK(td->td_proc->p_fd);
4087 	return (0);
4088 }
4089 
4090 /*
4091  * Void all references to file by ripping underlying filesystem
4092  * away from vnode.
4093  */
4094 #ifndef _SYS_SYSPROTO_H_
4095 struct revoke_args {
4096 	char	*path;
4097 };
4098 #endif
4099 /* ARGSUSED */
4100 int
4101 revoke(td, uap)
4102 	struct thread *td;
4103 	register struct revoke_args /* {
4104 		syscallarg(char *) path;
4105 	} */ *uap;
4106 {
4107 	struct mount *mp;
4108 	struct vnode *vp;
4109 	struct vattr vattr;
4110 	int error;
4111 	struct nameidata nd;
4112 
4113 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path),
4114 	    td);
4115 	if ((error = namei(&nd)) != 0)
4116 		return (error);
4117 	vp = nd.ni_vp;
4118 	NDFREE(&nd, NDF_ONLY_PNBUF);
4119 	if (vp->v_type != VCHR) {
4120 		vput(vp);
4121 		return (EINVAL);
4122 	}
4123 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
4124 	if (error) {
4125 		vput(vp);
4126 		return (error);
4127 	}
4128 	VOP_UNLOCK(vp, 0, td);
4129 	if (td->td_ucred->cr_uid != vattr.va_uid) {
4130 		error = suser_cred(td->td_ucred, PRISON_ROOT);
4131 		if (error)
4132 			goto out;
4133 	}
4134 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4135 		goto out;
4136 	if (vcount(vp) > 1)
4137 		VOP_REVOKE(vp, REVOKEALL);
4138 	vn_finished_write(mp);
4139 out:
4140 	vrele(vp);
4141 	return (error);
4142 }
4143 
4144 /*
4145  * Convert a user file descriptor to a kernel file entry.
4146  * The file entry is locked upon returning.
4147  */
4148 int
4149 getvnode(fdp, fd, fpp)
4150 	struct filedesc *fdp;
4151 	int fd;
4152 	struct file **fpp;
4153 {
4154 	int error;
4155 	struct file *fp;
4156 
4157 	fp = NULL;
4158 	if (fdp == NULL)
4159 		error = EBADF;
4160 	else {
4161 		FILEDESC_LOCK(fdp);
4162 		if ((u_int)fd >= fdp->fd_nfiles ||
4163 		    (fp = fdp->fd_ofiles[fd]) == NULL)
4164 			error = EBADF;
4165 		else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
4166 			fp = NULL;
4167 			error = EINVAL;
4168 		} else {
4169 			fhold(fp);
4170 			error = 0;
4171 		}
4172 		FILEDESC_UNLOCK(fdp);
4173 	}
4174 	*fpp = fp;
4175 	return (error);
4176 }
4177 /*
4178  * Get (NFS) file handle
4179  */
4180 #ifndef _SYS_SYSPROTO_H_
4181 struct getfh_args {
4182 	char	*fname;
4183 	fhandle_t *fhp;
4184 };
4185 #endif
4186 int
4187 getfh(td, uap)
4188 	struct thread *td;
4189 	register struct getfh_args *uap;
4190 {
4191 	struct nameidata nd;
4192 	fhandle_t fh;
4193 	register struct vnode *vp;
4194 	int error;
4195 
4196 	/*
4197 	 * Must be super user
4198 	 */
4199 	error = suser(td);
4200 	if (error)
4201 		return (error);
4202 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
4203 	error = namei(&nd);
4204 	if (error)
4205 		return (error);
4206 	NDFREE(&nd, NDF_ONLY_PNBUF);
4207 	vp = nd.ni_vp;
4208 	bzero(&fh, sizeof(fh));
4209 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4210 	error = VFS_VPTOFH(vp, &fh.fh_fid);
4211 	vput(vp);
4212 	if (error)
4213 		return (error);
4214 	error = copyout(&fh, uap->fhp, sizeof (fh));
4215 	return (error);
4216 }
4217 
4218 /*
4219  * syscall for the rpc.lockd to use to translate a NFS file handle into
4220  * an open descriptor.
4221  *
4222  * warning: do not remove the suser() call or this becomes one giant
4223  * security hole.
4224  */
4225 #ifndef _SYS_SYSPROTO_H_
4226 struct fhopen_args {
4227 	const struct fhandle *u_fhp;
4228 	int flags;
4229 };
4230 #endif
4231 int
4232 fhopen(td, uap)
4233 	struct thread *td;
4234 	struct fhopen_args /* {
4235 		syscallarg(const struct fhandle *) u_fhp;
4236 		syscallarg(int) flags;
4237 	} */ *uap;
4238 {
4239 	struct proc *p = td->td_proc;
4240 	struct mount *mp;
4241 	struct vnode *vp;
4242 	struct fhandle fhp;
4243 	struct vattr vat;
4244 	struct vattr *vap = &vat;
4245 	struct flock lf;
4246 	struct file *fp;
4247 	register struct filedesc *fdp = p->p_fd;
4248 	int fmode, mode, error, type;
4249 	struct file *nfp;
4250 	int indx;
4251 
4252 	/*
4253 	 * Must be super user
4254 	 */
4255 	error = suser(td);
4256 	if (error)
4257 		return (error);
4258 
4259 	fmode = FFLAGS(SCARG(uap, flags));
4260 	/* why not allow a non-read/write open for our lockd? */
4261 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4262 		return (EINVAL);
4263 	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
4264 	if (error)
4265 		return(error);
4266 	/* find the mount point */
4267 	mp = vfs_getvfs(&fhp.fh_fsid);
4268 	if (mp == NULL)
4269 		return (ESTALE);
4270 	/* now give me my vnode, it gets returned to me locked */
4271 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4272 	if (error)
4273 		return (error);
4274  	/*
4275 	 * from now on we have to make sure not
4276 	 * to forget about the vnode
4277 	 * any error that causes an abort must vput(vp)
4278 	 * just set error = err and 'goto bad;'.
4279 	 */
4280 
4281 	/*
4282 	 * from vn_open
4283 	 */
4284 	if (vp->v_type == VLNK) {
4285 		error = EMLINK;
4286 		goto bad;
4287 	}
4288 	if (vp->v_type == VSOCK) {
4289 		error = EOPNOTSUPP;
4290 		goto bad;
4291 	}
4292 	mode = 0;
4293 	if (fmode & (FWRITE | O_TRUNC)) {
4294 		if (vp->v_type == VDIR) {
4295 			error = EISDIR;
4296 			goto bad;
4297 		}
4298 		error = vn_writechk(vp);
4299 		if (error)
4300 			goto bad;
4301 		mode |= VWRITE;
4302 	}
4303 	if (fmode & FREAD)
4304 		mode |= VREAD;
4305 	if (mode) {
4306 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4307 		if (error)
4308 			goto bad;
4309 	}
4310 	if (fmode & O_TRUNC) {
4311 		VOP_UNLOCK(vp, 0, td);				/* XXX */
4312 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4313 			vrele(vp);
4314 			return (error);
4315 		}
4316 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4317 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4318 		VATTR_NULL(vap);
4319 		vap->va_size = 0;
4320 		error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4321 		vn_finished_write(mp);
4322 		if (error)
4323 			goto bad;
4324 	}
4325 	error = VOP_OPEN(vp, fmode, td->td_ucred, td);
4326 	if (error)
4327 		goto bad;
4328 	/*
4329 	 * Make sure that a VM object is created for VMIO support.
4330 	 */
4331 	if (vn_canvmio(vp) == TRUE) {
4332 		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
4333 			goto bad;
4334 	}
4335 	if (fmode & FWRITE)
4336 		vp->v_writecount++;
4337 
4338 	/*
4339 	 * end of vn_open code
4340 	 */
4341 
4342 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4343 		if (fmode & FWRITE)
4344 			vp->v_writecount--;
4345 		goto bad;
4346 	}
4347 	fp = nfp;
4348 
4349 	/*
4350 	 * Hold an extra reference to avoid having fp ripped out
4351 	 * from under us while we block in the lock op
4352 	 */
4353 	fhold(fp);
4354 	nfp->f_data = (caddr_t)vp;
4355 	nfp->f_flag = fmode & FMASK;
4356 	nfp->f_ops = &vnops;
4357 	nfp->f_type = DTYPE_VNODE;
4358 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4359 		lf.l_whence = SEEK_SET;
4360 		lf.l_start = 0;
4361 		lf.l_len = 0;
4362 		if (fmode & O_EXLOCK)
4363 			lf.l_type = F_WRLCK;
4364 		else
4365 			lf.l_type = F_RDLCK;
4366 		type = F_FLOCK;
4367 		if ((fmode & FNONBLOCK) == 0)
4368 			type |= F_WAIT;
4369 		VOP_UNLOCK(vp, 0, td);
4370 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
4371 			/*
4372 			 * The lock request failed.  Normally close the
4373 			 * descriptor but handle the case where someone might
4374 			 * have dup()d or close()d it when we weren't looking.
4375 			 */
4376 			FILEDESC_LOCK(fdp);
4377 			if (fdp->fd_ofiles[indx] == fp) {
4378 				fdp->fd_ofiles[indx] = NULL;
4379 				FILEDESC_UNLOCK(fdp);
4380 				fdrop(fp, td);
4381 			} else
4382 				FILEDESC_UNLOCK(fdp);
4383 			/*
4384 			 * release our private reference
4385 			 */
4386 			fdrop(fp, td);
4387 			return(error);
4388 		}
4389 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4390 		fp->f_flag |= FHASLOCK;
4391 	}
4392 	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
4393 		vfs_object_create(vp, td, td->td_ucred);
4394 
4395 	VOP_UNLOCK(vp, 0, td);
4396 	fdrop(fp, td);
4397 	td->td_retval[0] = indx;
4398 	return (0);
4399 
4400 bad:
4401 	vput(vp);
4402 	return (error);
4403 }
4404 
4405 /*
4406  * Stat an (NFS) file handle.
4407  */
4408 #ifndef _SYS_SYSPROTO_H_
4409 struct fhstat_args {
4410 	struct fhandle *u_fhp;
4411 	struct stat *sb;
4412 };
4413 #endif
4414 int
4415 fhstat(td, uap)
4416 	struct thread *td;
4417 	register struct fhstat_args /* {
4418 		syscallarg(struct fhandle *) u_fhp;
4419 		syscallarg(struct stat *) sb;
4420 	} */ *uap;
4421 {
4422 	struct stat sb;
4423 	fhandle_t fh;
4424 	struct mount *mp;
4425 	struct vnode *vp;
4426 	int error;
4427 
4428 	/*
4429 	 * Must be super user
4430 	 */
4431 	error = suser(td);
4432 	if (error)
4433 		return (error);
4434 
4435 	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
4436 	if (error)
4437 		return (error);
4438 
4439 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4440 		return (ESTALE);
4441 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4442 		return (error);
4443 	error = vn_stat(vp, &sb, td);
4444 	vput(vp);
4445 	if (error)
4446 		return (error);
4447 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
4448 	return (error);
4449 }
4450 
4451 /*
4452  * Implement fstatfs() for (NFS) file handles.
4453  */
4454 #ifndef _SYS_SYSPROTO_H_
4455 struct fhstatfs_args {
4456 	struct fhandle *u_fhp;
4457 	struct statfs *buf;
4458 };
4459 #endif
4460 int
4461 fhstatfs(td, uap)
4462 	struct thread *td;
4463 	struct fhstatfs_args /* {
4464 		syscallarg(struct fhandle) *u_fhp;
4465 		syscallarg(struct statfs) *buf;
4466 	} */ *uap;
4467 {
4468 	struct statfs *sp;
4469 	struct mount *mp;
4470 	struct vnode *vp;
4471 	struct statfs sb;
4472 	fhandle_t fh;
4473 	int error;
4474 
4475 	/*
4476 	 * Must be super user
4477 	 */
4478 	error = suser(td);
4479 	if (error)
4480 		return (error);
4481 
4482 	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
4483 		return (error);
4484 
4485 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4486 		return (ESTALE);
4487 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4488 		return (error);
4489 	mp = vp->v_mount;
4490 	sp = &mp->mnt_stat;
4491 	vput(vp);
4492 	if ((error = VFS_STATFS(mp, sp, td)) != 0)
4493 		return (error);
4494 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4495 	if (suser(td)) {
4496 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
4497 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
4498 		sp = &sb;
4499 	}
4500 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
4501 }
4502 
4503 /*
4504  * Syscall to push extended attribute configuration information into the
4505  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4506  * a command (int cmd), and attribute name and misc data.  For now, the
4507  * attribute name is left in userspace for consumption by the VFS_op.
4508  * It will probably be changed to be copied into sysspace by the
4509  * syscall in the future, once issues with various consumers of the
4510  * attribute code have raised their hands.
4511  *
4512  * Currently this is used only by UFS Extended Attributes.
4513  */
4514 int
4515 extattrctl(td, uap)
4516 	struct thread *td;
4517 	struct extattrctl_args /* {
4518 		syscallarg(const char *) path;
4519 		syscallarg(int) cmd;
4520 		syscallarg(const char *) filename;
4521 		syscallarg(int) attrnamespace;
4522 		syscallarg(const char *) attrname;
4523 	} */ *uap;
4524 {
4525 	struct vnode *filename_vp;
4526 	struct nameidata nd;
4527 	struct mount *mp, *mp_writable;
4528 	char attrname[EXTATTR_MAXNAMELEN];
4529 	int error;
4530 
4531 	/*
4532 	 * uap->attrname is not always defined.  We check again later when we
4533 	 * invoke the VFS call so as to pass in NULL there if needed.
4534 	 */
4535 	if (uap->attrname != NULL) {
4536 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4537 		    NULL);
4538 		if (error)
4539 			return (error);
4540 	}
4541 
4542 	/*
4543 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4544 	 * which VFS_EXTATTRCTL() will later release.
4545 	 */
4546 	filename_vp = NULL;
4547 	if (uap->filename != NULL) {
4548 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
4549 		    uap->filename, td);
4550 		if ((error = namei(&nd)) != 0)
4551 			return (error);
4552 		filename_vp = nd.ni_vp;
4553 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4554 	}
4555 
4556 	/* uap->path is always defined. */
4557 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4558 	if ((error = namei(&nd)) != 0) {
4559 		if (filename_vp != NULL)
4560 			vput(filename_vp);
4561 		return (error);
4562 	}
4563 	mp = nd.ni_vp->v_mount;
4564 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4565 	NDFREE(&nd, 0);
4566 	if (error) {
4567 		if (filename_vp != NULL)
4568 			vput(filename_vp);
4569 		return (error);
4570 	}
4571 
4572 	if (uap->attrname != NULL) {
4573 		error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp,
4574 		    uap->attrnamespace, attrname, td);
4575 	} else {
4576 		error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp,
4577 		    uap->attrnamespace, NULL, td);
4578 	}
4579 
4580 	vn_finished_write(mp_writable);
4581 	/*
4582 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4583 	 * filename_vp, so vrele it if it is defined.
4584 	 */
4585 	if (filename_vp != NULL)
4586 		vrele(filename_vp);
4587 
4588 	return (error);
4589 }
4590 
4591 /*-
4592  * Set a named extended attribute on a file or directory
4593  *
4594  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4595  *            kernelspace string pointer "attrname", userspace buffer
4596  *            pointer "data", buffer length "nbytes", thread "td".
4597  * Returns: 0 on success, an error number otherwise
4598  * Locks: none
4599  * References: vp must be a valid reference for the duration of the call
4600  */
4601 static int
4602 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4603     void *data, size_t nbytes, struct thread *td)
4604 {
4605 	struct mount *mp;
4606 	struct uio auio;
4607 	struct iovec aiov;
4608 	ssize_t cnt;
4609 	int error;
4610 
4611 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4612 		return (error);
4613 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4614 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4615 
4616 	aiov.iov_base = data;
4617 	aiov.iov_len = nbytes;
4618 	auio.uio_iov = &aiov;
4619 	auio.uio_iovcnt = 1;
4620 	auio.uio_offset = 0;
4621 	if (nbytes > INT_MAX) {
4622 		error = EINVAL;
4623 		goto done;
4624 	}
4625 	auio.uio_resid = nbytes;
4626 	auio.uio_rw = UIO_WRITE;
4627 	auio.uio_segflg = UIO_USERSPACE;
4628 	auio.uio_td = td;
4629 	cnt = nbytes;
4630 
4631 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4632 	    td->td_ucred, td);
4633 	cnt -= auio.uio_resid;
4634 	td->td_retval[0] = cnt;
4635 
4636 done:
4637 	VOP_UNLOCK(vp, 0, td);
4638 	vn_finished_write(mp);
4639 	return (error);
4640 }
4641 
4642 int
4643 extattr_set_file(td, uap)
4644 	struct thread *td;
4645 	struct extattr_set_file_args /* {
4646 		syscallarg(const char *) path;
4647 		syscallarg(int) attrnamespace;
4648 		syscallarg(const char *) attrname;
4649 		syscallarg(void *) data;
4650 		syscallarg(size_t) nbytes;
4651 	} */ *uap;
4652 {
4653 	struct nameidata nd;
4654 	char attrname[EXTATTR_MAXNAMELEN];
4655 	int error;
4656 
4657 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4658 	if (error)
4659 		return (error);
4660 
4661 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4662 	if ((error = namei(&nd)) != 0)
4663 		return (error);
4664 	NDFREE(&nd, NDF_ONLY_PNBUF);
4665 
4666 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4667 	    uap->data, uap->nbytes, td);
4668 
4669 	vrele(nd.ni_vp);
4670 	return (error);
4671 }
4672 
4673 int
4674 extattr_set_fd(td, uap)
4675 	struct thread *td;
4676 	struct extattr_set_fd_args /* {
4677 		syscallarg(int) fd;
4678 		syscallarg(int) attrnamespace;
4679 		syscallarg(const char *) attrname;
4680 		syscallarg(void *) data;
4681 		syscallarg(size_t) nbytes;
4682 	} */ *uap;
4683 {
4684 	struct file *fp;
4685 	char attrname[EXTATTR_MAXNAMELEN];
4686 	int error;
4687 
4688 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4689 	if (error)
4690 		return (error);
4691 
4692 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
4693 		return (error);
4694 
4695 	error = extattr_set_vp((struct vnode *)fp->f_data, uap->attrnamespace,
4696 	    attrname, uap->data, uap->nbytes, td);
4697 	fdrop(fp, td);
4698 
4699 	return (error);
4700 }
4701 
4702 /*-
4703  * Get a named extended attribute on a file or directory
4704  *
4705  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4706  *            kernelspace string pointer "attrname", userspace buffer
4707  *            pointer "data", buffer length "nbytes", thread "td".
4708  * Returns: 0 on success, an error number otherwise
4709  * Locks: none
4710  * References: vp must be a valid reference for the duration of the call
4711  */
4712 static int
4713 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4714     void *data, size_t nbytes, struct thread *td)
4715 {
4716 	struct uio auio, *auiop;
4717 	struct iovec aiov;
4718 	ssize_t cnt;
4719 	size_t size, *sizep;
4720 	int error;
4721 
4722 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4723 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4724 
4725 	/*
4726 	 * Slightly unusual semantics: if the user provides a NULL data
4727 	 * pointer, they don't want to receive the data, just the
4728 	 * maximum read length.
4729 	 */
4730 	auiop = NULL;
4731 	sizep = NULL;
4732 	cnt = 0;
4733 	if (data != NULL) {
4734 		aiov.iov_base = data;
4735 		aiov.iov_len = nbytes;
4736 		auio.uio_iov = &aiov;
4737 		auio.uio_offset = 0;
4738 		if (nbytes > INT_MAX) {
4739 			error = EINVAL;
4740 			goto done;
4741 		}
4742 		auio.uio_resid = nbytes;
4743 		auio.uio_rw = UIO_READ;
4744 		auio.uio_segflg = UIO_USERSPACE;
4745 		auio.uio_td = td;
4746 		auiop = &auio;
4747 		cnt = nbytes;
4748 	} else
4749 		sizep = &size;
4750 
4751 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4752 	    td->td_ucred, td);
4753 
4754 	if (auiop != NULL) {
4755 		cnt -= auio.uio_resid;
4756 		td->td_retval[0] = cnt;
4757 	} else
4758 		td->td_retval[0] = size;
4759 
4760 done:
4761 	VOP_UNLOCK(vp, 0, td);
4762 	return (error);
4763 }
4764 
4765 int
4766 extattr_get_file(td, uap)
4767 	struct thread *td;
4768 	struct extattr_get_file_args /* {
4769 		syscallarg(const char *) path;
4770 		syscallarg(int) attrnamespace;
4771 		syscallarg(const char *) attrname;
4772 		syscallarg(void *) data;
4773 		syscallarg(size_t) nbytes;
4774 	} */ *uap;
4775 {
4776 	struct nameidata nd;
4777 	char attrname[EXTATTR_MAXNAMELEN];
4778 	int error;
4779 
4780 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4781 	if (error)
4782 		return (error);
4783 
4784 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4785 	if ((error = namei(&nd)) != 0)
4786 		return (error);
4787 	NDFREE(&nd, NDF_ONLY_PNBUF);
4788 
4789 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4790 	    uap->data, uap->nbytes, td);
4791 
4792 	vrele(nd.ni_vp);
4793 	return (error);
4794 }
4795 
4796 int
4797 extattr_get_fd(td, uap)
4798 	struct thread *td;
4799 	struct extattr_get_fd_args /* {
4800 		syscallarg(int) fd;
4801 		syscallarg(int) attrnamespace;
4802 		syscallarg(const char *) attrname;
4803 		syscallarg(void *) data;
4804 		syscallarg(size_t) nbytes;
4805 	} */ *uap;
4806 {
4807 	struct file *fp;
4808 	char attrname[EXTATTR_MAXNAMELEN];
4809 	int error;
4810 
4811 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4812 	if (error)
4813 		return (error);
4814 
4815 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4816 		return (error);
4817 
4818 	error = extattr_get_vp((struct vnode *)fp->f_data, uap->attrnamespace,
4819 	    attrname, uap->data, uap->nbytes, td);
4820 
4821 	fdrop(fp, td);
4822 	return (error);
4823 }
4824 
4825 /*
4826  * extattr_delete_vp(): Delete a named extended attribute on a file or
4827  *                      directory
4828  *
4829  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4830  *            kernelspace string pointer "attrname", proc "p"
4831  * Returns: 0 on success, an error number otherwise
4832  * Locks: none
4833  * References: vp must be a valid reference for the duration of the call
4834  */
4835 static int
4836 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4837     struct thread *td)
4838 {
4839 	struct mount *mp;
4840 	int error;
4841 
4842 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4843 		return (error);
4844 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4845 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4846 
4847 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred,
4848 	    td);
4849 
4850 	VOP_UNLOCK(vp, 0, td);
4851 	vn_finished_write(mp);
4852 	return (error);
4853 }
4854 
4855 int
4856 extattr_delete_file(td, uap)
4857 	struct thread *td;
4858 	struct extattr_delete_file_args /* {
4859 		syscallarg(const char *) path;
4860 		syscallarg(int) attrnamespace;
4861 		syscallarg(const char *) attrname;
4862 	} */ *uap;
4863 {
4864 	struct nameidata nd;
4865 	char attrname[EXTATTR_MAXNAMELEN];
4866 	int error;
4867 
4868 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4869 	if (error)
4870 		return(error);
4871 
4872 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4873 	if ((error = namei(&nd)) != 0)
4874 		return(error);
4875 	NDFREE(&nd, NDF_ONLY_PNBUF);
4876 
4877 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4878 
4879 	vrele(nd.ni_vp);
4880 	return(error);
4881 }
4882 
4883 int
4884 extattr_delete_fd(td, uap)
4885 	struct thread *td;
4886 	struct extattr_delete_fd_args /* {
4887 		syscallarg(int) fd;
4888 		syscallarg(int) attrnamespace;
4889 		syscallarg(const char *) attrname;
4890 	} */ *uap;
4891 {
4892 	struct file *fp;
4893 	struct vnode *vp;
4894 	char attrname[EXTATTR_MAXNAMELEN];
4895 	int error;
4896 
4897 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4898 	if (error)
4899 		return (error);
4900 
4901 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
4902 		return (error);
4903 	vp = (struct vnode *)fp->f_data;
4904 
4905 	error = extattr_delete_vp((struct vnode *)fp->f_data,
4906 	    uap->attrnamespace, attrname, td);
4907 
4908 	fdrop(fp, td);
4909 	return (error);
4910 }
4911