xref: /freebsd/sys/kern/vfs_extattr.c (revision 3ff369fed2a08f32dda232c10470b949bef9489f)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39  * $FreeBSD$
40  */
41 
42 /* For 4.3 integer FS ID compatibility */
43 #include "opt_compat.h"
44 #include "opt_ffs.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/sysent.h>
51 #include <sys/malloc.h>
52 #include <sys/mount.h>
53 #include <sys/mutex.h>
54 #include <sys/sysproto.h>
55 #include <sys/namei.h>
56 #include <sys/filedesc.h>
57 #include <sys/kernel.h>
58 #include <sys/fcntl.h>
59 #include <sys/file.h>
60 #include <sys/linker.h>
61 #include <sys/stat.h>
62 #include <sys/sx.h>
63 #include <sys/unistd.h>
64 #include <sys/vnode.h>
65 #include <sys/proc.h>
66 #include <sys/dirent.h>
67 #include <sys/extattr.h>
68 #include <sys/jail.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/limits.h>
72 #include <machine/stdarg.h>
73 
74 #include <vm/vm.h>
75 #include <vm/vm_object.h>
76 #include <vm/vm_page.h>
77 #include <vm/uma.h>
78 
79 static int change_dir(struct nameidata *ndp, struct thread *td);
80 static void checkdirs(struct vnode *olddp, struct vnode *newdp);
81 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
82 static int getutimes(const struct timeval *, struct timespec *);
83 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
84 static int setfmode(struct thread *td, struct vnode *, int);
85 static int setfflags(struct thread *td, struct vnode *, int);
86 static int setutimes(struct thread *td, struct vnode *,
87     const struct timespec *, int);
88 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
89     struct thread *td);
90 static void vfs_freeopts(struct vfsoptlist *opt);
91 static int vfs_nmount(struct thread *td, int, struct uio *);
92 
93 static int	usermount = 0;	/* if 1, non-root can mount fs. */
94 
95 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
96 
97 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
98 
99 /*
100  * Virtual File System System Calls
101  */
102 
103 #ifndef _SYS_SYSPROTO_H_
104 struct nmount_args {
105 	struct iovec    *iovp;
106 	unsigned int    iovcnt;
107 	int             flags;
108 };
109 #endif
110 /* ARGSUSED */
111 int
112 nmount(td, uap)
113 	struct thread *td;
114 	struct nmount_args /* {
115 		syscallarg(struct iovec *) iovp;
116 		syscallarg(unsigned int) iovcnt;
117 		syscallarg(int) flags;
118 	} */ *uap;
119 {
120 	struct uio auio;
121 	struct iovec *iov, *needfree;
122 	struct iovec aiov[UIO_SMALLIOV];
123 	long error, i;
124 	u_int iovlen, iovcnt;
125 
126 	iovcnt = SCARG(uap, iovcnt);
127 	iovlen = iovcnt * sizeof (struct iovec);
128 	/*
129 	 * Check that we have an even number of iovec's
130 	 * and that we have at least two options.
131 	 */
132 	if ((iovcnt & 1) || (iovcnt < 4) || (iovcnt > UIO_MAXIOV))
133 		return (EINVAL);
134 
135 	if (iovcnt > UIO_SMALLIOV) {
136 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
137 		needfree = iov;
138 	} else {
139 		iov = aiov;
140 		needfree = NULL;
141 	}
142 	auio.uio_iov = iov;
143 	auio.uio_iovcnt = iovcnt;
144 	auio.uio_rw = UIO_WRITE;
145 	auio.uio_segflg = UIO_USERSPACE;
146 	auio.uio_td = td;
147 	auio.uio_offset = 0;
148 	auio.uio_resid = 0;
149 	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
150 		goto finish;
151 	for (i = 0; i < iovcnt; i++) {
152 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
153 			error = EINVAL;
154 			goto finish;
155 		}
156 		auio.uio_resid += iov->iov_len;
157 		iov++;
158 	}
159 	error = vfs_nmount(td, SCARG(uap, flags), &auio);
160 finish:
161 	if (needfree != NULL)
162 		free(needfree, M_TEMP);
163 	return (error);
164 }
165 
166 /*
167  * Release all resources related to the
168  * mount options.
169  */
170 static void
171 vfs_freeopts(struct vfsoptlist *opt)
172 {
173 
174 	free(opt->opt, M_MOUNT);
175 	free(opt->optbuf, M_MOUNT);
176 	free(opt, M_MOUNT);
177 }
178 
179 int
180 kernel_mount(iovp, iovcnt, flags)
181 	struct iovec *iovp;
182 	unsigned int iovcnt;
183 	int flags;
184 {
185 	struct uio auio;
186 	struct iovec *iov;
187 	int error, i;
188 
189 	/*
190 	 * Check that we have an even number of iovec's
191 	 * and that we have at least two options.
192 	 */
193 	if ((iovcnt & 1) || (iovcnt < 4))
194 		return (EINVAL);
195 
196 	auio.uio_iov = iovp;
197 	auio.uio_iovcnt = iovcnt;
198 	auio.uio_rw = UIO_WRITE;
199 	auio.uio_segflg = UIO_SYSSPACE;
200 	auio.uio_offset = 0;
201 	auio.uio_td = NULL;
202 	auio.uio_resid = 0;
203 	iov = iovp;
204 	for (i = 0; i < iovcnt; i++) {
205 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
206 			return (EINVAL);
207 		}
208 		auio.uio_resid += iov->iov_len;
209 		iov++;
210 	}
211 
212 	error = vfs_nmount(curthread, flags, &auio);
213 	return (error);
214 }
215 
216 int
217 kernel_vmount(int flags, ...)
218 {
219 	struct iovec *iovp;
220 	struct uio auio;
221 	va_list ap;
222 	unsigned int iovcnt, iovlen, len;
223 	const char *cp;
224 	char *buf, *pos;
225 	size_t n;
226 	int error, i;
227 
228 	len = 0;
229 	va_start(ap, flags);
230 	for (iovcnt = 0; (cp = va_arg(ap, const char *)) != NULL; iovcnt++)
231 		len += strlen(cp) + 1;
232 	va_end(ap);
233 
234 	if (iovcnt < 4 || iovcnt & 1)
235 		return (EINVAL);
236 
237 	iovlen = iovcnt * sizeof (struct iovec);
238 	MALLOC(iovp, struct iovec *, iovlen, M_MOUNT, M_WAITOK);
239 	MALLOC(buf, char *, len, M_MOUNT, M_WAITOK);
240 	pos = buf;
241 	va_start(ap, flags);
242 	for (i = 0; i < iovcnt; i++) {
243 		cp = va_arg(ap, const char *);
244 		copystr(cp, pos, len - (pos - buf), &n);
245 		iovp[i].iov_base = pos;
246 		iovp[i].iov_len = n;
247 		pos += n;
248 	}
249 	va_end(ap);
250 
251 	auio.uio_iov = iovp;
252 	auio.uio_iovcnt = iovcnt;
253 	auio.uio_rw = UIO_WRITE;
254 	auio.uio_segflg = UIO_SYSSPACE;
255 	auio.uio_offset = 0;
256 	auio.uio_td = NULL;
257 	auio.uio_resid = len;
258 
259 	error = vfs_nmount(curthread, flags, &auio);
260 	FREE(iovp, M_MOUNT);
261 	FREE(buf, M_MOUNT);
262 	return (error);
263 }
264 
265 /*
266  * vfs_nmount(): actually attempt a filesystem mount.
267  */
268 static int
269 vfs_nmount(td, fsflags, fsoptions)
270 	struct thread *td;
271 	int fsflags;		/* Flags common to all filesystems. */
272 	struct uio *fsoptions;	/* Options local to the filesystem. */
273 {
274 	linker_file_t lf;
275 	struct vnode *vp;
276 	struct mount *mp;
277 	struct vfsconf *vfsp;
278 	struct iovec *cur;
279 	struct vfsoptlist *optlist;
280 	struct vfsopt *opt;
281 	char *buf, *fstype, *fspath;
282 	int error, flag = 0, kern_flag = 0, i, len, optcnt;
283 	int offset, iovcnt, fstypelen, fspathlen;
284 	struct vattr va;
285 	struct nameidata nd;
286 
287 	/*
288 	 * Allocate memory to hold the vfsopt structures.
289 	 */
290 	iovcnt = fsoptions->uio_iovcnt;
291 	optcnt = iovcnt >> 1;
292 	opt = malloc(sizeof (struct vfsopt) * optcnt,
293 	    M_MOUNT, M_WAITOK | M_ZERO);
294 
295 	/*
296 	 * Count the size of the buffer for options,
297 	 * allocate it, and fill in the vfsopt structures.
298 	 */
299 	cur = fsoptions->uio_iov;
300 	len = fsoptions->uio_resid;
301 	buf = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
302 
303 	optlist = malloc(sizeof (struct vfsoptlist), M_MOUNT, M_WAITOK);
304 	optlist->opt = opt;
305 	optlist->optbuf = buf;
306 	optlist->optcnt = optcnt;
307 
308 	offset = i = 0;
309 	cur = fsoptions->uio_iov;
310 	while (i < optcnt) {
311 		opt[i].name = buf + offset;
312 		/* Ensure the name of an option is a string. */
313 		if (opt[i].name[cur->iov_len - 1] != '\0') {
314 			error = EINVAL;
315 			goto bad;
316 		}
317 		offset += cur->iov_len;
318 		cur++;
319 		opt[i].len = cur->iov_len;
320 		/*
321 		 * Prevent consumers from trying to
322 		 * read the value of a 0 length option
323 		 * by setting it to NULL.
324 		 */
325 		if (opt[i].len == 0)
326 			opt[i].value = NULL;
327 		else
328 			opt[i].value = buf + offset;
329 		offset += cur->iov_len;
330 		cur++; i++;
331 	}
332 
333 	if ((error = uiomove(buf, len, fsoptions)) != 0)
334 		goto bad;
335 
336 	/*
337 	 * We need these two options before the others,
338 	 * and they are mandatory for any filesystem.
339 	 * Ensure they are NUL terminated as well.
340 	 */
341 	fstypelen = 0;
342 	error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
343 	if (error || fstype[fstypelen - 1] != '\0') {
344 		error = EINVAL;
345 		goto bad;
346 	}
347 	fspathlen = 0;
348 	error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
349 	if (error || fspath[fspathlen - 1] != '\0') {
350 		error = EINVAL;
351 		goto bad;
352 	}
353 
354 	/*
355 	 * Be ultra-paranoid about making sure the type and fspath
356 	 * variables will fit in our mp buffers, including the
357 	 * terminating NUL.
358 	 */
359 	if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
360 		error = ENAMETOOLONG;
361 		goto bad;
362 	}
363 
364 	if (usermount == 0) {
365 	       	error = suser(td);
366 		if (error)
367 			goto bad;
368 	}
369 	/*
370 	 * Do not allow NFS export by non-root users.
371 	 */
372 	if (fsflags & MNT_EXPORTED) {
373 		error = suser(td);
374 		if (error)
375 			goto bad;
376 	}
377 	/*
378 	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
379 	 */
380 	if (suser(td))
381 		fsflags |= MNT_NOSUID | MNT_NODEV;
382 	/*
383 	 * Get vnode to be covered
384 	 */
385 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
386 	if ((error = namei(&nd)) != 0)
387 		goto bad;
388 	NDFREE(&nd, NDF_ONLY_PNBUF);
389 	vp = nd.ni_vp;
390 	if (fsflags & MNT_UPDATE) {
391 		if ((vp->v_flag & VROOT) == 0) {
392 			vput(vp);
393 			error = EINVAL;
394 			goto bad;
395 		}
396 		mp = vp->v_mount;
397 		flag = mp->mnt_flag;
398 		kern_flag = mp->mnt_kern_flag;
399 		/*
400 		 * We only allow the filesystem to be reloaded if it
401 		 * is currently mounted read-only.
402 		 */
403 		if ((fsflags & MNT_RELOAD) &&
404 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
405 			vput(vp);
406 			error = EOPNOTSUPP;	/* Needs translation */
407 			goto bad;
408 		}
409 		/*
410 		 * Only root, or the user that did the original mount is
411 		 * permitted to update it.
412 		 */
413 		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
414 			error = suser(td);
415 			if (error) {
416 				vput(vp);
417 				goto bad;
418 			}
419 		}
420 		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
421 			vput(vp);
422 			error = EBUSY;
423 			goto bad;
424 		}
425 		mtx_lock(&vp->v_interlock);
426 		if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
427 			mtx_unlock(&vp->v_interlock);
428 			vfs_unbusy(mp, td);
429 			vput(vp);
430 			error = EBUSY;
431 			goto bad;
432 		}
433 		vp->v_flag |= VMOUNT;
434 		mtx_unlock(&vp->v_interlock);
435 		mp->mnt_flag |= fsflags &
436 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
437 		VOP_UNLOCK(vp, 0, td);
438 		goto update;
439 	}
440 	/*
441 	 * If the user is not root, ensure that they own the directory
442 	 * onto which we are attempting to mount.
443 	 */
444 	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
445 	if (error) {
446 		vput(vp);
447 		goto bad;
448 	}
449 	if (va.va_uid != td->td_ucred->cr_uid) {
450 		error = suser(td);
451 		if (error) {
452 			vput(vp);
453 			goto bad;
454 		}
455 	}
456 	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
457 		vput(vp);
458 		goto bad;
459 	}
460 	if (vp->v_type != VDIR) {
461 		vput(vp);
462 		error = ENOTDIR;
463 		goto bad;
464 	}
465 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
466 		if (!strcmp(vfsp->vfc_name, fstype))
467 			break;
468 	if (vfsp == NULL) {
469 		/* Only load modules for root (very important!). */
470 		error = suser(td);
471 		if (error) {
472 			vput(vp);
473 			goto bad;
474 		}
475 		error = securelevel_gt(td->td_ucred, 0);
476 		if (error) {
477 			vput(vp);
478 			goto bad;
479 		}
480 		error = linker_load_file(fstype, &lf);
481 		if (error || lf == NULL) {
482 			vput(vp);
483 			if (lf == NULL)
484 				error = ENODEV;
485 			goto bad;
486 		}
487 		lf->userrefs++;
488 		/* Look up again to see if the VFS was loaded. */
489 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
490 			if (!strcmp(vfsp->vfc_name, fstype))
491 				break;
492 		if (vfsp == NULL) {
493 			lf->userrefs--;
494 			linker_file_unload(lf);
495 			vput(vp);
496 			error = ENODEV;
497 			goto bad;
498 		}
499 	}
500 	mtx_lock(&vp->v_interlock);
501 	if ((vp->v_flag & VMOUNT) != 0 ||
502 	    vp->v_mountedhere != NULL) {
503 		mtx_unlock(&vp->v_interlock);
504 		vput(vp);
505 		error = EBUSY;
506 		goto bad;
507 	}
508 	vp->v_flag |= VMOUNT;
509 	mtx_unlock(&vp->v_interlock);
510 
511 	/*
512 	 * Allocate and initialize the filesystem.
513 	 */
514 	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
515 	TAILQ_INIT(&mp->mnt_nvnodelist);
516 	TAILQ_INIT(&mp->mnt_reservedvnlist);
517 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
518 	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
519 	mp->mnt_op = vfsp->vfc_vfsops;
520 	mp->mnt_vfc = vfsp;
521 	vfsp->vfc_refcount++;
522 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
523 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
524 	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
525 	mp->mnt_vnodecovered = vp;
526 	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
527 	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
528 	mp->mnt_iosize_max = DFLTPHYS;
529 	VOP_UNLOCK(vp, 0, td);
530 
531 update:
532 	mp->mnt_optnew = optlist;
533 	/*
534 	 * Check if the fs implements the new VFS_NMOUNT()
535 	 * function, since the new system call was used.
536 	 */
537 	if (mp->mnt_op->vfs_mount != NULL) {
538 		printf("%s doesn't support the new mount syscall\n",
539 		    mp->mnt_vfc->vfc_name);
540 		mtx_lock(&vp->v_interlock);
541 		vp->v_flag &= ~VMOUNT;
542 		mtx_unlock(&vp->v_interlock);
543 		if (mp->mnt_flag & MNT_UPDATE)
544 			vfs_unbusy(mp, td);
545 		else {
546 			mp->mnt_vfc->vfc_refcount--;
547 			vfs_unbusy(mp, td);
548 			free((caddr_t)mp, M_MOUNT);
549 		}
550 		vrele(vp);
551 		error = EOPNOTSUPP;
552 		goto bad;
553 	}
554 
555 	/*
556 	 * Set the mount level flags.
557 	 */
558 	if (fsflags & MNT_RDONLY)
559 		mp->mnt_flag |= MNT_RDONLY;
560 	else if (mp->mnt_flag & MNT_RDONLY)
561 		mp->mnt_kern_flag |= MNTK_WANTRDWR;
562 	mp->mnt_flag &=~ MNT_UPDATEMASK;
563 	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
564 	/*
565 	 * Mount the filesystem.
566 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
567 	 * get.  No freeing of cn_pnbuf.
568 	 */
569 	error = VFS_NMOUNT(mp, &nd, td);
570 	if (!error) {
571 		if (mp->mnt_opt != NULL)
572 			vfs_freeopts(mp->mnt_opt);
573 		mp->mnt_opt = mp->mnt_optnew;
574 	}
575 	/*
576 	 * Prevent external consumers of mount
577 	 * options to read mnt_optnew.
578 	 */
579 	mp->mnt_optnew = NULL;
580 	if (mp->mnt_flag & MNT_UPDATE) {
581 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
582 			mp->mnt_flag &= ~MNT_RDONLY;
583 		mp->mnt_flag &=~
584 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
585 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
586 		if (error) {
587 			mp->mnt_flag = flag;
588 			mp->mnt_kern_flag = kern_flag;
589 		}
590 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
591 			if (mp->mnt_syncer == NULL)
592 				error = vfs_allocate_syncvnode(mp);
593 		} else {
594 			if (mp->mnt_syncer != NULL)
595 				vrele(mp->mnt_syncer);
596 			mp->mnt_syncer = NULL;
597 		}
598 		vfs_unbusy(mp, td);
599 		mtx_lock(&vp->v_interlock);
600 		vp->v_flag &= ~VMOUNT;
601 		mtx_unlock(&vp->v_interlock);
602 		vrele(vp);
603 		return (error);
604 	}
605 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
606 	/*
607 	 * Put the new filesystem on the mount list after root.
608 	 */
609 	cache_purge(vp);
610 	if (!error) {
611 		struct vnode *newdp;
612 
613 		mtx_lock(&vp->v_interlock);
614 		vp->v_flag &= ~VMOUNT;
615 		vp->v_mountedhere = mp;
616 		mtx_unlock(&vp->v_interlock);
617 		mtx_lock(&mountlist_mtx);
618 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
619 		mtx_unlock(&mountlist_mtx);
620 		if (VFS_ROOT(mp, &newdp))
621 			panic("mount: lost mount");
622 		checkdirs(vp, newdp);
623 		vput(newdp);
624 		VOP_UNLOCK(vp, 0, td);
625 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
626 			error = vfs_allocate_syncvnode(mp);
627 		vfs_unbusy(mp, td);
628 		if ((error = VFS_START(mp, 0, td)) != 0) {
629 			vrele(vp);
630 			goto bad;
631 		}
632 	} else {
633 		mtx_lock(&vp->v_interlock);
634 		vp->v_flag &= ~VMOUNT;
635 		mtx_unlock(&vp->v_interlock);
636 		mp->mnt_vfc->vfc_refcount--;
637 		vfs_unbusy(mp, td);
638 		free((caddr_t)mp, M_MOUNT);
639 		vput(vp);
640 		goto bad;
641 	}
642 	return (0);
643 bad:
644 	vfs_freeopts(optlist);
645 	return (error);
646 }
647 
648 /*
649  * Old Mount API.
650  */
651 #ifndef _SYS_SYSPROTO_H_
652 struct mount_args {
653 	char	*type;
654 	char	*path;
655 	int	flags;
656 	caddr_t	data;
657 };
658 #endif
659 /* ARGSUSED */
660 int
661 mount(td, uap)
662 	struct thread *td;
663 	struct mount_args /* {
664 		syscallarg(char *) type;
665 		syscallarg(char *) path;
666 		syscallarg(int) flags;
667 		syscallarg(caddr_t) data;
668 	} */ *uap;
669 {
670 	char *fstype;
671 	char *fspath;
672 	int error;
673 
674 	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
675 	fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK);
676 
677 	/*
678 	 * vfs_mount() actually takes a kernel string for `type' and
679 	 * `path' now, so extract them.
680 	 */
681 	error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL);
682 	if (error)
683 		goto finish;
684 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
685 	if (error)
686 		goto finish;
687 	error = vfs_mount(td, fstype, fspath, SCARG(uap, flags),
688 	    SCARG(uap, data));
689 finish:
690 	free(fstype, M_TEMP);
691 	free(fspath, M_TEMP);
692 	return (error);
693 }
694 
695 /*
696  * vfs_mount(): actually attempt a filesystem mount.
697  *
698  * This routine is designed to be a "generic" entry point for routines
699  * that wish to mount a filesystem. All parameters except `fsdata' are
700  * pointers into kernel space. `fsdata' is currently still a pointer
701  * into userspace.
 *
 * td      - thread on whose behalf the mount is done; its credentials
 *           are used for the privilege and ownership checks.
 * fstype  - name of the filesystem type, matched against vfsconf.
 * fspath  - path of the directory to mount on (or, with MNT_UPDATE,
 *           the root of the mounted filesystem to update).
 * fsflags - MNT_* flags common to all filesystems.
 * fsdata  - filesystem-specific data, passed through to VFS_MOUNT().
 *
 * Returns 0 on success or an errno value.
702  */
703 int
704 vfs_mount(td, fstype, fspath, fsflags, fsdata)
705 	struct thread *td;
706 	const char *fstype;
707 	char *fspath;
708 	int fsflags;
709 	void *fsdata;
710 {
711 	linker_file_t lf;
712 	struct vnode *vp;
713 	struct mount *mp;
714 	struct vfsconf *vfsp;
715 	int error, flag = 0, kern_flag = 0;
716 	struct vattr va;
717 	struct nameidata nd;
718 
719 	/*
720 	 * Be ultra-paranoid about making sure the type and fspath
721 	 * variables will fit in our mp buffers, including the
722 	 * terminating NUL.
723 	 */
724 	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
725 		return (ENAMETOOLONG);
726 
	/* Unless vfs.usermount is set, only the superuser may mount. */
727 	if (usermount == 0) {
728 		error = suser(td);
729 		if (error)
730 			return (error);
731 	}
732 	/*
733 	 * Do not allow NFS export by non-root users.
734 	 */
735 	if (fsflags & MNT_EXPORTED) {
736 		error = suser(td);
737 		if (error)
738 			return (error);
739 	}
740 	/*
741 	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
742 	 */
743 	if (suser(td))
744 		fsflags |= MNT_NOSUID | MNT_NODEV;
745 	/*
746 	 * Get vnode to be covered
747 	 */
748 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
749 	if ((error = namei(&nd)) != 0)
750 		return (error);
751 	NDFREE(&nd, NDF_ONLY_PNBUF);
752 	vp = nd.ni_vp;
	/*
	 * Update of an existing mount: vp must be the root of a mounted
	 * filesystem.  The current flags are saved so that they can be
	 * restored if VFS_MOUNT() fails.
	 */
753 	if (fsflags & MNT_UPDATE) {
754 		if ((vp->v_flag & VROOT) == 0) {
755 			vput(vp);
756 			return (EINVAL);
757 		}
758 		mp = vp->v_mount;
759 		flag = mp->mnt_flag;
760 		kern_flag = mp->mnt_kern_flag;
761 		/*
762 		 * We only allow the filesystem to be reloaded if it
763 		 * is currently mounted read-only.
764 		 */
765 		if ((fsflags & MNT_RELOAD) &&
766 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
767 			vput(vp);
768 			return (EOPNOTSUPP);	/* Needs translation */
769 		}
770 		/*
771 		 * Only root, or the user that did the original mount is
772 		 * permitted to update it.
773 		 */
774 		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
775 			error = suser(td);
776 			if (error) {
777 				vput(vp);
778 				return (error);
779 			}
780 		}
781 		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
782 			vput(vp);
783 			return (EBUSY);
784 		}
		/* VMOUNT marks the vnode as having a mount in progress. */
785 		mtx_lock(&vp->v_interlock);
786 		if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
787 			mtx_unlock(&vp->v_interlock);
788 			vfs_unbusy(mp, td);
789 			vput(vp);
790 			return (EBUSY);
791 		}
792 		vp->v_flag |= VMOUNT;
793 		mtx_unlock(&vp->v_interlock);
794 		mp->mnt_flag |= fsflags &
795 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
796 		VOP_UNLOCK(vp, 0, td);
797 		goto update;
798 	}
799 	/*
800 	 * If the user is not root, ensure that they own the directory
801 	 * onto which we are attempting to mount.
802 	 */
803 	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
804 	if (error) {
805 		vput(vp);
806 		return (error);
807 	}
808 	if (va.va_uid != td->td_ucred->cr_uid) {
809 		error = suser(td);
810 		if (error) {
811 			vput(vp);
812 			return (error);
813 		}
814 	}
815 	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
816 		vput(vp);
817 		return (error);
818 	}
819 	if (vp->v_type != VDIR) {
820 		vput(vp);
821 		return (ENOTDIR);
822 	}
	/* Find the filesystem type by name in the configured list. */
823 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
824 		if (!strcmp(vfsp->vfc_name, fstype))
825 			break;
826 	if (vfsp == NULL) {
827 		/* Only load modules for root (very important!). */
828 		error = suser(td);
829 		if (error) {
830 			vput(vp);
831 			return (error);
832 		}
833 		error = securelevel_gt(td->td_ucred, 0);
834 		if (error) {
835 			vput(vp);
836 			return (error);
837 		}
838 		error = linker_load_file(fstype, &lf);
839 		if (error || lf == NULL) {
840 			vput(vp);
841 			if (lf == NULL)
842 				error = ENODEV;
843 			return (error);
844 		}
845 		lf->userrefs++;
846 		/* Look up again to see if the VFS was loaded. */
847 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
848 			if (!strcmp(vfsp->vfc_name, fstype))
849 				break;
850 		if (vfsp == NULL) {
851 			lf->userrefs--;
852 			linker_file_unload(lf);
853 			vput(vp);
854 			return (ENODEV);
855 		}
856 	}
857 	mtx_lock(&vp->v_interlock);
858 	if ((vp->v_flag & VMOUNT) != 0 ||
859 	    vp->v_mountedhere != NULL) {
860 		mtx_unlock(&vp->v_interlock);
861 		vput(vp);
862 		return (EBUSY);
863 	}
864 	vp->v_flag |= VMOUNT;
865 	mtx_unlock(&vp->v_interlock);
866 
867 	/*
868 	 * Allocate and initialize the filesystem.
869 	 */
870 	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
871 	TAILQ_INIT(&mp->mnt_nvnodelist);
872 	TAILQ_INIT(&mp->mnt_reservedvnlist);
873 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
874 	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
875 	mp->mnt_op = vfsp->vfc_vfsops;
876 	mp->mnt_vfc = vfsp;
877 	vfsp->vfc_refcount++;
878 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
879 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	/*
	 * mp was zero-filled and both lengths were checked on entry, so
	 * these strncpy() calls leave the names NUL terminated.
	 */
880 	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
881 	mp->mnt_vnodecovered = vp;
882 	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
883 	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
884 	mp->mnt_iosize_max = DFLTPHYS;
885 	VOP_UNLOCK(vp, 0, td);
	/* Common tail for both new mounts and updates; vp is unlocked. */
886 update:
887 	/*
888 	 * Check if the fs implements the old VFS_MOUNT()
889 	 * function, since the old system call was used.
890 	 */
891 	if (mp->mnt_op->vfs_mount == NULL) {
892 		printf("%s doesn't support the old mount syscall\n",
893 		    mp->mnt_vfc->vfc_name);
894 		mtx_lock(&vp->v_interlock);
895 		vp->v_flag &= ~VMOUNT;
896 		mtx_unlock(&vp->v_interlock);
897 		if (mp->mnt_flag & MNT_UPDATE)
898 			vfs_unbusy(mp, td);
899 		else {
900 			mp->mnt_vfc->vfc_refcount--;
901 			vfs_unbusy(mp, td);
902 			free((caddr_t)mp, M_MOUNT);
903 		}
904 		vrele(vp);
905 		return (EOPNOTSUPP);
906 	}
907 
908 	/*
909 	 * Set the mount level flags.
910 	 */
911 	if (fsflags & MNT_RDONLY)
912 		mp->mnt_flag |= MNT_RDONLY;
913 	else if (mp->mnt_flag & MNT_RDONLY)
914 		mp->mnt_kern_flag |= MNTK_WANTRDWR;
915 	mp->mnt_flag &=~ MNT_UPDATEMASK;
916 	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
917 	/*
918 	 * Mount the filesystem.
919 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
920 	 * get.  No freeing of cn_pnbuf.
921 	 */
922 	error = VFS_MOUNT(mp, fspath, fsdata, &nd, td);
	/*
	 * Update case: clear the transient flags, roll the saved flags
	 * back on failure, bring the syncer vnode in line with the
	 * read-only state, and return.
	 */
923 	if (mp->mnt_flag & MNT_UPDATE) {
924 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
925 			mp->mnt_flag &= ~MNT_RDONLY;
926 		mp->mnt_flag &=~
927 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
928 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
929 		if (error) {
930 			mp->mnt_flag = flag;
931 			mp->mnt_kern_flag = kern_flag;
932 		}
		/*
		 * NOTE(review): the assignment below replaces any error
		 * returned by VFS_MOUNT() above.
		 */
933 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
934 			if (mp->mnt_syncer == NULL)
935 				error = vfs_allocate_syncvnode(mp);
936 		} else {
937 			if (mp->mnt_syncer != NULL)
938 				vrele(mp->mnt_syncer);
939 			mp->mnt_syncer = NULL;
940 		}
941 		vfs_unbusy(mp, td);
942 		mtx_lock(&vp->v_interlock);
943 		vp->v_flag &= ~VMOUNT;
944 		mtx_unlock(&vp->v_interlock);
945 		vrele(vp);
946 		return (error);
947 	}
948 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
949 	/*
950 	 * Put the new filesystem on the mount list after root.
951 	 */
952 	cache_purge(vp);
953 	if (!error) {
954 		struct vnode *newdp;
955 
956 		mtx_lock(&vp->v_interlock);
957 		vp->v_flag &= ~VMOUNT;
958 		vp->v_mountedhere = mp;
959 		mtx_unlock(&vp->v_interlock);
960 		mtx_lock(&mountlist_mtx);
961 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
962 		mtx_unlock(&mountlist_mtx);
963 		if (VFS_ROOT(mp, &newdp))
964 			panic("mount: lost mount");
		/* Move cwd/root references on the covered vnode to newdp. */
965 		checkdirs(vp, newdp);
966 		vput(newdp);
967 		VOP_UNLOCK(vp, 0, td);
		/*
		 * NOTE(review): the result of vfs_allocate_syncvnode() is
		 * overwritten by the VFS_START() call below.
		 */
968 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
969 			error = vfs_allocate_syncvnode(mp);
970 		vfs_unbusy(mp, td);
971 		if ((error = VFS_START(mp, 0, td)) != 0)
972 			vrele(vp);
973 	} else {
		/* Mount failed: undo the setup done for the new mount. */
974 		mtx_lock(&vp->v_interlock);
975 		vp->v_flag &= ~VMOUNT;
976 		mtx_unlock(&vp->v_interlock);
977 		mp->mnt_vfc->vfc_refcount--;
978 		vfs_unbusy(mp, td);
979 		free((caddr_t)mp, M_MOUNT);
980 		vput(vp);
981 	}
982 	return (error);
983 }
984 
985 /*
986  * Scan all active processes to see if any of them have a current
987  * or root directory of `olddp'. If so, replace them with the new
988  * mount point.
989  */
990 static void
991 checkdirs(olddp, newdp)
992 	struct vnode *olddp, *newdp;
993 {
994 	struct filedesc *fdp;
995 	struct proc *p;
996 	int nrele;
997 
998 	if (olddp->v_usecount == 1)
999 		return;
1000 	sx_slock(&allproc_lock);
1001 	LIST_FOREACH(p, &allproc, p_list) {
1002 		PROC_LOCK(p);
1003 		fdp = p->p_fd;
1004 		if (fdp == NULL) {
1005 			PROC_UNLOCK(p);
1006 			continue;
1007 		}
1008 		nrele = 0;
1009 		FILEDESC_LOCK(fdp);
1010 		if (fdp->fd_cdir == olddp) {
1011 			VREF(newdp);
1012 			fdp->fd_cdir = newdp;
1013 			nrele++;
1014 		}
1015 		if (fdp->fd_rdir == olddp) {
1016 			VREF(newdp);
1017 			fdp->fd_rdir = newdp;
1018 			nrele++;
1019 		}
1020 		FILEDESC_UNLOCK(fdp);
1021 		PROC_UNLOCK(p);
1022 		while (nrele--)
1023 			vrele(olddp);
1024 	}
1025 	sx_sunlock(&allproc_lock);
1026 	if (rootvnode == olddp) {
1027 		vrele(rootvnode);
1028 		VREF(newdp);
1029 		rootvnode = newdp;
1030 	}
1031 }
1032 
1033 /*
1034  * Unmount a filesystem.
1035  *
1036  * Note: unmount takes a path to the vnode mounted on as argument,
1037  * not special file (as before).
1038  */
1039 #ifndef _SYS_SYSPROTO_H_
1040 struct unmount_args {
1041 	char	*path;
1042 	int	flags;
1043 };
1044 #endif
1045 /* ARGSUSED */
1046 int
1047 unmount(td, uap)
1048 	struct thread *td;
1049 	register struct unmount_args /* {
1050 		syscallarg(char *) path;
1051 		syscallarg(int) flags;
1052 	} */ *uap;
1053 {
1054 	register struct vnode *vp;
1055 	struct mount *mp;
1056 	int error;
1057 	struct nameidata nd;
1058 
1059 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1060 	    SCARG(uap, path), td);
1061 	if ((error = namei(&nd)) != 0)
1062 		return (error);
1063 	vp = nd.ni_vp;
1064 	NDFREE(&nd, NDF_ONLY_PNBUF);
1065 	mp = vp->v_mount;
1066 
1067 	/*
1068 	 * Only root, or the user that did the original mount is
1069 	 * permitted to unmount this filesystem.
1070 	 */
1071 	if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
1072 		error = suser(td);
1073 		if (error) {
1074 			vput(vp);
1075 			return (error);
1076 		}
1077 	}
1078 
1079 	/*
1080 	 * Don't allow unmounting the root filesystem.
1081 	 */
1082 	if (mp->mnt_flag & MNT_ROOTFS) {
1083 		vput(vp);
1084 		return (EINVAL);
1085 	}
1086 
1087 	/*
1088 	 * Must be the root of the filesystem
1089 	 */
1090 	if ((vp->v_flag & VROOT) == 0) {
1091 		vput(vp);
1092 		return (EINVAL);
1093 	}
1094 	vput(vp);
1095 	return (dounmount(mp, SCARG(uap, flags), td));
1096 }
1097 
/*
 * Do the actual filesystem unmount.
 *
 * Arguments:
 *	mp	mount structure to tear down
 *	flags	unmount flags; MNT_FORCE requests a forced unmount
 *	td	calling thread
 *
 * Returns 0 on success, in which case mp has been unlinked from the
 * mount list and freed.  Returns EBUSY if MNTK_UNMOUNT is already set
 * on mp, or otherwise the error reported by lockmgr(), VFS_SYNC() or
 * VFS_UNMOUNT().  On those failures the flag and cdir/rdir changes
 * made here are undone and mp stays on the mount list.
 */
int
dounmount(mp, flags, td)
	struct mount *mp;
	int flags;
	struct thread *td;
{
	struct vnode *coveredvp, *fsrootvp;
	int error;
	int async_flag;

	mtx_lock(&mountlist_mtx);
	/* Refuse if another unmount of this mount is already under way. */
	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		mtx_unlock(&mountlist_mtx);
		return (EBUSY);
	}
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	/* Allow filesystems to detect that a forced unmount is in progress. */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	/*
	 * Drain the mount lock, handing mountlist_mtx over as the interlock.
	 * LK_NOWAIT is passed unless the unmount is forced.
	 */
	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
	if (error) {
		/* Could not drain: clear the unmount flags and wake waiters. */
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup((caddr_t)mp);
		return (error);
	}
	vn_start_write(NULL, &mp, V_WAIT);

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	/* Remember MNT_ASYNC so it can be restored if the unmount fails. */
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;
	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	if (mp->mnt_syncer != NULL)
		vrele(mp->mnt_syncer);
	/* Move process cdir/rdir refs on fs root to underlying vnode. */
	if (VFS_ROOT(mp, &fsrootvp) == 0) {
		if (mp->mnt_vnodecovered != NULL)
			checkdirs(fsrootvp, mp->mnt_vnodecovered);
		if (fsrootvp == rootvnode) {
			vrele(rootvnode);
			rootvnode = NULL;
		}
		vput(fsrootvp);
	}
	/*
	 * Call VFS_UNMOUNT() when the filesystem is read-only, when the
	 * final VFS_SYNC() succeeded, or when the unmount is forced.
	 * Otherwise error keeps the VFS_SYNC() failure.
	 */
	if (((mp->mnt_flag & MNT_RDONLY) ||
	     (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) ||
	    (flags & MNT_FORCE)) {
		error = VFS_UNMOUNT(mp, flags, td);
	}
	vn_finished_write(mp);
	if (error) {
		/* Undo cdir/rdir and rootvnode changes made above. */
		if (VFS_ROOT(mp, &fsrootvp) == 0) {
			if (mp->mnt_vnodecovered != NULL)
				checkdirs(mp->mnt_vnodecovered, fsrootvp);
			if (rootvnode == NULL) {
				rootvnode = fsrootvp;
				vref(rootvnode);
			}
			vput(fsrootvp);
		}
		/* Re-create the syncer vnode for a writable mount lacking one. */
		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
			(void) vfs_allocate_syncvnode(mp);
		mtx_lock(&mountlist_mtx);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
		    &mountlist_mtx, td);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup((caddr_t)mp);
		return (error);
	}
	/* Success: unlink the mount and detach it from the covered vnode. */
	mtx_lock(&mountlist_mtx);
	TAILQ_REMOVE(&mountlist, mp, mnt_list);
	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
		coveredvp->v_mountedhere = NULL;
	mp->mnt_vfc->vfc_refcount--;
	/* Vnodes still on the mount's list at this point are fatal. */
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");
	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td);
	lockdestroy(&mp->mnt_lock);
	if (coveredvp != NULL)
		vrele(coveredvp);
	if (mp->mnt_kern_flag & MNTK_MWAIT)
		wakeup((caddr_t)mp);
	/* The option list is freed only when the fs has no vfs_mount op. */
	if (mp->mnt_op->vfs_mount == NULL)
		vfs_freeopts(mp->mnt_opt);
	free((caddr_t)mp, M_MOUNT);
	return (0);
}
1195 
1196 /*
1197  * Sync each mounted filesystem.
1198  */
1199 #ifndef _SYS_SYSPROTO_H_
1200 struct sync_args {
1201         int     dummy;
1202 };
1203 #endif
1204 
1205 #ifdef DEBUG
1206 static int syncprt = 0;
1207 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
1208 #endif
1209 
1210 /* ARGSUSED */
1211 int
1212 sync(td, uap)
1213 	struct thread *td;
1214 	struct sync_args *uap;
1215 {
1216 	struct mount *mp, *nmp;
1217 	int asyncflag;
1218 
1219 	mtx_lock(&mountlist_mtx);
1220 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
1221 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
1222 			nmp = TAILQ_NEXT(mp, mnt_list);
1223 			continue;
1224 		}
1225 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
1226 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
1227 			asyncflag = mp->mnt_flag & MNT_ASYNC;
1228 			mp->mnt_flag &= ~MNT_ASYNC;
1229 			vfs_msync(mp, MNT_NOWAIT);
1230 			VFS_SYNC(mp, MNT_NOWAIT,
1231 			    ((td != NULL) ? td->td_ucred : NOCRED), td);
1232 			mp->mnt_flag |= asyncflag;
1233 			vn_finished_write(mp);
1234 		}
1235 		mtx_lock(&mountlist_mtx);
1236 		nmp = TAILQ_NEXT(mp, mnt_list);
1237 		vfs_unbusy(mp, td);
1238 	}
1239 	mtx_unlock(&mountlist_mtx);
1240 #if 0
1241 /*
1242  * XXX don't call vfs_bufstats() yet because that routine
1243  * was not imported in the Lite2 merge.
1244  */
1245 #ifdef DIAGNOSTIC
1246 	if (syncprt)
1247 		vfs_bufstats();
1248 #endif /* DIAGNOSTIC */
1249 #endif
1250 	return (0);
1251 }
1252 
1253 /* XXX PRISON: could be per prison flag */
1254 static int prison_quotas;
1255 #if 0
1256 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
1257 #endif
1258 
1259 /*
1260  * Change filesystem quotas.
1261  */
1262 #ifndef _SYS_SYSPROTO_H_
1263 struct quotactl_args {
1264 	char *path;
1265 	int cmd;
1266 	int uid;
1267 	caddr_t arg;
1268 };
1269 #endif
1270 /* ARGSUSED */
1271 int
1272 quotactl(td, uap)
1273 	struct thread *td;
1274 	register struct quotactl_args /* {
1275 		syscallarg(char *) path;
1276 		syscallarg(int) cmd;
1277 		syscallarg(int) uid;
1278 		syscallarg(caddr_t) arg;
1279 	} */ *uap;
1280 {
1281 	struct mount *mp;
1282 	int error;
1283 	struct nameidata nd;
1284 
1285 	if (jailed(td->td_ucred) && !prison_quotas)
1286 		return (EPERM);
1287 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1288 	if ((error = namei(&nd)) != 0)
1289 		return (error);
1290 	NDFREE(&nd, NDF_ONLY_PNBUF);
1291 	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
1292 	vrele(nd.ni_vp);
1293 	if (error)
1294 		return (error);
1295 	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
1296 	    SCARG(uap, arg), td);
1297 	vn_finished_write(mp);
1298 	return (error);
1299 }
1300 
1301 /*
1302  * Get filesystem statistics.
1303  */
1304 #ifndef _SYS_SYSPROTO_H_
1305 struct statfs_args {
1306 	char *path;
1307 	struct statfs *buf;
1308 };
1309 #endif
1310 /* ARGSUSED */
1311 int
1312 statfs(td, uap)
1313 	struct thread *td;
1314 	register struct statfs_args /* {
1315 		syscallarg(char *) path;
1316 		syscallarg(struct statfs *) buf;
1317 	} */ *uap;
1318 {
1319 	register struct mount *mp;
1320 	register struct statfs *sp;
1321 	int error;
1322 	struct nameidata nd;
1323 	struct statfs sb;
1324 
1325 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1326 	if ((error = namei(&nd)) != 0)
1327 		return (error);
1328 	mp = nd.ni_vp->v_mount;
1329 	sp = &mp->mnt_stat;
1330 	NDFREE(&nd, NDF_ONLY_PNBUF);
1331 	vrele(nd.ni_vp);
1332 	error = VFS_STATFS(mp, sp, td);
1333 	if (error)
1334 		return (error);
1335 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1336 	if (suser(td)) {
1337 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
1338 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
1339 		sp = &sb;
1340 	}
1341 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
1342 }
1343 
1344 /*
1345  * Get filesystem statistics.
1346  */
1347 #ifndef _SYS_SYSPROTO_H_
1348 struct fstatfs_args {
1349 	int fd;
1350 	struct statfs *buf;
1351 };
1352 #endif
1353 /* ARGSUSED */
1354 int
1355 fstatfs(td, uap)
1356 	struct thread *td;
1357 	register struct fstatfs_args /* {
1358 		syscallarg(int) fd;
1359 		syscallarg(struct statfs *) buf;
1360 	} */ *uap;
1361 {
1362 	struct file *fp;
1363 	struct mount *mp;
1364 	register struct statfs *sp;
1365 	int error;
1366 	struct statfs sb;
1367 
1368 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
1369 		return (error);
1370 	mp = ((struct vnode *)fp->f_data)->v_mount;
1371 	fdrop(fp, td);
1372 	if (mp == NULL)
1373 		return (EBADF);
1374 	sp = &mp->mnt_stat;
1375 	error = VFS_STATFS(mp, sp, td);
1376 	if (error)
1377 		return (error);
1378 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1379 	if (suser(td)) {
1380 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
1381 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
1382 		sp = &sb;
1383 	}
1384 	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
1385 }
1386 
1387 /*
1388  * Get statistics on all filesystems.
1389  */
1390 #ifndef _SYS_SYSPROTO_H_
1391 struct getfsstat_args {
1392 	struct statfs *buf;
1393 	long bufsize;
1394 	int flags;
1395 };
1396 #endif
1397 int
1398 getfsstat(td, uap)
1399 	struct thread *td;
1400 	register struct getfsstat_args /* {
1401 		syscallarg(struct statfs *) buf;
1402 		syscallarg(long) bufsize;
1403 		syscallarg(int) flags;
1404 	} */ *uap;
1405 {
1406 	register struct mount *mp, *nmp;
1407 	register struct statfs *sp;
1408 	caddr_t sfsp;
1409 	long count, maxcount, error;
1410 
1411 	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
1412 	sfsp = (caddr_t)SCARG(uap, buf);
1413 	count = 0;
1414 	mtx_lock(&mountlist_mtx);
1415 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
1416 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
1417 			nmp = TAILQ_NEXT(mp, mnt_list);
1418 			continue;
1419 		}
1420 		if (sfsp && count < maxcount) {
1421 			sp = &mp->mnt_stat;
1422 			/*
1423 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1424 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
1425 			 * overrides MNT_WAIT.
1426 			 */
1427 			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
1428 			    (SCARG(uap, flags) & MNT_WAIT)) &&
1429 			    (error = VFS_STATFS(mp, sp, td))) {
1430 				mtx_lock(&mountlist_mtx);
1431 				nmp = TAILQ_NEXT(mp, mnt_list);
1432 				vfs_unbusy(mp, td);
1433 				continue;
1434 			}
1435 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1436 			error = copyout((caddr_t)sp, sfsp, sizeof(*sp));
1437 			if (error) {
1438 				vfs_unbusy(mp, td);
1439 				return (error);
1440 			}
1441 			sfsp += sizeof(*sp);
1442 		}
1443 		count++;
1444 		mtx_lock(&mountlist_mtx);
1445 		nmp = TAILQ_NEXT(mp, mnt_list);
1446 		vfs_unbusy(mp, td);
1447 	}
1448 	mtx_unlock(&mountlist_mtx);
1449 	if (sfsp && count > maxcount)
1450 		td->td_retval[0] = maxcount;
1451 	else
1452 		td->td_retval[0] = count;
1453 	return (0);
1454 }
1455 
1456 /*
1457  * Change current working directory to a given file descriptor.
1458  */
1459 #ifndef _SYS_SYSPROTO_H_
1460 struct fchdir_args {
1461 	int	fd;
1462 };
1463 #endif
1464 /* ARGSUSED */
1465 int
1466 fchdir(td, uap)
1467 	struct thread *td;
1468 	struct fchdir_args /* {
1469 		syscallarg(int) fd;
1470 	} */ *uap;
1471 {
1472 	register struct filedesc *fdp = td->td_proc->p_fd;
1473 	struct vnode *vp, *tdp, *vpold;
1474 	struct mount *mp;
1475 	struct file *fp;
1476 	int error;
1477 
1478 	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
1479 		return (error);
1480 	vp = (struct vnode *)fp->f_data;
1481 	VREF(vp);
1482 	fdrop(fp, td);
1483 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1484 	if (vp->v_type != VDIR)
1485 		error = ENOTDIR;
1486 	else
1487 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
1488 	while (!error && (mp = vp->v_mountedhere) != NULL) {
1489 		if (vfs_busy(mp, 0, 0, td))
1490 			continue;
1491 		error = VFS_ROOT(mp, &tdp);
1492 		vfs_unbusy(mp, td);
1493 		if (error)
1494 			break;
1495 		vput(vp);
1496 		vp = tdp;
1497 	}
1498 	if (error) {
1499 		vput(vp);
1500 		return (error);
1501 	}
1502 	VOP_UNLOCK(vp, 0, td);
1503 	FILEDESC_LOCK(fdp);
1504 	vpold = fdp->fd_cdir;
1505 	fdp->fd_cdir = vp;
1506 	FILEDESC_UNLOCK(fdp);
1507 	vrele(vpold);
1508 	return (0);
1509 }
1510 
1511 /*
1512  * Change current working directory (``.'').
1513  */
1514 #ifndef _SYS_SYSPROTO_H_
1515 struct chdir_args {
1516 	char	*path;
1517 };
1518 #endif
1519 /* ARGSUSED */
1520 int
1521 chdir(td, uap)
1522 	struct thread *td;
1523 	struct chdir_args /* {
1524 		syscallarg(char *) path;
1525 	} */ *uap;
1526 {
1527 	register struct filedesc *fdp = td->td_proc->p_fd;
1528 	int error;
1529 	struct nameidata nd;
1530 	struct vnode *vp;
1531 
1532 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1533 	    SCARG(uap, path), td);
1534 	if ((error = change_dir(&nd, td)) != 0)
1535 		return (error);
1536 	NDFREE(&nd, NDF_ONLY_PNBUF);
1537 	FILEDESC_LOCK(fdp);
1538 	vp = fdp->fd_cdir;
1539 	fdp->fd_cdir = nd.ni_vp;
1540 	FILEDESC_UNLOCK(fdp);
1541 	vrele(vp);
1542 	return (0);
1543 }
1544 
1545 /*
1546  * Helper function for raised chroot(2) security function:  Refuse if
1547  * any filedescriptors are open directories.
1548  */
1549 static int
1550 chroot_refuse_vdir_fds(fdp)
1551 	struct filedesc *fdp;
1552 {
1553 	struct vnode *vp;
1554 	struct file *fp;
1555 	int fd;
1556 
1557 	FILEDESC_LOCK(fdp);
1558 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
1559 		fp = fget_locked(fdp, fd);
1560 		if (fp == NULL)
1561 			continue;
1562 		if (fp->f_type == DTYPE_VNODE) {
1563 			vp = (struct vnode *)fp->f_data;
1564 			if (vp->v_type == VDIR) {
1565 				FILEDESC_UNLOCK(fdp);
1566 				return (EPERM);
1567 			}
1568 		}
1569 	}
1570 	FILEDESC_UNLOCK(fdp);
1571 	return (0);
1572 }
1573 
/*
 * This sysctl determines if we will allow a process to chroot(2) if it
 * has a directory open:
 *	0: disallowed for all processes.
 *	1: allowed for processes that were not already chroot(2)'ed.
 *	2: allowed for all processes.
 *
 * The knob is registered read-write under the _kern sysctl node and
 * defaults to 1.  chroot() only tests for the values 0 and 1, so any
 * other value behaves like 2.
 */

static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0, "");
1586 
1587 /*
1588  * Change notion of root (``/'') directory.
1589  */
1590 #ifndef _SYS_SYSPROTO_H_
1591 struct chroot_args {
1592 	char	*path;
1593 };
1594 #endif
1595 /* ARGSUSED */
1596 int
1597 chroot(td, uap)
1598 	struct thread *td;
1599 	struct chroot_args /* {
1600 		syscallarg(char *) path;
1601 	} */ *uap;
1602 {
1603 	register struct filedesc *fdp = td->td_proc->p_fd;
1604 	int error;
1605 	struct nameidata nd;
1606 	struct vnode *vp;
1607 
1608 	error = suser_cred(td->td_ucred, PRISON_ROOT);
1609 	if (error)
1610 		return (error);
1611 	FILEDESC_LOCK(fdp);
1612 	if (chroot_allow_open_directories == 0 ||
1613 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
1614 		FILEDESC_UNLOCK(fdp);
1615 		error = chroot_refuse_vdir_fds(fdp);
1616 	} else
1617 		FILEDESC_UNLOCK(fdp);
1618 	if (error)
1619 		return (error);
1620 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1621 	    SCARG(uap, path), td);
1622 	if ((error = change_dir(&nd, td)) != 0)
1623 		return (error);
1624 	NDFREE(&nd, NDF_ONLY_PNBUF);
1625 	FILEDESC_LOCK(fdp);
1626 	vp = fdp->fd_rdir;
1627 	fdp->fd_rdir = nd.ni_vp;
1628 	if (!fdp->fd_jdir) {
1629 		fdp->fd_jdir = nd.ni_vp;
1630                 VREF(fdp->fd_jdir);
1631 	}
1632 	FILEDESC_UNLOCK(fdp);
1633 	vrele(vp);
1634 	return (0);
1635 }
1636 
1637 /*
1638  * Common routine for chroot and chdir.
1639  */
1640 static int
1641 change_dir(ndp, td)
1642 	register struct nameidata *ndp;
1643 	struct thread *td;
1644 {
1645 	struct vnode *vp;
1646 	int error;
1647 
1648 	error = namei(ndp);
1649 	if (error)
1650 		return (error);
1651 	vp = ndp->ni_vp;
1652 	if (vp->v_type != VDIR)
1653 		error = ENOTDIR;
1654 	else
1655 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
1656 	if (error)
1657 		vput(vp);
1658 	else
1659 		VOP_UNLOCK(vp, 0, td);
1660 	return (error);
1661 }
1662 
1663 /*
1664  * Check permissions, allocate an open file structure,
1665  * and call the device open routine if any.
1666  */
1667 #ifndef _SYS_SYSPROTO_H_
1668 struct open_args {
1669 	char	*path;
1670 	int	flags;
1671 	int	mode;
1672 };
1673 #endif
1674 int
1675 open(td, uap)
1676 	struct thread *td;
1677 	register struct open_args /* {
1678 		syscallarg(char *) path;
1679 		syscallarg(int) flags;
1680 		syscallarg(int) mode;
1681 	} */ *uap;
1682 {
1683 	struct proc *p = td->td_proc;
1684 	struct filedesc *fdp = p->p_fd;
1685 	struct file *fp;
1686 	struct vnode *vp;
1687 	struct vattr vat;
1688 	struct mount *mp;
1689 	int cmode, flags, oflags;
1690 	struct file *nfp;
1691 	int type, indx, error;
1692 	struct flock lf;
1693 	struct nameidata nd;
1694 
1695 	oflags = SCARG(uap, flags);
1696 	if ((oflags & O_ACCMODE) == O_ACCMODE)
1697 		return (EINVAL);
1698 	flags = FFLAGS(oflags);
1699 	error = falloc(td, &nfp, &indx);
1700 	if (error)
1701 		return (error);
1702 	fp = nfp;
1703 	FILEDESC_LOCK(fdp);
1704 	cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1705 	FILEDESC_UNLOCK(fdp);
1706 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1707 	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
1708 	/*
1709 	 * Bump the ref count to prevent another process from closing
1710 	 * the descriptor while we are blocked in vn_open()
1711 	 */
1712 	fhold(fp);
1713 	error = vn_open(&nd, &flags, cmode);
1714 	if (error) {
1715 		/*
1716 		 * release our own reference
1717 		 */
1718 		fdrop(fp, td);
1719 
1720 		/*
1721 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1722 		 * responsible for dropping the old contents of ofiles[indx]
1723 		 * if it succeeds.
1724 		 */
1725 		if ((error == ENODEV || error == ENXIO) &&
1726 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1727 		    (error =
1728 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1729 			td->td_retval[0] = indx;
1730 			return (0);
1731 		}
1732 		/*
1733 		 * Clean up the descriptor, but only if another thread hadn't
1734 		 * replaced or closed it.
1735 		 */
1736 		FILEDESC_LOCK(fdp);
1737 		if (fdp->fd_ofiles[indx] == fp) {
1738 			fdp->fd_ofiles[indx] = NULL;
1739 			FILEDESC_UNLOCK(fdp);
1740 			fdrop(fp, td);
1741 		} else
1742 			FILEDESC_UNLOCK(fdp);
1743 
1744 		if (error == ERESTART)
1745 			error = EINTR;
1746 		return (error);
1747 	}
1748 	td->td_dupfd = 0;
1749 	NDFREE(&nd, NDF_ONLY_PNBUF);
1750 	vp = nd.ni_vp;
1751 
1752 	/*
1753 	 * There should be 2 references on the file, one from the descriptor
1754 	 * table, and one for us.
1755 	 *
1756 	 * Handle the case where someone closed the file (via its file
1757 	 * descriptor) while we were blocked.  The end result should look
1758 	 * like opening the file succeeded but it was immediately closed.
1759 	 */
1760 	FILEDESC_LOCK(fdp);
1761 	FILE_LOCK(fp);
1762 	if (fp->f_count == 1) {
1763 		KASSERT(fdp->fd_ofiles[indx] != fp,
1764 		    ("Open file descriptor lost all refs"));
1765 		FILEDESC_UNLOCK(fdp);
1766 		FILE_UNLOCK(fp);
1767 		VOP_UNLOCK(vp, 0, td);
1768 		vn_close(vp, flags & FMASK, fp->f_cred, td);
1769 		fdrop(fp, td);
1770 		td->td_retval[0] = indx;
1771 		return 0;
1772 	}
1773 
1774 	/* assert that vn_open created a backing object if one is needed */
1775 	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
1776 		("open: vmio vnode has no backing object after vn_open"));
1777 
1778 	fp->f_data = (caddr_t)vp;
1779 	fp->f_flag = flags & FMASK;
1780 	fp->f_ops = &vnops;
1781 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1782 	FILEDESC_UNLOCK(fdp);
1783 	FILE_UNLOCK(fp);
1784 	VOP_UNLOCK(vp, 0, td);
1785 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1786 		lf.l_whence = SEEK_SET;
1787 		lf.l_start = 0;
1788 		lf.l_len = 0;
1789 		if (flags & O_EXLOCK)
1790 			lf.l_type = F_WRLCK;
1791 		else
1792 			lf.l_type = F_RDLCK;
1793 		type = F_FLOCK;
1794 		if ((flags & FNONBLOCK) == 0)
1795 			type |= F_WAIT;
1796 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0)
1797 			goto bad;
1798 		fp->f_flag |= FHASLOCK;
1799 	}
1800 	if (flags & O_TRUNC) {
1801 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1802 			goto bad;
1803 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1804 		VATTR_NULL(&vat);
1805 		vat.va_size = 0;
1806 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1807 		error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1808 		VOP_UNLOCK(vp, 0, td);
1809 		vn_finished_write(mp);
1810 		if (error)
1811 			goto bad;
1812 	}
1813 	/*
1814 	 * Release our private reference, leaving the one associated with
1815 	 * the descriptor table intact.
1816 	 */
1817 	fdrop(fp, td);
1818 	td->td_retval[0] = indx;
1819 	return (0);
1820 bad:
1821 	FILEDESC_LOCK(fdp);
1822 	if (fdp->fd_ofiles[indx] == fp) {
1823 		fdp->fd_ofiles[indx] = NULL;
1824 		FILEDESC_UNLOCK(fdp);
1825 		fdrop(fp, td);
1826 	} else
1827 		FILEDESC_UNLOCK(fdp);
1828 	return (error);
1829 }
1830 
#ifdef COMPAT_43
/*
 * Old creat(2): implemented as open(2) with
 * O_WRONLY | O_CREAT | O_TRUNC.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(td, uap)
	struct thread *td;
	register struct ocreat_args /* {
		syscallarg(char *) path;
		syscallarg(int) mode;
	} */ *uap;
{
	struct open_args /* {
		syscallarg(char *) path;
		syscallarg(int) flags;
		syscallarg(int) mode;
	} */ open_uap;

	/* Build the equivalent open(2) argument block and delegate. */
	SCARG(&open_uap, path) = SCARG(uap, path);
	SCARG(&open_uap, flags) = O_WRONLY | O_CREAT | O_TRUNC;
	SCARG(&open_uap, mode) = SCARG(uap, mode);
	return (open(td, &open_uap));
}
#endif /* COMPAT_43 */
1861 
1862 /*
1863  * Create a special file.
1864  */
1865 #ifndef _SYS_SYSPROTO_H_
1866 struct mknod_args {
1867 	char	*path;
1868 	int	mode;
1869 	int	dev;
1870 };
1871 #endif
1872 /* ARGSUSED */
1873 int
1874 mknod(td, uap)
1875 	struct thread *td;
1876 	register struct mknod_args /* {
1877 		syscallarg(char *) path;
1878 		syscallarg(int) mode;
1879 		syscallarg(int) dev;
1880 	} */ *uap;
1881 {
1882 	struct vnode *vp;
1883 	struct mount *mp;
1884 	struct vattr vattr;
1885 	int error;
1886 	int whiteout = 0;
1887 	struct nameidata nd;
1888 
1889 	switch (SCARG(uap, mode) & S_IFMT) {
1890 	case S_IFCHR:
1891 	case S_IFBLK:
1892 		error = suser(td);
1893 		break;
1894 	default:
1895 		error = suser_cred(td->td_ucred, PRISON_ROOT);
1896 		break;
1897 	}
1898 	if (error)
1899 		return (error);
1900 restart:
1901 	bwillwrite();
1902 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1903 	if ((error = namei(&nd)) != 0)
1904 		return (error);
1905 	vp = nd.ni_vp;
1906 	if (vp != NULL) {
1907 		vrele(vp);
1908 		error = EEXIST;
1909 	} else {
1910 		VATTR_NULL(&vattr);
1911 		FILEDESC_LOCK(td->td_proc->p_fd);
1912 		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
1913 		FILEDESC_UNLOCK(td->td_proc->p_fd);
1914 		vattr.va_rdev = SCARG(uap, dev);
1915 		whiteout = 0;
1916 
1917 		switch (SCARG(uap, mode) & S_IFMT) {
1918 		case S_IFMT:	/* used by badsect to flag bad sectors */
1919 			vattr.va_type = VBAD;
1920 			break;
1921 		case S_IFCHR:
1922 			vattr.va_type = VCHR;
1923 			break;
1924 		case S_IFBLK:
1925 			vattr.va_type = VBLK;
1926 			break;
1927 		case S_IFWHT:
1928 			whiteout = 1;
1929 			break;
1930 		default:
1931 			error = EINVAL;
1932 			break;
1933 		}
1934 	}
1935 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1936 		NDFREE(&nd, NDF_ONLY_PNBUF);
1937 		vput(nd.ni_dvp);
1938 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1939 			return (error);
1940 		goto restart;
1941 	}
1942 	if (!error) {
1943 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1944 		if (whiteout)
1945 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1946 		else {
1947 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1948 						&nd.ni_cnd, &vattr);
1949 			if (error == 0)
1950 				vput(nd.ni_vp);
1951 		}
1952 	}
1953 	NDFREE(&nd, NDF_ONLY_PNBUF);
1954 	vput(nd.ni_dvp);
1955 	vn_finished_write(mp);
1956 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1957 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1958 	return (error);
1959 }
1960 
1961 /*
1962  * Create a named pipe.
1963  */
1964 #ifndef _SYS_SYSPROTO_H_
1965 struct mkfifo_args {
1966 	char	*path;
1967 	int	mode;
1968 };
1969 #endif
1970 /* ARGSUSED */
1971 int
1972 mkfifo(td, uap)
1973 	struct thread *td;
1974 	register struct mkfifo_args /* {
1975 		syscallarg(char *) path;
1976 		syscallarg(int) mode;
1977 	} */ *uap;
1978 {
1979 	struct mount *mp;
1980 	struct vattr vattr;
1981 	int error;
1982 	struct nameidata nd;
1983 
1984 restart:
1985 	bwillwrite();
1986 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1987 	if ((error = namei(&nd)) != 0)
1988 		return (error);
1989 	if (nd.ni_vp != NULL) {
1990 		NDFREE(&nd, NDF_ONLY_PNBUF);
1991 		vrele(nd.ni_vp);
1992 		vput(nd.ni_dvp);
1993 		return (EEXIST);
1994 	}
1995 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1996 		NDFREE(&nd, NDF_ONLY_PNBUF);
1997 		vput(nd.ni_dvp);
1998 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1999 			return (error);
2000 		goto restart;
2001 	}
2002 	VATTR_NULL(&vattr);
2003 	vattr.va_type = VFIFO;
2004 	FILEDESC_LOCK(td->td_proc->p_fd);
2005 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
2006 	FILEDESC_UNLOCK(td->td_proc->p_fd);
2007 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2008 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2009 	if (error == 0)
2010 		vput(nd.ni_vp);
2011 	NDFREE(&nd, NDF_ONLY_PNBUF);
2012 	vput(nd.ni_dvp);
2013 	vn_finished_write(mp);
2014 	return (error);
2015 }
2016 
2017 /*
2018  * Make a hard file link.
2019  */
2020 #ifndef _SYS_SYSPROTO_H_
2021 struct link_args {
2022 	char	*path;
2023 	char	*link;
2024 };
2025 #endif
2026 /* ARGSUSED */
2027 int
2028 link(td, uap)
2029 	struct thread *td;
2030 	register struct link_args /* {
2031 		syscallarg(char *) path;
2032 		syscallarg(char *) link;
2033 	} */ *uap;
2034 {
2035 	struct vnode *vp;
2036 	struct mount *mp;
2037 	struct nameidata nd;
2038 	int error;
2039 
2040 	bwillwrite();
2041 	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), td);
2042 	if ((error = namei(&nd)) != 0)
2043 		return (error);
2044 	NDFREE(&nd, NDF_ONLY_PNBUF);
2045 	vp = nd.ni_vp;
2046 	if (vp->v_type == VDIR) {
2047 		vrele(vp);
2048 		return (EPERM);		/* POSIX */
2049 	}
2050 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2051 		vrele(vp);
2052 		return (error);
2053 	}
2054 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
2055 	if ((error = namei(&nd)) == 0) {
2056 		if (nd.ni_vp != NULL) {
2057 			vrele(nd.ni_vp);
2058 			error = EEXIST;
2059 		} else {
2060 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2061 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2062 			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2063 		}
2064 		NDFREE(&nd, NDF_ONLY_PNBUF);
2065 		vput(nd.ni_dvp);
2066 	}
2067 	vrele(vp);
2068 	vn_finished_write(mp);
2069 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
2070 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
2071 	return (error);
2072 }
2073 
2074 /*
2075  * Make a symbolic link.
2076  */
2077 #ifndef _SYS_SYSPROTO_H_
2078 struct symlink_args {
2079 	char	*path;
2080 	char	*link;
2081 };
2082 #endif
2083 /* ARGSUSED */
2084 int
2085 symlink(td, uap)
2086 	struct thread *td;
2087 	register struct symlink_args /* {
2088 		syscallarg(char *) path;
2089 		syscallarg(char *) link;
2090 	} */ *uap;
2091 {
2092 	struct mount *mp;
2093 	struct vattr vattr;
2094 	char *path;
2095 	int error;
2096 	struct nameidata nd;
2097 
2098 	path = uma_zalloc(namei_zone, M_WAITOK);
2099 	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
2100 		goto out;
2101 restart:
2102 	bwillwrite();
2103 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
2104 	if ((error = namei(&nd)) != 0)
2105 		goto out;
2106 	if (nd.ni_vp) {
2107 		NDFREE(&nd, NDF_ONLY_PNBUF);
2108 		vrele(nd.ni_vp);
2109 		vput(nd.ni_dvp);
2110 		error = EEXIST;
2111 		goto out;
2112 	}
2113 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2114 		NDFREE(&nd, NDF_ONLY_PNBUF);
2115 		vput(nd.ni_dvp);
2116 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2117 			return (error);
2118 		goto restart;
2119 	}
2120 	VATTR_NULL(&vattr);
2121 	FILEDESC_LOCK(td->td_proc->p_fd);
2122 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
2123 	FILEDESC_UNLOCK(td->td_proc->p_fd);
2124 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2125 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2126 	NDFREE(&nd, NDF_ONLY_PNBUF);
2127 	if (error == 0)
2128 		vput(nd.ni_vp);
2129 	vput(nd.ni_dvp);
2130 	vn_finished_write(mp);
2131 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
2132 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
2133 out:
2134 	uma_zfree(namei_zone, path);
2135 	return (error);
2136 }
2137 
2138 /*
2139  * Delete a whiteout from the filesystem.
2140  */
2141 /* ARGSUSED */
2142 int
2143 undelete(td, uap)
2144 	struct thread *td;
2145 	register struct undelete_args /* {
2146 		syscallarg(char *) path;
2147 	} */ *uap;
2148 {
2149 	int error;
2150 	struct mount *mp;
2151 	struct nameidata nd;
2152 
2153 restart:
2154 	bwillwrite();
2155 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
2156 	    SCARG(uap, path), td);
2157 	error = namei(&nd);
2158 	if (error)
2159 		return (error);
2160 
2161 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2162 		NDFREE(&nd, NDF_ONLY_PNBUF);
2163 		if (nd.ni_vp)
2164 			vrele(nd.ni_vp);
2165 		vput(nd.ni_dvp);
2166 		return (EEXIST);
2167 	}
2168 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2169 		NDFREE(&nd, NDF_ONLY_PNBUF);
2170 		vput(nd.ni_dvp);
2171 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2172 			return (error);
2173 		goto restart;
2174 	}
2175 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2176 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
2177 	NDFREE(&nd, NDF_ONLY_PNBUF);
2178 	vput(nd.ni_dvp);
2179 	vn_finished_write(mp);
2180 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
2181 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
2182 	return (error);
2183 }
2184 
2185 /*
2186  * Delete a name from the filesystem.
2187  */
2188 #ifndef _SYS_SYSPROTO_H_
2189 struct unlink_args {
2190 	char	*path;
2191 };
2192 #endif
2193 /* ARGSUSED */
2194 int
2195 unlink(td, uap)
2196 	struct thread *td;
2197 	struct unlink_args /* {
2198 		syscallarg(char *) path;
2199 	} */ *uap;
2200 {
2201 	struct mount *mp;
2202 	struct vnode *vp;
2203 	int error;
2204 	struct nameidata nd;
2205 
2206 restart:
2207 	bwillwrite();
2208 	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
2209 	if ((error = namei(&nd)) != 0)
2210 		return (error);
2211 	vp = nd.ni_vp;
2212 	if (vp->v_type == VDIR)
2213 		error = EPERM;		/* POSIX */
2214 	else {
2215 		/*
2216 		 * The root of a mounted filesystem cannot be deleted.
2217 		 *
2218 		 * XXX: can this only be a VDIR case?
2219 		 */
2220 		if (vp->v_flag & VROOT)
2221 			error = EBUSY;
2222 	}
2223 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2224 		NDFREE(&nd, NDF_ONLY_PNBUF);
2225 		vrele(vp);
2226 		vput(nd.ni_dvp);
2227 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2228 			return (error);
2229 		goto restart;
2230 	}
2231 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2232 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2233 	if (!error) {
2234 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2235 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
2236 	}
2237 	NDFREE(&nd, NDF_ONLY_PNBUF);
2238 	vput(nd.ni_dvp);
2239 	vput(vp);
2240 	vn_finished_write(mp);
2241 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
2242 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
2243 	return (error);
2244 }
2245 
2246 /*
2247  * Reposition read/write file offset.
2248  */
2249 #ifndef _SYS_SYSPROTO_H_
2250 struct lseek_args {
2251 	int	fd;
2252 	int	pad;
2253 	off_t	offset;
2254 	int	whence;
2255 };
2256 #endif
2257 int
2258 lseek(td, uap)
2259 	struct thread *td;
2260 	register struct lseek_args /* {
2261 		syscallarg(int) fd;
2262 		syscallarg(int) pad;
2263 		syscallarg(off_t) offset;
2264 		syscallarg(int) whence;
2265 	} */ *uap;
2266 {
2267 	struct ucred *cred = td->td_ucred;
2268 	struct file *fp;
2269 	struct vnode *vp;
2270 	struct vattr vattr;
2271 	off_t offset;
2272 	int error, noneg;
2273 
2274 	if ((error = fget(td, uap->fd, &fp)) != 0)
2275 		return (error);
2276 	if (fp->f_type != DTYPE_VNODE) {
2277 		fdrop(fp, td);
2278 		return (ESPIPE);
2279 	}
2280 	vp = (struct vnode *)fp->f_data;
2281 	noneg = (vp->v_type != VCHR);
2282 	offset = SCARG(uap, offset);
2283 	switch (SCARG(uap, whence)) {
2284 	case L_INCR:
2285 		if (noneg &&
2286 		    (fp->f_offset < 0 ||
2287 		     (offset > 0 && fp->f_offset > OFF_MAX - offset)))
2288 			return (EOVERFLOW);
2289 		offset += fp->f_offset;
2290 		break;
2291 	case L_XTND:
2292 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2293 		error = VOP_GETATTR(vp, &vattr, cred, td);
2294 		VOP_UNLOCK(vp, 0, td);
2295 		if (error)
2296 			return (error);
2297 		if (noneg &&
2298 		    (vattr.va_size > OFF_MAX ||
2299 		     (offset > 0 && vattr.va_size > OFF_MAX - offset)))
2300 			return (EOVERFLOW);
2301 		offset += vattr.va_size;
2302 		break;
2303 	case L_SET:
2304 		break;
2305 	default:
2306 		fdrop(fp, td);
2307 		return (EINVAL);
2308 	}
2309 	if (noneg && offset < 0)
2310 		return (EINVAL);
2311 	fp->f_offset = offset;
2312 	*(off_t *)(td->td_retval) = fp->f_offset;
2313 	fdrop(fp, td);
2314 	return (0);
2315 }
2316 
2317 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2318 /*
2319  * Reposition read/write file offset.
2320  */
2321 #ifndef _SYS_SYSPROTO_H_
2322 struct olseek_args {
2323 	int	fd;
2324 	long	offset;
2325 	int	whence;
2326 };
2327 #endif
2328 int
2329 olseek(td, uap)
2330 	struct thread *td;
2331 	register struct olseek_args /* {
2332 		syscallarg(int) fd;
2333 		syscallarg(long) offset;
2334 		syscallarg(int) whence;
2335 	} */ *uap;
2336 {
2337 	struct lseek_args /* {
2338 		syscallarg(int) fd;
2339 		syscallarg(int) pad;
2340 		syscallarg(off_t) offset;
2341 		syscallarg(int) whence;
2342 	} */ nuap;
2343 	int error;
2344 
2345 	SCARG(&nuap, fd) = SCARG(uap, fd);
2346 	SCARG(&nuap, offset) = SCARG(uap, offset);
2347 	SCARG(&nuap, whence) = SCARG(uap, whence);
2348 	error = lseek(td, &nuap);
2349 	return (error);
2350 }
#endif /* COMPAT_43 || COMPAT_SUNOS */
2352 
2353 /*
2354  * Check access permissions using passed credentials.
2355  */
2356 static int
2357 vn_access(vp, user_flags, cred, td)
2358 	struct vnode	*vp;
2359 	int		user_flags;
2360 	struct ucred	*cred;
2361 	struct thread	*td;
2362 {
2363 	int error, flags;
2364 
2365 	/* Flags == 0 means only check for existence. */
2366 	error = 0;
2367 	if (user_flags) {
2368 		flags = 0;
2369 		if (user_flags & R_OK)
2370 			flags |= VREAD;
2371 		if (user_flags & W_OK)
2372 			flags |= VWRITE;
2373 		if (user_flags & X_OK)
2374 			flags |= VEXEC;
2375 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
2376 			error = VOP_ACCESS(vp, flags, cred, td);
2377 	}
2378 	return (error);
2379 }
2380 
2381 /*
2382  * Check access permissions using "real" credentials.
2383  */
2384 #ifndef _SYS_SYSPROTO_H_
2385 struct access_args {
2386 	char	*path;
2387 	int	flags;
2388 };
2389 #endif
2390 int
2391 access(td, uap)
2392 	struct thread *td;
2393 	register struct access_args /* {
2394 		syscallarg(char *) path;
2395 		syscallarg(int) flags;
2396 	} */ *uap;
2397 {
2398 	struct ucred *cred, *tmpcred;
2399 	register struct vnode *vp;
2400 	int error;
2401 	struct nameidata nd;
2402 
2403 	/*
2404 	 * Create and modify a temporary credential instead of one that
2405 	 * is potentially shared.  This could also mess up socket
2406 	 * buffer accounting which can run in an interrupt context.
2407 	 *
2408 	 * XXX - Depending on how "threads" are finally implemented, it
2409 	 * may be better to explicitly pass the credential to namei()
2410 	 * rather than to modify the potentially shared process structure.
2411 	 */
2412 	cred = td->td_ucred;
2413 	tmpcred = crdup(cred);
2414 	tmpcred->cr_uid = cred->cr_ruid;
2415 	tmpcred->cr_groups[0] = cred->cr_rgid;
2416 	td->td_ucred = tmpcred;
2417 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2418 	    SCARG(uap, path), td);
2419 	if ((error = namei(&nd)) != 0)
2420 		goto out1;
2421 	vp = nd.ni_vp;
2422 
2423 	error = vn_access(vp, SCARG(uap, flags), tmpcred, td);
2424 	NDFREE(&nd, NDF_ONLY_PNBUF);
2425 	vput(vp);
2426 out1:
2427 	td->td_ucred = cred;
2428 	crfree(tmpcred);
2429 	return (error);
2430 }
2431 
2432 /*
2433  * Check access permissions using "effective" credentials.
2434  */
2435 #ifndef _SYS_SYSPROTO_H_
2436 struct eaccess_args {
2437 	char	*path;
2438 	int	flags;
2439 };
2440 #endif
2441 int
2442 eaccess(td, uap)
2443 	struct thread *td;
2444 	register struct eaccess_args /* {
2445 		syscallarg(char *) path;
2446 		syscallarg(int) flags;
2447 	} */ *uap;
2448 {
2449 	struct nameidata nd;
2450 	struct vnode *vp;
2451 	int error;
2452 
2453 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2454 	    SCARG(uap, path), td);
2455 	if ((error = namei(&nd)) != 0)
2456 		return (error);
2457 	vp = nd.ni_vp;
2458 
2459 	error = vn_access(vp, SCARG(uap, flags), td->td_ucred, td);
2460 	NDFREE(&nd, NDF_ONLY_PNBUF);
2461 	vput(vp);
2462 	return (error);
2463 }
2464 
2465 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2466 /*
2467  * Get file status; this version follows links.
2468  */
2469 #ifndef _SYS_SYSPROTO_H_
2470 struct ostat_args {
2471 	char	*path;
2472 	struct ostat *ub;
2473 };
2474 #endif
2475 /* ARGSUSED */
2476 int
2477 ostat(td, uap)
2478 	struct thread *td;
2479 	register struct ostat_args /* {
2480 		syscallarg(char *) path;
2481 		syscallarg(struct ostat *) ub;
2482 	} */ *uap;
2483 {
2484 	struct stat sb;
2485 	struct ostat osb;
2486 	int error;
2487 	struct nameidata nd;
2488 
2489 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2490 	    SCARG(uap, path), td);
2491 	if ((error = namei(&nd)) != 0)
2492 		return (error);
2493 	NDFREE(&nd, NDF_ONLY_PNBUF);
2494 	error = vn_stat(nd.ni_vp, &sb, td);
2495 	vput(nd.ni_vp);
2496 	if (error)
2497 		return (error);
2498 	cvtstat(&sb, &osb);
2499 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
2500 	return (error);
2501 }
2502 
2503 /*
2504  * Get file status; this version does not follow links.
2505  */
2506 #ifndef _SYS_SYSPROTO_H_
2507 struct olstat_args {
2508 	char	*path;
2509 	struct ostat *ub;
2510 };
2511 #endif
2512 /* ARGSUSED */
2513 int
2514 olstat(td, uap)
2515 	struct thread *td;
2516 	register struct olstat_args /* {
2517 		syscallarg(char *) path;
2518 		syscallarg(struct ostat *) ub;
2519 	} */ *uap;
2520 {
2521 	struct vnode *vp;
2522 	struct stat sb;
2523 	struct ostat osb;
2524 	int error;
2525 	struct nameidata nd;
2526 
2527 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2528 	    SCARG(uap, path), td);
2529 	if ((error = namei(&nd)) != 0)
2530 		return (error);
2531 	vp = nd.ni_vp;
2532 	error = vn_stat(vp, &sb, td);
2533 	NDFREE(&nd, NDF_ONLY_PNBUF);
2534 	vput(vp);
2535 	if (error)
2536 		return (error);
2537 	cvtstat(&sb, &osb);
2538 	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
2539 	return (error);
2540 }
2541 
2542 /*
 * Convert from a new (struct stat) to an old (struct ostat) stat structure.
2544  */
2545 void
2546 cvtstat(st, ost)
2547 	struct stat *st;
2548 	struct ostat *ost;
2549 {
2550 
2551 	ost->st_dev = st->st_dev;
2552 	ost->st_ino = st->st_ino;
2553 	ost->st_mode = st->st_mode;
2554 	ost->st_nlink = st->st_nlink;
2555 	ost->st_uid = st->st_uid;
2556 	ost->st_gid = st->st_gid;
2557 	ost->st_rdev = st->st_rdev;
2558 	if (st->st_size < (quad_t)1 << 32)
2559 		ost->st_size = st->st_size;
2560 	else
2561 		ost->st_size = -2;
2562 	ost->st_atime = st->st_atime;
2563 	ost->st_mtime = st->st_mtime;
2564 	ost->st_ctime = st->st_ctime;
2565 	ost->st_blksize = st->st_blksize;
2566 	ost->st_blocks = st->st_blocks;
2567 	ost->st_flags = st->st_flags;
2568 	ost->st_gen = st->st_gen;
2569 }
2570 #endif /* COMPAT_43 || COMPAT_SUNOS */
2571 
2572 /*
2573  * Get file status; this version follows links.
2574  */
2575 #ifndef _SYS_SYSPROTO_H_
2576 struct stat_args {
2577 	char	*path;
2578 	struct stat *ub;
2579 };
2580 #endif
2581 /* ARGSUSED */
2582 int
2583 stat(td, uap)
2584 	struct thread *td;
2585 	register struct stat_args /* {
2586 		syscallarg(char *) path;
2587 		syscallarg(struct stat *) ub;
2588 	} */ *uap;
2589 {
2590 	struct stat sb;
2591 	int error;
2592 	struct nameidata nd;
2593 
2594 #ifdef LOOKUP_SHARED
2595 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
2596 	    UIO_USERSPACE, SCARG(uap, path), td);
2597 #else
2598 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2599 	    SCARG(uap, path), td);
2600 #endif
2601 	if ((error = namei(&nd)) != 0)
2602 		return (error);
2603 	error = vn_stat(nd.ni_vp, &sb, td);
2604 	NDFREE(&nd, NDF_ONLY_PNBUF);
2605 	vput(nd.ni_vp);
2606 	if (error)
2607 		return (error);
2608 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
2609 	return (error);
2610 }
2611 
2612 /*
2613  * Get file status; this version does not follow links.
2614  */
2615 #ifndef _SYS_SYSPROTO_H_
2616 struct lstat_args {
2617 	char	*path;
2618 	struct stat *ub;
2619 };
2620 #endif
2621 /* ARGSUSED */
2622 int
2623 lstat(td, uap)
2624 	struct thread *td;
2625 	register struct lstat_args /* {
2626 		syscallarg(char *) path;
2627 		syscallarg(struct stat *) ub;
2628 	} */ *uap;
2629 {
2630 	int error;
2631 	struct vnode *vp;
2632 	struct stat sb;
2633 	struct nameidata nd;
2634 
2635 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2636 	    SCARG(uap, path), td);
2637 	if ((error = namei(&nd)) != 0)
2638 		return (error);
2639 	vp = nd.ni_vp;
2640 	error = vn_stat(vp, &sb, td);
2641 	NDFREE(&nd, NDF_ONLY_PNBUF);
2642 	vput(vp);
2643 	if (error)
2644 		return (error);
2645 	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
2646 	return (error);
2647 }
2648 
2649 /*
2650  * Implementation of the NetBSD stat() function.
2651  * XXX This should probably be collapsed with the FreeBSD version,
2652  * as the differences are only due to vn_stat() clearing spares at
2653  * the end of the structures.  vn_stat could be split to avoid this,
2654  * and thus collapse the following to close to zero code.
2655  */
2656 void
2657 cvtnstat(sb, nsb)
2658 	struct stat *sb;
2659 	struct nstat *nsb;
2660 {
2661 	nsb->st_dev = sb->st_dev;
2662 	nsb->st_ino = sb->st_ino;
2663 	nsb->st_mode = sb->st_mode;
2664 	nsb->st_nlink = sb->st_nlink;
2665 	nsb->st_uid = sb->st_uid;
2666 	nsb->st_gid = sb->st_gid;
2667 	nsb->st_rdev = sb->st_rdev;
2668 	nsb->st_atimespec = sb->st_atimespec;
2669 	nsb->st_mtimespec = sb->st_mtimespec;
2670 	nsb->st_ctimespec = sb->st_ctimespec;
2671 	nsb->st_size = sb->st_size;
2672 	nsb->st_blocks = sb->st_blocks;
2673 	nsb->st_blksize = sb->st_blksize;
2674 	nsb->st_flags = sb->st_flags;
2675 	nsb->st_gen = sb->st_gen;
2676 	nsb->st_qspare[0] = sb->st_qspare[0];
2677 	nsb->st_qspare[1] = sb->st_qspare[1];
2678 }
2679 
2680 #ifndef _SYS_SYSPROTO_H_
2681 struct nstat_args {
2682 	char	*path;
2683 	struct nstat *ub;
2684 };
2685 #endif
2686 /* ARGSUSED */
2687 int
2688 nstat(td, uap)
2689 	struct thread *td;
2690 	register struct nstat_args /* {
2691 		syscallarg(char *) path;
2692 		syscallarg(struct nstat *) ub;
2693 	} */ *uap;
2694 {
2695 	struct stat sb;
2696 	struct nstat nsb;
2697 	int error;
2698 	struct nameidata nd;
2699 
2700 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2701 	    SCARG(uap, path), td);
2702 	if ((error = namei(&nd)) != 0)
2703 		return (error);
2704 	NDFREE(&nd, NDF_ONLY_PNBUF);
2705 	error = vn_stat(nd.ni_vp, &sb, td);
2706 	vput(nd.ni_vp);
2707 	if (error)
2708 		return (error);
2709 	cvtnstat(&sb, &nsb);
2710 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2711 	return (error);
2712 }
2713 
2714 /*
2715  * NetBSD lstat.  Get file status; this version does not follow links.
2716  */
2717 #ifndef _SYS_SYSPROTO_H_
2718 struct lstat_args {
2719 	char	*path;
2720 	struct stat *ub;
2721 };
2722 #endif
2723 /* ARGSUSED */
2724 int
2725 nlstat(td, uap)
2726 	struct thread *td;
2727 	register struct nlstat_args /* {
2728 		syscallarg(char *) path;
2729 		syscallarg(struct nstat *) ub;
2730 	} */ *uap;
2731 {
2732 	int error;
2733 	struct vnode *vp;
2734 	struct stat sb;
2735 	struct nstat nsb;
2736 	struct nameidata nd;
2737 
2738 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2739 	    SCARG(uap, path), td);
2740 	if ((error = namei(&nd)) != 0)
2741 		return (error);
2742 	vp = nd.ni_vp;
2743 	NDFREE(&nd, NDF_ONLY_PNBUF);
2744 	error = vn_stat(vp, &sb, td);
2745 	vput(vp);
2746 	if (error)
2747 		return (error);
2748 	cvtnstat(&sb, &nsb);
2749 	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2750 	return (error);
2751 }
2752 
2753 /*
2754  * Get configurable pathname variables.
2755  */
2756 #ifndef _SYS_SYSPROTO_H_
2757 struct pathconf_args {
2758 	char	*path;
2759 	int	name;
2760 };
2761 #endif
2762 /* ARGSUSED */
2763 int
2764 pathconf(td, uap)
2765 	struct thread *td;
2766 	register struct pathconf_args /* {
2767 		syscallarg(char *) path;
2768 		syscallarg(int) name;
2769 	} */ *uap;
2770 {
2771 	int error;
2772 	struct nameidata nd;
2773 
2774 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2775 	    SCARG(uap, path), td);
2776 	if ((error = namei(&nd)) != 0)
2777 		return (error);
2778 	NDFREE(&nd, NDF_ONLY_PNBUF);
2779 	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), td->td_retval);
2780 	vput(nd.ni_vp);
2781 	return (error);
2782 }
2783 
2784 /*
2785  * Return target name of a symbolic link.
2786  */
2787 #ifndef _SYS_SYSPROTO_H_
2788 struct readlink_args {
2789 	char	*path;
2790 	char	*buf;
2791 	int	count;
2792 };
2793 #endif
2794 /* ARGSUSED */
2795 int
2796 readlink(td, uap)
2797 	struct thread *td;
2798 	register struct readlink_args /* {
2799 		syscallarg(char *) path;
2800 		syscallarg(char *) buf;
2801 		syscallarg(int) count;
2802 	} */ *uap;
2803 {
2804 	register struct vnode *vp;
2805 	struct iovec aiov;
2806 	struct uio auio;
2807 	int error;
2808 	struct nameidata nd;
2809 
2810 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2811 	    SCARG(uap, path), td);
2812 	if ((error = namei(&nd)) != 0)
2813 		return (error);
2814 	NDFREE(&nd, NDF_ONLY_PNBUF);
2815 	vp = nd.ni_vp;
2816 	if (vp->v_type != VLNK)
2817 		error = EINVAL;
2818 	else {
2819 		aiov.iov_base = SCARG(uap, buf);
2820 		aiov.iov_len = SCARG(uap, count);
2821 		auio.uio_iov = &aiov;
2822 		auio.uio_iovcnt = 1;
2823 		auio.uio_offset = 0;
2824 		auio.uio_rw = UIO_READ;
2825 		auio.uio_segflg = UIO_USERSPACE;
2826 		auio.uio_td = td;
2827 		auio.uio_resid = SCARG(uap, count);
2828 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2829 	}
2830 	vput(vp);
2831 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
2832 	return (error);
2833 }
2834 
2835 /*
2836  * Common implementation code for chflags() and fchflags().
2837  */
2838 static int
2839 setfflags(td, vp, flags)
2840 	struct thread *td;
2841 	struct vnode *vp;
2842 	int flags;
2843 {
2844 	int error;
2845 	struct mount *mp;
2846 	struct vattr vattr;
2847 
2848 	/*
2849 	 * Prevent non-root users from setting flags on devices.  When
2850 	 * a device is reused, users can retain ownership of the device
2851 	 * if they are allowed to set flags and programs assume that
2852 	 * chown can't fail when done as root.
2853 	 */
2854 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2855 		error = suser_cred(td->td_ucred, PRISON_ROOT);
2856 		if (error)
2857 			return (error);
2858 	}
2859 
2860 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2861 		return (error);
2862 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2863 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2864 	VATTR_NULL(&vattr);
2865 	vattr.va_flags = flags;
2866 	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2867 	VOP_UNLOCK(vp, 0, td);
2868 	vn_finished_write(mp);
2869 	return (error);
2870 }
2871 
2872 /*
2873  * Change flags of a file given a path name.
2874  */
2875 #ifndef _SYS_SYSPROTO_H_
2876 struct chflags_args {
2877 	char	*path;
2878 	int	flags;
2879 };
2880 #endif
2881 /* ARGSUSED */
2882 int
2883 chflags(td, uap)
2884 	struct thread *td;
2885 	register struct chflags_args /* {
2886 		syscallarg(char *) path;
2887 		syscallarg(int) flags;
2888 	} */ *uap;
2889 {
2890 	int error;
2891 	struct nameidata nd;
2892 
2893 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2894 	if ((error = namei(&nd)) != 0)
2895 		return (error);
2896 	NDFREE(&nd, NDF_ONLY_PNBUF);
2897 	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
2898 	vrele(nd.ni_vp);
2899 	return error;
2900 }
2901 
2902 /*
2903  * Same as chflags() but doesn't follow symlinks.
2904  */
2905 int
2906 lchflags(td, uap)
2907 	struct thread *td;
2908 	register struct lchflags_args /* {
2909 		syscallarg(char *) path;
2910 		syscallarg(int) flags;
2911 	} */ *uap;
2912 {
2913 	int error;
2914 	struct nameidata nd;
2915 
2916 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2917 	if ((error = namei(&nd)) != 0)
2918 		return (error);
2919 	NDFREE(&nd, NDF_ONLY_PNBUF);
2920 	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
2921 	vrele(nd.ni_vp);
2922 	return error;
2923 }
2924 
2925 /*
2926  * Change flags of a file given a file descriptor.
2927  */
2928 #ifndef _SYS_SYSPROTO_H_
2929 struct fchflags_args {
2930 	int	fd;
2931 	int	flags;
2932 };
2933 #endif
2934 /* ARGSUSED */
2935 int
2936 fchflags(td, uap)
2937 	struct thread *td;
2938 	register struct fchflags_args /* {
2939 		syscallarg(int) fd;
2940 		syscallarg(int) flags;
2941 	} */ *uap;
2942 {
2943 	struct file *fp;
2944 	int error;
2945 
2946 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2947 		return (error);
2948 	error = setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags));
2949 	fdrop(fp, td);
2950 	return (error);
2951 }
2952 
2953 /*
2954  * Common implementation code for chmod(), lchmod() and fchmod().
2955  */
2956 static int
2957 setfmode(td, vp, mode)
2958 	struct thread *td;
2959 	struct vnode *vp;
2960 	int mode;
2961 {
2962 	int error;
2963 	struct mount *mp;
2964 	struct vattr vattr;
2965 
2966 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2967 		return (error);
2968 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2969 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2970 	VATTR_NULL(&vattr);
2971 	vattr.va_mode = mode & ALLPERMS;
2972 	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2973 	VOP_UNLOCK(vp, 0, td);
2974 	vn_finished_write(mp);
2975 	return error;
2976 }
2977 
2978 /*
2979  * Change mode of a file given path name.
2980  */
2981 #ifndef _SYS_SYSPROTO_H_
2982 struct chmod_args {
2983 	char	*path;
2984 	int	mode;
2985 };
2986 #endif
2987 /* ARGSUSED */
2988 int
2989 chmod(td, uap)
2990 	struct thread *td;
2991 	register struct chmod_args /* {
2992 		syscallarg(char *) path;
2993 		syscallarg(int) mode;
2994 	} */ *uap;
2995 {
2996 	int error;
2997 	struct nameidata nd;
2998 
2999 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3000 	if ((error = namei(&nd)) != 0)
3001 		return (error);
3002 	NDFREE(&nd, NDF_ONLY_PNBUF);
3003 	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
3004 	vrele(nd.ni_vp);
3005 	return error;
3006 }
3007 
3008 /*
3009  * Change mode of a file given path name (don't follow links.)
3010  */
3011 #ifndef _SYS_SYSPROTO_H_
3012 struct lchmod_args {
3013 	char	*path;
3014 	int	mode;
3015 };
3016 #endif
3017 /* ARGSUSED */
3018 int
3019 lchmod(td, uap)
3020 	struct thread *td;
3021 	register struct lchmod_args /* {
3022 		syscallarg(char *) path;
3023 		syscallarg(int) mode;
3024 	} */ *uap;
3025 {
3026 	int error;
3027 	struct nameidata nd;
3028 
3029 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3030 	if ((error = namei(&nd)) != 0)
3031 		return (error);
3032 	NDFREE(&nd, NDF_ONLY_PNBUF);
3033 	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
3034 	vrele(nd.ni_vp);
3035 	return error;
3036 }
3037 
3038 /*
3039  * Change mode of a file given a file descriptor.
3040  */
3041 #ifndef _SYS_SYSPROTO_H_
3042 struct fchmod_args {
3043 	int	fd;
3044 	int	mode;
3045 };
3046 #endif
3047 /* ARGSUSED */
3048 int
3049 fchmod(td, uap)
3050 	struct thread *td;
3051 	register struct fchmod_args /* {
3052 		syscallarg(int) fd;
3053 		syscallarg(int) mode;
3054 	} */ *uap;
3055 {
3056 	struct file *fp;
3057 	struct vnode *vp;
3058 	int error;
3059 
3060 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3061 		return (error);
3062 	vp = (struct vnode *)fp->f_data;
3063 	error = setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode));
3064 	fdrop(fp, td);
3065 	return (error);
3066 }
3067 
3068 /*
3069  * Common implementation for chown(), lchown(), and fchown()
3070  */
3071 static int
3072 setfown(td, vp, uid, gid)
3073 	struct thread *td;
3074 	struct vnode *vp;
3075 	uid_t uid;
3076 	gid_t gid;
3077 {
3078 	int error;
3079 	struct mount *mp;
3080 	struct vattr vattr;
3081 
3082 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3083 		return (error);
3084 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3085 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3086 	VATTR_NULL(&vattr);
3087 	vattr.va_uid = uid;
3088 	vattr.va_gid = gid;
3089 	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3090 	VOP_UNLOCK(vp, 0, td);
3091 	vn_finished_write(mp);
3092 	return error;
3093 }
3094 
3095 /*
3096  * Set ownership given a path name.
3097  */
3098 #ifndef _SYS_SYSPROTO_H_
3099 struct chown_args {
3100 	char	*path;
3101 	int	uid;
3102 	int	gid;
3103 };
3104 #endif
3105 /* ARGSUSED */
3106 int
3107 chown(td, uap)
3108 	struct thread *td;
3109 	register struct chown_args /* {
3110 		syscallarg(char *) path;
3111 		syscallarg(int) uid;
3112 		syscallarg(int) gid;
3113 	} */ *uap;
3114 {
3115 	int error;
3116 	struct nameidata nd;
3117 
3118 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3119 	if ((error = namei(&nd)) != 0)
3120 		return (error);
3121 	NDFREE(&nd, NDF_ONLY_PNBUF);
3122 	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
3123 	vrele(nd.ni_vp);
3124 	return (error);
3125 }
3126 
3127 /*
3128  * Set ownership given a path name, do not cross symlinks.
3129  */
3130 #ifndef _SYS_SYSPROTO_H_
3131 struct lchown_args {
3132 	char	*path;
3133 	int	uid;
3134 	int	gid;
3135 };
3136 #endif
3137 /* ARGSUSED */
3138 int
3139 lchown(td, uap)
3140 	struct thread *td;
3141 	register struct lchown_args /* {
3142 		syscallarg(char *) path;
3143 		syscallarg(int) uid;
3144 		syscallarg(int) gid;
3145 	} */ *uap;
3146 {
3147 	int error;
3148 	struct nameidata nd;
3149 
3150 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3151 	if ((error = namei(&nd)) != 0)
3152 		return (error);
3153 	NDFREE(&nd, NDF_ONLY_PNBUF);
3154 	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
3155 	vrele(nd.ni_vp);
3156 	return (error);
3157 }
3158 
3159 /*
3160  * Set ownership given a file descriptor.
3161  */
3162 #ifndef _SYS_SYSPROTO_H_
3163 struct fchown_args {
3164 	int	fd;
3165 	int	uid;
3166 	int	gid;
3167 };
3168 #endif
3169 /* ARGSUSED */
3170 int
3171 fchown(td, uap)
3172 	struct thread *td;
3173 	register struct fchown_args /* {
3174 		syscallarg(int) fd;
3175 		syscallarg(int) uid;
3176 		syscallarg(int) gid;
3177 	} */ *uap;
3178 {
3179 	struct file *fp;
3180 	struct vnode *vp;
3181 	int error;
3182 
3183 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3184 		return (error);
3185 	vp = (struct vnode *)fp->f_data;
3186 	error = setfown(td, (struct vnode *)fp->f_data,
3187 		SCARG(uap, uid), SCARG(uap, gid));
3188 	fdrop(fp, td);
3189 	return (error);
3190 }
3191 
3192 /*
3193  * Common implementation code for utimes(), lutimes(), and futimes().
3194  */
3195 static int
3196 getutimes(usrtvp, tsp)
3197 	const struct timeval *usrtvp;
3198 	struct timespec *tsp;
3199 {
3200 	struct timeval tv[2];
3201 	int error;
3202 
3203 	if (usrtvp == NULL) {
3204 		microtime(&tv[0]);
3205 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
3206 		tsp[1] = tsp[0];
3207 	} else {
3208 		if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
3209 			return (error);
3210 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
3211 		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
3212 	}
3213 	return 0;
3214 }
3215 
3216 /*
3217  * Common implementation code for utimes(), lutimes(), and futimes().
3218  */
3219 static int
3220 setutimes(td, vp, ts, nullflag)
3221 	struct thread *td;
3222 	struct vnode *vp;
3223 	const struct timespec *ts;
3224 	int nullflag;
3225 {
3226 	int error;
3227 	struct mount *mp;
3228 	struct vattr vattr;
3229 
3230 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3231 		return (error);
3232 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3233 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3234 	VATTR_NULL(&vattr);
3235 	vattr.va_atime = ts[0];
3236 	vattr.va_mtime = ts[1];
3237 	if (nullflag)
3238 		vattr.va_vaflags |= VA_UTIMES_NULL;
3239 	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3240 	VOP_UNLOCK(vp, 0, td);
3241 	vn_finished_write(mp);
3242 	return error;
3243 }
3244 
3245 /*
3246  * Set the access and modification times of a file.
3247  */
3248 #ifndef _SYS_SYSPROTO_H_
3249 struct utimes_args {
3250 	char	*path;
3251 	struct	timeval *tptr;
3252 };
3253 #endif
3254 /* ARGSUSED */
3255 int
3256 utimes(td, uap)
3257 	struct thread *td;
3258 	register struct utimes_args /* {
3259 		syscallarg(char *) path;
3260 		syscallarg(struct timeval *) tptr;
3261 	} */ *uap;
3262 {
3263 	struct timespec ts[2];
3264 	struct timeval *usrtvp;
3265 	int error;
3266 	struct nameidata nd;
3267 
3268 	usrtvp = SCARG(uap, tptr);
3269 	if ((error = getutimes(usrtvp, ts)) != 0)
3270 		return (error);
3271 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3272 	if ((error = namei(&nd)) != 0)
3273 		return (error);
3274 	NDFREE(&nd, NDF_ONLY_PNBUF);
3275 	error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
3276 	vrele(nd.ni_vp);
3277 	return (error);
3278 }
3279 
3280 /*
 * Set the access and modification times of a file; a trailing symbolic
 * link is not followed.
3282  */
3283 #ifndef _SYS_SYSPROTO_H_
3284 struct lutimes_args {
3285 	char	*path;
3286 	struct	timeval *tptr;
3287 };
3288 #endif
3289 /* ARGSUSED */
3290 int
3291 lutimes(td, uap)
3292 	struct thread *td;
3293 	register struct lutimes_args /* {
3294 		syscallarg(char *) path;
3295 		syscallarg(struct timeval *) tptr;
3296 	} */ *uap;
3297 {
3298 	struct timespec ts[2];
3299 	struct timeval *usrtvp;
3300 	int error;
3301 	struct nameidata nd;
3302 
3303 	usrtvp = SCARG(uap, tptr);
3304 	if ((error = getutimes(usrtvp, ts)) != 0)
3305 		return (error);
3306 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3307 	if ((error = namei(&nd)) != 0)
3308 		return (error);
3309 	NDFREE(&nd, NDF_ONLY_PNBUF);
3310 	error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
3311 	vrele(nd.ni_vp);
3312 	return (error);
3313 }
3314 
3315 /*
 * Set the access and modification times of a file given a file descriptor.
3317  */
3318 #ifndef _SYS_SYSPROTO_H_
3319 struct futimes_args {
3320 	int	fd;
3321 	struct	timeval *tptr;
3322 };
3323 #endif
3324 /* ARGSUSED */
3325 int
3326 futimes(td, uap)
3327 	struct thread *td;
3328 	register struct futimes_args /* {
3329 		syscallarg(int ) fd;
3330 		syscallarg(struct timeval *) tptr;
3331 	} */ *uap;
3332 {
3333 	struct timespec ts[2];
3334 	struct file *fp;
3335 	struct timeval *usrtvp;
3336 	int error;
3337 
3338 	usrtvp = SCARG(uap, tptr);
3339 	if ((error = getutimes(usrtvp, ts)) != 0)
3340 		return (error);
3341 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3342 		return (error);
3343 	error = setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL);
3344 	fdrop(fp, td);
3345 	return (error);
3346 }
3347 
3348 /*
3349  * Truncate a file given its path name.
3350  */
3351 #ifndef _SYS_SYSPROTO_H_
3352 struct truncate_args {
3353 	char	*path;
3354 	int	pad;
3355 	off_t	length;
3356 };
3357 #endif
3358 /* ARGSUSED */
3359 int
3360 truncate(td, uap)
3361 	struct thread *td;
3362 	register struct truncate_args /* {
3363 		syscallarg(char *) path;
3364 		syscallarg(int) pad;
3365 		syscallarg(off_t) length;
3366 	} */ *uap;
3367 {
3368 	struct mount *mp;
3369 	struct vnode *vp;
3370 	struct vattr vattr;
3371 	int error;
3372 	struct nameidata nd;
3373 
3374 	if (uap->length < 0)
3375 		return(EINVAL);
3376 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3377 	if ((error = namei(&nd)) != 0)
3378 		return (error);
3379 	vp = nd.ni_vp;
3380 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3381 		vrele(vp);
3382 		return (error);
3383 	}
3384 	NDFREE(&nd, NDF_ONLY_PNBUF);
3385 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3386 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3387 	if (vp->v_type == VDIR)
3388 		error = EISDIR;
3389 	else if ((error = vn_writechk(vp)) == 0 &&
3390 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3391 		VATTR_NULL(&vattr);
3392 		vattr.va_size = SCARG(uap, length);
3393 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3394 	}
3395 	vput(vp);
3396 	vn_finished_write(mp);
3397 	return (error);
3398 }
3399 
3400 /*
3401  * Truncate a file given a file descriptor.
3402  */
3403 #ifndef _SYS_SYSPROTO_H_
3404 struct ftruncate_args {
3405 	int	fd;
3406 	int	pad;
3407 	off_t	length;
3408 };
3409 #endif
3410 /* ARGSUSED */
3411 int
3412 ftruncate(td, uap)
3413 	struct thread *td;
3414 	register struct ftruncate_args /* {
3415 		syscallarg(int) fd;
3416 		syscallarg(int) pad;
3417 		syscallarg(off_t) length;
3418 	} */ *uap;
3419 {
3420 	struct mount *mp;
3421 	struct vattr vattr;
3422 	struct vnode *vp;
3423 	struct file *fp;
3424 	int error;
3425 
3426 	if (uap->length < 0)
3427 		return(EINVAL);
3428 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3429 		return (error);
3430 	if ((fp->f_flag & FWRITE) == 0) {
3431 		fdrop(fp, td);
3432 		return (EINVAL);
3433 	}
3434 	vp = (struct vnode *)fp->f_data;
3435 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3436 		fdrop(fp, td);
3437 		return (error);
3438 	}
3439 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3440 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3441 	if (vp->v_type == VDIR)
3442 		error = EISDIR;
3443 	else if ((error = vn_writechk(vp)) == 0) {
3444 		VATTR_NULL(&vattr);
3445 		vattr.va_size = SCARG(uap, length);
3446 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3447 	}
3448 	VOP_UNLOCK(vp, 0, td);
3449 	vn_finished_write(mp);
3450 	fdrop(fp, td);
3451 	return (error);
3452 }
3453 
3454 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
3455 /*
3456  * Truncate a file given its path name.
3457  */
3458 #ifndef _SYS_SYSPROTO_H_
3459 struct otruncate_args {
3460 	char	*path;
3461 	long	length;
3462 };
3463 #endif
3464 /* ARGSUSED */
3465 int
3466 otruncate(td, uap)
3467 	struct thread *td;
3468 	register struct otruncate_args /* {
3469 		syscallarg(char *) path;
3470 		syscallarg(long) length;
3471 	} */ *uap;
3472 {
3473 	struct truncate_args /* {
3474 		syscallarg(char *) path;
3475 		syscallarg(int) pad;
3476 		syscallarg(off_t) length;
3477 	} */ nuap;
3478 
3479 	SCARG(&nuap, path) = SCARG(uap, path);
3480 	SCARG(&nuap, length) = SCARG(uap, length);
3481 	return (truncate(td, &nuap));
3482 }
3483 
3484 /*
3485  * Truncate a file given a file descriptor.
3486  */
3487 #ifndef _SYS_SYSPROTO_H_
3488 struct oftruncate_args {
3489 	int	fd;
3490 	long	length;
3491 };
3492 #endif
3493 /* ARGSUSED */
3494 int
3495 oftruncate(td, uap)
3496 	struct thread *td;
3497 	register struct oftruncate_args /* {
3498 		syscallarg(int) fd;
3499 		syscallarg(long) length;
3500 	} */ *uap;
3501 {
3502 	struct ftruncate_args /* {
3503 		syscallarg(int) fd;
3504 		syscallarg(int) pad;
3505 		syscallarg(off_t) length;
3506 	} */ nuap;
3507 
3508 	SCARG(&nuap, fd) = SCARG(uap, fd);
3509 	SCARG(&nuap, length) = SCARG(uap, length);
3510 	return (ftruncate(td, &nuap));
3511 }
3512 #endif /* COMPAT_43 || COMPAT_SUNOS */
3513 
3514 /*
3515  * Sync an open file.
3516  */
3517 #ifndef _SYS_SYSPROTO_H_
3518 struct fsync_args {
3519 	int	fd;
3520 };
3521 #endif
3522 /* ARGSUSED */
3523 int
3524 fsync(td, uap)
3525 	struct thread *td;
3526 	struct fsync_args /* {
3527 		syscallarg(int) fd;
3528 	} */ *uap;
3529 {
3530 	struct vnode *vp;
3531 	struct mount *mp;
3532 	struct file *fp;
3533 	vm_object_t obj;
3534 	int error;
3535 
3536 	GIANT_REQUIRED;
3537 
3538 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3539 		return (error);
3540 	vp = (struct vnode *)fp->f_data;
3541 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3542 		fdrop(fp, td);
3543 		return (error);
3544 	}
3545 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3546 	if (VOP_GETVOBJECT(vp, &obj) == 0) {
3547 		vm_object_page_clean(obj, 0, 0, 0);
3548 	}
3549 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
3550 #ifdef SOFTUPDATES
3551 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3552 	    error = softdep_fsync(vp);
3553 #endif
3554 
3555 	VOP_UNLOCK(vp, 0, td);
3556 	vn_finished_write(mp);
3557 	fdrop(fp, td);
3558 	return (error);
3559 }
3560 
3561 /*
3562  * Rename files.  Source and destination must either both be directories,
3563  * or both not be directories.  If target is a directory, it must be empty.
3564  */
3565 #ifndef _SYS_SYSPROTO_H_
3566 struct rename_args {
3567 	char	*from;
3568 	char	*to;
3569 };
3570 #endif
3571 /* ARGSUSED */
3572 int
3573 rename(td, uap)
3574 	struct thread *td;
3575 	register struct rename_args /* {
3576 		syscallarg(char *) from;
3577 		syscallarg(char *) to;
3578 	} */ *uap;
3579 {
3580 	struct mount *mp;
3581 	struct vnode *tvp, *fvp, *tdvp;
3582 	struct nameidata fromnd, tond;
3583 	int error;
3584 
3585 	bwillwrite();
3586 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
3587 	    SCARG(uap, from), td);
3588 	if ((error = namei(&fromnd)) != 0)
3589 		return (error);
3590 	fvp = fromnd.ni_vp;
3591 	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
3592 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3593 		vrele(fromnd.ni_dvp);
3594 		vrele(fvp);
3595 		goto out1;
3596 	}
3597 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
3598 	    UIO_USERSPACE, SCARG(uap, to), td);
3599 	if (fromnd.ni_vp->v_type == VDIR)
3600 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3601 	if ((error = namei(&tond)) != 0) {
3602 		/* Translate error code for rename("dir1", "dir2/."). */
3603 		if (error == EISDIR && fvp->v_type == VDIR)
3604 			error = EINVAL;
3605 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3606 		vrele(fromnd.ni_dvp);
3607 		vrele(fvp);
3608 		goto out1;
3609 	}
3610 	tdvp = tond.ni_dvp;
3611 	tvp = tond.ni_vp;
3612 	if (tvp != NULL) {
3613 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3614 			error = ENOTDIR;
3615 			goto out;
3616 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3617 			error = EISDIR;
3618 			goto out;
3619 		}
3620 	}
3621 	if (fvp == tdvp)
3622 		error = EINVAL;
3623 	/*
3624 	 * If source is the same as the destination (that is the
3625 	 * same inode number with the same name in the same directory),
3626 	 * then there is nothing to do.
3627 	 */
3628 	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
3629 	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3630 	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
3631 	      fromnd.ni_cnd.cn_namelen))
3632 		error = -1;
3633 out:
3634 	if (!error) {
3635 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3636 		if (fromnd.ni_dvp != tdvp) {
3637 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3638 		}
3639 		if (tvp) {
3640 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3641 		}
3642 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3643 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3644 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3645 		NDFREE(&tond, NDF_ONLY_PNBUF);
3646 	} else {
3647 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3648 		NDFREE(&tond, NDF_ONLY_PNBUF);
3649 		if (tdvp == tvp)
3650 			vrele(tdvp);
3651 		else
3652 			vput(tdvp);
3653 		if (tvp)
3654 			vput(tvp);
3655 		vrele(fromnd.ni_dvp);
3656 		vrele(fvp);
3657 	}
3658 	vrele(tond.ni_startdir);
3659 	vn_finished_write(mp);
3660 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
3661 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
3662 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
3663 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
3664 out1:
3665 	if (fromnd.ni_startdir)
3666 		vrele(fromnd.ni_startdir);
3667 	if (error == -1)
3668 		return (0);
3669 	return (error);
3670 }
3671 
3672 /*
3673  * Make a directory file.
3674  */
3675 #ifndef _SYS_SYSPROTO_H_
3676 struct mkdir_args {
3677 	char	*path;
3678 	int	mode;
3679 };
3680 #endif
3681 /* ARGSUSED */
3682 int
3683 mkdir(td, uap)
3684 	struct thread *td;
3685 	register struct mkdir_args /* {
3686 		syscallarg(char *) path;
3687 		syscallarg(int) mode;
3688 	} */ *uap;
3689 {
3690 
3691 	return vn_mkdir(uap->path, uap->mode, UIO_USERSPACE, td);
3692 }
3693 
3694 int
3695 vn_mkdir(path, mode, segflg, td)
3696 	char *path;
3697 	int mode;
3698 	enum uio_seg segflg;
3699 	struct thread *td;
3700 {
3701 	struct mount *mp;
3702 	struct vnode *vp;
3703 	struct vattr vattr;
3704 	int error;
3705 	struct nameidata nd;
3706 
3707 restart:
3708 	bwillwrite();
3709 	NDINIT(&nd, CREATE, LOCKPARENT, segflg, path, td);
3710 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3711 	if ((error = namei(&nd)) != 0)
3712 		return (error);
3713 	vp = nd.ni_vp;
3714 	if (vp != NULL) {
3715 		NDFREE(&nd, NDF_ONLY_PNBUF);
3716 		vrele(vp);
3717 		vput(nd.ni_dvp);
3718 		return (EEXIST);
3719 	}
3720 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3721 		NDFREE(&nd, NDF_ONLY_PNBUF);
3722 		vput(nd.ni_dvp);
3723 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3724 			return (error);
3725 		goto restart;
3726 	}
3727 	VATTR_NULL(&vattr);
3728 	vattr.va_type = VDIR;
3729 	FILEDESC_LOCK(td->td_proc->p_fd);
3730 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3731 	FILEDESC_UNLOCK(td->td_proc->p_fd);
3732 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3733 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3734 	NDFREE(&nd, NDF_ONLY_PNBUF);
3735 	vput(nd.ni_dvp);
3736 	if (!error)
3737 		vput(nd.ni_vp);
3738 	vn_finished_write(mp);
3739 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
3740 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
3741 	return (error);
3742 }
3743 
3744 /*
3745  * Remove a directory file.
3746  */
3747 #ifndef _SYS_SYSPROTO_H_
3748 struct rmdir_args {
3749 	char	*path;
3750 };
3751 #endif
3752 /* ARGSUSED */
3753 int
3754 rmdir(td, uap)
3755 	struct thread *td;
3756 	struct rmdir_args /* {
3757 		syscallarg(char *) path;
3758 	} */ *uap;
3759 {
3760 	struct mount *mp;
3761 	struct vnode *vp;
3762 	int error;
3763 	struct nameidata nd;
3764 
3765 restart:
3766 	bwillwrite();
3767 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3768 	    SCARG(uap, path), td);
3769 	if ((error = namei(&nd)) != 0)
3770 		return (error);
3771 	vp = nd.ni_vp;
3772 	if (vp->v_type != VDIR) {
3773 		error = ENOTDIR;
3774 		goto out;
3775 	}
3776 	/*
3777 	 * No rmdir "." please.
3778 	 */
3779 	if (nd.ni_dvp == vp) {
3780 		error = EINVAL;
3781 		goto out;
3782 	}
3783 	/*
3784 	 * The root of a mounted filesystem cannot be deleted.
3785 	 */
3786 	if (vp->v_flag & VROOT) {
3787 		error = EBUSY;
3788 		goto out;
3789 	}
3790 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3791 		NDFREE(&nd, NDF_ONLY_PNBUF);
3792 		if (nd.ni_dvp == vp)
3793 			vrele(nd.ni_dvp);
3794 		else
3795 			vput(nd.ni_dvp);
3796 		vput(vp);
3797 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3798 			return (error);
3799 		goto restart;
3800 	}
3801 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3802 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3803 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3804 	vn_finished_write(mp);
3805 out:
3806 	NDFREE(&nd, NDF_ONLY_PNBUF);
3807 	if (nd.ni_dvp == vp)
3808 		vrele(nd.ni_dvp);
3809 	else
3810 		vput(nd.ni_dvp);
3811 	vput(vp);
3812 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3813 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3814 	return (error);
3815 }
3816 
3817 #ifdef COMPAT_43
/*
 * Read a block of directory entries in a filesystem independent format.
 *
 * COMPAT_43 variant.  Up to "count" bytes (at most 64KB) of entries are
 * read from the directory open on "fd" into the user buffer "buf".  On
 * the common path the entries are read into a temporary kernel buffer so
 * that the d_type/d_namlen bytes of every entry can be rewritten into
 * the layout the old interface expects before being copied out.  The
 * file offset at which the read started is always copied out to "basep"
 * and the number of bytes transferred is returned in td_retval[0].
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(td, uap)
	struct thread *td;
	register struct ogetdirentries_args /* {
		syscallarg(int) fd;
		syscallarg(char *) buf;
		syscallarg(u_int) count;
		syscallarg(long *) basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (SCARG(uap, count) > 64 * 1024)
		return (EINVAL);
	/* On success fp is held; every exit path below must fdrop() it. */
	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = (struct vnode *)fp->f_data;
unionread:
	/* Re-entered with a different vp when a union layer is exhausted. */
	if (vp->v_type != VDIR) {
		fdrop(fp, td);
		return (EINVAL);
	}
	/* auio describes the caller's buffer in user space. */
	aiov.iov_base = SCARG(uap, buf);
	aiov.iov_len = SCARG(uap, count);
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = SCARG(uap, count);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember where this read starts; it is reported through basep. */
	loff = auio.uio_offset = fp->f_offset;
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		/*
		 * On non-little-endian hosts a mount with
		 * mnt_maxsymlinklen <= 0 is read straight into the user
		 * buffer without any per-entry fixup.
		 */
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/*
		 * Read into a kernel bounce buffer (kuio is a copy of auio
		 * redirected to dirbuf), patch each entry, then copy the
		 * result out through auio.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = SCARG(uap, count);
		MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			/* Bytes actually produced by VOP_READDIR(). */
			readcnt = SCARG(uap, count) - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				/*
				 * A zero record length would never advance
				 * dp; treat it as an I/O error.
				 */
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					error = EIO;
					break;
				}
			}
			/* Copy out only if every entry was walked. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, td);
	if (error) {
		fdrop(fp, td);
		return (error);
	}
	/* Nothing was transferred: see whether another layer should be read. */
	if (SCARG(uap, count) == auio.uio_resid) {
		if (union_dircheckp) {
			/* -1 asks for a retry with the updated vp. */
			error = union_dircheckp(td, &vp, fp);
			if (error == -1)
				goto unionread;
			if (error) {
				fdrop(fp, td);
				return (error);
			}
		}
		if ((vp->v_flag & VROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			/*
			 * Root of a MNT_UNION mount: switch the open file
			 * over to the covered vnode and read that from
			 * offset 0.
			 */
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->f_data = (caddr_t) vp;
			fp->f_offset = 0;
			vrele(tvp);
			goto unionread;
		}
	}
	/* Unlike getdirentries(), basep is copied out unconditionally. */
	error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
	    sizeof(long));
	fdrop(fp, td);
	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
	return (error);
}
3956 #endif /* COMPAT_43 */
3957 
3958 /*
3959  * Read a block of directory entries in a filesystem independent format.
3960  */
3961 #ifndef _SYS_SYSPROTO_H_
3962 struct getdirentries_args {
3963 	int	fd;
3964 	char	*buf;
3965 	u_int	count;
3966 	long	*basep;
3967 };
3968 #endif
3969 int
3970 getdirentries(td, uap)
3971 	struct thread *td;
3972 	register struct getdirentries_args /* {
3973 		syscallarg(int) fd;
3974 		syscallarg(char *) buf;
3975 		syscallarg(u_int) count;
3976 		syscallarg(long *) basep;
3977 	} */ *uap;
3978 {
3979 	struct vnode *vp;
3980 	struct file *fp;
3981 	struct uio auio;
3982 	struct iovec aiov;
3983 	long loff;
3984 	int error, eofflag;
3985 
3986 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3987 		return (error);
3988 	if ((fp->f_flag & FREAD) == 0) {
3989 		fdrop(fp, td);
3990 		return (EBADF);
3991 	}
3992 	vp = (struct vnode *)fp->f_data;
3993 unionread:
3994 	if (vp->v_type != VDIR) {
3995 		fdrop(fp, td);
3996 		return (EINVAL);
3997 	}
3998 	aiov.iov_base = SCARG(uap, buf);
3999 	aiov.iov_len = SCARG(uap, count);
4000 	auio.uio_iov = &aiov;
4001 	auio.uio_iovcnt = 1;
4002 	auio.uio_rw = UIO_READ;
4003 	auio.uio_segflg = UIO_USERSPACE;
4004 	auio.uio_td = td;
4005 	auio.uio_resid = SCARG(uap, count);
4006 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
4007 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4008 	loff = auio.uio_offset = fp->f_offset;
4009 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
4010 	fp->f_offset = auio.uio_offset;
4011 	VOP_UNLOCK(vp, 0, td);
4012 	if (error) {
4013 		fdrop(fp, td);
4014 		return (error);
4015 	}
4016 	if (SCARG(uap, count) == auio.uio_resid) {
4017 		if (union_dircheckp) {
4018 			error = union_dircheckp(td, &vp, fp);
4019 			if (error == -1)
4020 				goto unionread;
4021 			if (error) {
4022 				fdrop(fp, td);
4023 				return (error);
4024 			}
4025 		}
4026 		if ((vp->v_flag & VROOT) &&
4027 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
4028 			struct vnode *tvp = vp;
4029 			vp = vp->v_mount->mnt_vnodecovered;
4030 			VREF(vp);
4031 			fp->f_data = (caddr_t) vp;
4032 			fp->f_offset = 0;
4033 			vrele(tvp);
4034 			goto unionread;
4035 		}
4036 	}
4037 	if (SCARG(uap, basep) != NULL) {
4038 		error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
4039 		    sizeof(long));
4040 	}
4041 	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
4042 	fdrop(fp, td);
4043 	return (error);
4044 }
4045 #ifndef _SYS_SYSPROTO_H_
4046 struct getdents_args {
4047 	int fd;
4048 	char *buf;
4049 	size_t count;
4050 };
4051 #endif
4052 int
4053 getdents(td, uap)
4054 	struct thread *td;
4055 	register struct getdents_args /* {
4056 		syscallarg(int) fd;
4057 		syscallarg(char *) buf;
4058 		syscallarg(u_int) count;
4059 	} */ *uap;
4060 {
4061 	struct getdirentries_args ap;
4062 	ap.fd = uap->fd;
4063 	ap.buf = uap->buf;
4064 	ap.count = uap->count;
4065 	ap.basep = NULL;
4066 	return getdirentries(td, &ap);
4067 }
4068 
4069 /*
4070  * Set the mode mask for creation of filesystem nodes.
4071  *
4072  * MP SAFE
4073  */
4074 #ifndef _SYS_SYSPROTO_H_
4075 struct umask_args {
4076 	int	newmask;
4077 };
4078 #endif
4079 int
4080 umask(td, uap)
4081 	struct thread *td;
4082 	struct umask_args /* {
4083 		syscallarg(int) newmask;
4084 	} */ *uap;
4085 {
4086 	register struct filedesc *fdp;
4087 
4088 	FILEDESC_LOCK(td->td_proc->p_fd);
4089 	fdp = td->td_proc->p_fd;
4090 	td->td_retval[0] = fdp->fd_cmask;
4091 	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
4092 	FILEDESC_UNLOCK(td->td_proc->p_fd);
4093 	return (0);
4094 }
4095 
4096 /*
4097  * Void all references to file by ripping underlying filesystem
4098  * away from vnode.
4099  */
4100 #ifndef _SYS_SYSPROTO_H_
4101 struct revoke_args {
4102 	char	*path;
4103 };
4104 #endif
4105 /* ARGSUSED */
4106 int
4107 revoke(td, uap)
4108 	struct thread *td;
4109 	register struct revoke_args /* {
4110 		syscallarg(char *) path;
4111 	} */ *uap;
4112 {
4113 	struct mount *mp;
4114 	struct vnode *vp;
4115 	struct vattr vattr;
4116 	int error;
4117 	struct nameidata nd;
4118 
4119 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path),
4120 	    td);
4121 	if ((error = namei(&nd)) != 0)
4122 		return (error);
4123 	vp = nd.ni_vp;
4124 	NDFREE(&nd, NDF_ONLY_PNBUF);
4125 	if (vp->v_type != VCHR) {
4126 		vput(vp);
4127 		return (EINVAL);
4128 	}
4129 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
4130 	if (error) {
4131 		vput(vp);
4132 		return (error);
4133 	}
4134 	VOP_UNLOCK(vp, 0, td);
4135 	if (td->td_ucred->cr_uid != vattr.va_uid) {
4136 		error = suser_cred(td->td_ucred, PRISON_ROOT);
4137 		if (error)
4138 			goto out;
4139 	}
4140 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4141 		goto out;
4142 	if (vcount(vp) > 1)
4143 		VOP_REVOKE(vp, REVOKEALL);
4144 	vn_finished_write(mp);
4145 out:
4146 	vrele(vp);
4147 	return (error);
4148 }
4149 
4150 /*
4151  * Convert a user file descriptor to a kernel file entry.
4152  * The file entry is locked upon returning.
4153  */
4154 int
4155 getvnode(fdp, fd, fpp)
4156 	struct filedesc *fdp;
4157 	int fd;
4158 	struct file **fpp;
4159 {
4160 	int error;
4161 	struct file *fp;
4162 
4163 	fp = NULL;
4164 	if (fdp == NULL)
4165 		error = EBADF;
4166 	else {
4167 		FILEDESC_LOCK(fdp);
4168 		if ((u_int)fd >= fdp->fd_nfiles ||
4169 		    (fp = fdp->fd_ofiles[fd]) == NULL)
4170 			error = EBADF;
4171 		else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
4172 			fp = NULL;
4173 			error = EINVAL;
4174 		} else {
4175 			fhold(fp);
4176 			error = 0;
4177 		}
4178 		FILEDESC_UNLOCK(fdp);
4179 	}
4180 	*fpp = fp;
4181 	return (error);
4182 }
4183 /*
4184  * Get (NFS) file handle
4185  */
4186 #ifndef _SYS_SYSPROTO_H_
4187 struct getfh_args {
4188 	char	*fname;
4189 	fhandle_t *fhp;
4190 };
4191 #endif
4192 int
4193 getfh(td, uap)
4194 	struct thread *td;
4195 	register struct getfh_args *uap;
4196 {
4197 	struct nameidata nd;
4198 	fhandle_t fh;
4199 	register struct vnode *vp;
4200 	int error;
4201 
4202 	/*
4203 	 * Must be super user
4204 	 */
4205 	error = suser(td);
4206 	if (error)
4207 		return (error);
4208 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
4209 	error = namei(&nd);
4210 	if (error)
4211 		return (error);
4212 	NDFREE(&nd, NDF_ONLY_PNBUF);
4213 	vp = nd.ni_vp;
4214 	bzero(&fh, sizeof(fh));
4215 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4216 	error = VFS_VPTOFH(vp, &fh.fh_fid);
4217 	vput(vp);
4218 	if (error)
4219 		return (error);
4220 	error = copyout(&fh, uap->fhp, sizeof (fh));
4221 	return (error);
4222 }
4223 
/*
 * syscall for the rpc.lockd to use to translate a NFS file handle into
 * an open descriptor.
 *
 * warning: do not remove the suser() call or this becomes one giant
 * security hole.
 *
 * The handle is copied in from "u_fhp", resolved to a mount with
 * vfs_getvfs() and to a vnode with VFS_FHTOVP(); the vnode is then
 * opened much as vn_open would, a file and descriptor are allocated
 * with falloc(), and the descriptor index is returned in td_retval[0].
 * "flags" must request reading and/or writing and must not include
 * O_CREAT.
 */
#ifndef _SYS_SYSPROTO_H_
struct fhopen_args {
	const struct fhandle *u_fhp;
	int flags;
};
#endif
int
fhopen(td, uap)
	struct thread *td;
	struct fhopen_args /* {
		syscallarg(const struct fhandle *) u_fhp;
		syscallarg(int) flags;
	} */ *uap;
{
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	int fmode, mode, error, type;
	struct file *nfp;
	int indx;

	/*
	 * Must be super user
	 */
	error = suser(td);
	if (error)
		return (error);

	fmode = FFLAGS(SCARG(uap, flags));
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
	if (error)
		return(error);
	/* find the mount point */
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL)
		return (ESTALE);
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
	if (error)
		return (error);
	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	/* Build the access mask to check from the requested open mode. */
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (mode) {
		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		/*
		 * Truncate to length 0.  The vnode is unlocked while the
		 * write is started on mp and relocked before the attribute
		 * change.
		 */
		VOP_UNLOCK(vp, 0, td);				/* XXX */
		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
			/* vp is unlocked here, hence vrele() rather than vput(). */
			vrele(vp);
			return (error);
		}
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
		VATTR_NULL(vap);
		vap->va_size = 0;
		error = VOP_SETATTR(vp, vap, td->td_ucred, td);
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	error = VOP_OPEN(vp, fmode, td->td_ucred, td);
	if (error)
		goto bad;
	/*
	 * Make sure that a VM object is created for VMIO support.
	 */
	if (vn_canvmio(vp) == TRUE) {
		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
			goto bad;
	}
	if (fmode & FWRITE)
		vp->v_writecount++;

	/*
	 * end of vn_open code
	 */

	if ((error = falloc(td, &nfp, &indx)) != 0) {
		/* Undo the writer count taken just above. */
		if (fmode & FWRITE)
			vp->v_writecount--;
		goto bad;
	}
	fp = nfp;

	/*
	 * Hold an extra reference to avoid having fp ripped out
	 * from under us while we block in the lock op
	 */
	fhold(fp);
	/* Attach the vnode to the new file. */
	nfp->f_data = (caddr_t)vp;
	nfp->f_flag = fmode & FMASK;
	nfp->f_ops = &vnops;
	nfp->f_type = DTYPE_VNODE;
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/* Request a whole-file flock-style lock (start 0, length 0). */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode is unlocked around the advisory lock request. */
		VOP_UNLOCK(vp, 0, td);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
			/*
			 * The lock request failed.  Normally close the
			 * descriptor but handle the case where someone might
			 * have dup()d or close()d it when we weren't looking.
			 */
			FILEDESC_LOCK(fdp);
			if (fdp->fd_ofiles[indx] == fp) {
				fdp->fd_ofiles[indx] = NULL;
				FILEDESC_UNLOCK(fdp);
				fdrop(fp, td);
			} else
				FILEDESC_UNLOCK(fdp);
			/*
			 * release our private reference
			 */
			fdrop(fp, td);
			return(error);
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		fp->f_flag |= FHASLOCK;
	}
	/* Regular files without a VM object get one created here. */
	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
		vfs_object_create(vp, td, td->td_ucred);

	VOP_UNLOCK(vp, 0, td);
	/* Drop the extra reference taken with fhold() above. */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);

bad:
	/* Error exit while the vnode is still locked and referenced. */
	vput(vp);
	return (error);
}
4410 
4411 /*
4412  * Stat an (NFS) file handle.
4413  */
4414 #ifndef _SYS_SYSPROTO_H_
4415 struct fhstat_args {
4416 	struct fhandle *u_fhp;
4417 	struct stat *sb;
4418 };
4419 #endif
4420 int
4421 fhstat(td, uap)
4422 	struct thread *td;
4423 	register struct fhstat_args /* {
4424 		syscallarg(struct fhandle *) u_fhp;
4425 		syscallarg(struct stat *) sb;
4426 	} */ *uap;
4427 {
4428 	struct stat sb;
4429 	fhandle_t fh;
4430 	struct mount *mp;
4431 	struct vnode *vp;
4432 	int error;
4433 
4434 	/*
4435 	 * Must be super user
4436 	 */
4437 	error = suser(td);
4438 	if (error)
4439 		return (error);
4440 
4441 	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
4442 	if (error)
4443 		return (error);
4444 
4445 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4446 		return (ESTALE);
4447 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4448 		return (error);
4449 	error = vn_stat(vp, &sb, td);
4450 	vput(vp);
4451 	if (error)
4452 		return (error);
4453 	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
4454 	return (error);
4455 }
4456 
4457 /*
4458  * Implement fstatfs() for (NFS) file handles.
4459  */
4460 #ifndef _SYS_SYSPROTO_H_
4461 struct fhstatfs_args {
4462 	struct fhandle *u_fhp;
4463 	struct statfs *buf;
4464 };
4465 #endif
4466 int
4467 fhstatfs(td, uap)
4468 	struct thread *td;
4469 	struct fhstatfs_args /* {
4470 		syscallarg(struct fhandle) *u_fhp;
4471 		syscallarg(struct statfs) *buf;
4472 	} */ *uap;
4473 {
4474 	struct statfs *sp;
4475 	struct mount *mp;
4476 	struct vnode *vp;
4477 	struct statfs sb;
4478 	fhandle_t fh;
4479 	int error;
4480 
4481 	/*
4482 	 * Must be super user
4483 	 */
4484 	error = suser(td);
4485 	if (error)
4486 		return (error);
4487 
4488 	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
4489 		return (error);
4490 
4491 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4492 		return (ESTALE);
4493 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4494 		return (error);
4495 	mp = vp->v_mount;
4496 	sp = &mp->mnt_stat;
4497 	vput(vp);
4498 	if ((error = VFS_STATFS(mp, sp, td)) != 0)
4499 		return (error);
4500 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4501 	if (suser(td)) {
4502 		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
4503 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
4504 		sp = &sb;
4505 	}
4506 	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
4507 }
4508 
4509 /*
4510  * Syscall to push extended attribute configuration information into the
4511  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4512  * a command (int cmd), and attribute name and misc data.  For now, the
4513  * attribute name is left in userspace for consumption by the VFS_op.
4514  * It will probably be changed to be copied into sysspace by the
4515  * syscall in the future, once issues with various consumers of the
4516  * attribute code have raised their hands.
4517  *
4518  * Currently this is used only by UFS Extended Attributes.
4519  */
4520 int
4521 extattrctl(td, uap)
4522 	struct thread *td;
4523 	struct extattrctl_args /* {
4524 		syscallarg(const char *) path;
4525 		syscallarg(int) cmd;
4526 		syscallarg(const char *) filename;
4527 		syscallarg(int) attrnamespace;
4528 		syscallarg(const char *) attrname;
4529 	} */ *uap;
4530 {
4531 	struct vnode *filename_vp;
4532 	struct nameidata nd;
4533 	struct mount *mp, *mp_writable;
4534 	char attrname[EXTATTR_MAXNAMELEN];
4535 	int error;
4536 
4537 	/*
4538 	 * uap->attrname is not always defined.  We check again later when we
4539 	 * invoke the VFS call so as to pass in NULL there if needed.
4540 	 */
4541 	if (uap->attrname != NULL) {
4542 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4543 		    NULL);
4544 		if (error)
4545 			return (error);
4546 	}
4547 
4548 	/*
4549 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4550 	 * which VFS_EXTATTRCTL() will later release.
4551 	 */
4552 	filename_vp = NULL;
4553 	if (uap->filename != NULL) {
4554 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
4555 		    uap->filename, td);
4556 		if ((error = namei(&nd)) != 0)
4557 			return (error);
4558 		filename_vp = nd.ni_vp;
4559 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4560 	}
4561 
4562 	/* uap->path is always defined. */
4563 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4564 	if ((error = namei(&nd)) != 0) {
4565 		if (filename_vp != NULL)
4566 			vput(filename_vp);
4567 		return (error);
4568 	}
4569 	mp = nd.ni_vp->v_mount;
4570 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4571 	NDFREE(&nd, 0);
4572 	if (error) {
4573 		if (filename_vp != NULL)
4574 			vput(filename_vp);
4575 		return (error);
4576 	}
4577 
4578 	if (uap->attrname != NULL) {
4579 		error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp,
4580 		    uap->attrnamespace, attrname, td);
4581 	} else {
4582 		error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp,
4583 		    uap->attrnamespace, NULL, td);
4584 	}
4585 
4586 	vn_finished_write(mp_writable);
4587 	/*
4588 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4589 	 * filename_vp, so vrele it if it is defined.
4590 	 */
4591 	if (filename_vp != NULL)
4592 		vrele(filename_vp);
4593 
4594 	return (error);
4595 }
4596 
4597 /*-
4598  * Set a named extended attribute on a file or directory
4599  *
4600  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4601  *            kernelspace string pointer "attrname", userspace buffer
4602  *            pointer "data", buffer length "nbytes", thread "td".
4603  * Returns: 0 on success, an error number otherwise
4604  * Locks: none
4605  * References: vp must be a valid reference for the duration of the call
4606  */
4607 static int
4608 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4609     void *data, size_t nbytes, struct thread *td)
4610 {
4611 	struct mount *mp;
4612 	struct uio auio;
4613 	struct iovec aiov;
4614 	ssize_t cnt;
4615 	int error;
4616 
4617 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4618 		return (error);
4619 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4620 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4621 
4622 	aiov.iov_base = data;
4623 	aiov.iov_len = nbytes;
4624 	auio.uio_iov = &aiov;
4625 	auio.uio_iovcnt = 1;
4626 	auio.uio_offset = 0;
4627 	if (nbytes > INT_MAX) {
4628 		error = EINVAL;
4629 		goto done;
4630 	}
4631 	auio.uio_resid = nbytes;
4632 	auio.uio_rw = UIO_WRITE;
4633 	auio.uio_segflg = UIO_USERSPACE;
4634 	auio.uio_td = td;
4635 	cnt = nbytes;
4636 
4637 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4638 	    td->td_ucred, td);
4639 	cnt -= auio.uio_resid;
4640 	td->td_retval[0] = cnt;
4641 
4642 done:
4643 	VOP_UNLOCK(vp, 0, td);
4644 	vn_finished_write(mp);
4645 	return (error);
4646 }
4647 
4648 int
4649 extattr_set_file(td, uap)
4650 	struct thread *td;
4651 	struct extattr_set_file_args /* {
4652 		syscallarg(const char *) path;
4653 		syscallarg(int) attrnamespace;
4654 		syscallarg(const char *) attrname;
4655 		syscallarg(void *) data;
4656 		syscallarg(size_t) nbytes;
4657 	} */ *uap;
4658 {
4659 	struct nameidata nd;
4660 	char attrname[EXTATTR_MAXNAMELEN];
4661 	int error;
4662 
4663 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4664 	if (error)
4665 		return (error);
4666 
4667 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4668 	if ((error = namei(&nd)) != 0)
4669 		return (error);
4670 	NDFREE(&nd, NDF_ONLY_PNBUF);
4671 
4672 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4673 	    uap->data, uap->nbytes, td);
4674 
4675 	vrele(nd.ni_vp);
4676 	return (error);
4677 }
4678 
4679 int
4680 extattr_set_fd(td, uap)
4681 	struct thread *td;
4682 	struct extattr_set_fd_args /* {
4683 		syscallarg(int) fd;
4684 		syscallarg(int) attrnamespace;
4685 		syscallarg(const char *) attrname;
4686 		syscallarg(void *) data;
4687 		syscallarg(size_t) nbytes;
4688 	} */ *uap;
4689 {
4690 	struct file *fp;
4691 	char attrname[EXTATTR_MAXNAMELEN];
4692 	int error;
4693 
4694 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4695 	if (error)
4696 		return (error);
4697 
4698 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
4699 		return (error);
4700 
4701 	error = extattr_set_vp((struct vnode *)fp->f_data, uap->attrnamespace,
4702 	    attrname, uap->data, uap->nbytes, td);
4703 	fdrop(fp, td);
4704 
4705 	return (error);
4706 }
4707 
4708 /*-
4709  * Get a named extended attribute on a file or directory
4710  *
4711  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4712  *            kernelspace string pointer "attrname", userspace buffer
4713  *            pointer "data", buffer length "nbytes", thread "td".
4714  * Returns: 0 on success, an error number otherwise
4715  * Locks: none
4716  * References: vp must be a valid reference for the duration of the call
4717  */
4718 static int
4719 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4720     void *data, size_t nbytes, struct thread *td)
4721 {
4722 	struct uio auio, *auiop;
4723 	struct iovec aiov;
4724 	ssize_t cnt;
4725 	size_t size, *sizep;
4726 	int error;
4727 
4728 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4729 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4730 
4731 	/*
4732 	 * Slightly unusual semantics: if the user provides a NULL data
4733 	 * pointer, they don't want to receive the data, just the
4734 	 * maximum read length.
4735 	 */
4736 	auiop = NULL;
4737 	sizep = NULL;
4738 	cnt = 0;
4739 	if (data != NULL) {
4740 		aiov.iov_base = data;
4741 		aiov.iov_len = nbytes;
4742 		auio.uio_iov = &aiov;
4743 		auio.uio_offset = 0;
4744 		if (nbytes > INT_MAX) {
4745 			error = EINVAL;
4746 			goto done;
4747 		}
4748 		auio.uio_resid = nbytes;
4749 		auio.uio_rw = UIO_READ;
4750 		auio.uio_segflg = UIO_USERSPACE;
4751 		auio.uio_td = td;
4752 		auiop = &auio;
4753 		cnt = nbytes;
4754 	} else
4755 		sizep = &size;
4756 
4757 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4758 	    td->td_ucred, td);
4759 
4760 	if (auiop != NULL) {
4761 		cnt -= auio.uio_resid;
4762 		td->td_retval[0] = cnt;
4763 	} else
4764 		td->td_retval[0] = size;
4765 
4766 done:
4767 	VOP_UNLOCK(vp, 0, td);
4768 	return (error);
4769 }
4770 
4771 int
4772 extattr_get_file(td, uap)
4773 	struct thread *td;
4774 	struct extattr_get_file_args /* {
4775 		syscallarg(const char *) path;
4776 		syscallarg(int) attrnamespace;
4777 		syscallarg(const char *) attrname;
4778 		syscallarg(void *) data;
4779 		syscallarg(size_t) nbytes;
4780 	} */ *uap;
4781 {
4782 	struct nameidata nd;
4783 	char attrname[EXTATTR_MAXNAMELEN];
4784 	int error;
4785 
4786 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4787 	if (error)
4788 		return (error);
4789 
4790 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4791 	if ((error = namei(&nd)) != 0)
4792 		return (error);
4793 	NDFREE(&nd, NDF_ONLY_PNBUF);
4794 
4795 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4796 	    uap->data, uap->nbytes, td);
4797 
4798 	vrele(nd.ni_vp);
4799 	return (error);
4800 }
4801 
4802 int
4803 extattr_get_fd(td, uap)
4804 	struct thread *td;
4805 	struct extattr_get_fd_args /* {
4806 		syscallarg(int) fd;
4807 		syscallarg(int) attrnamespace;
4808 		syscallarg(const char *) attrname;
4809 		syscallarg(void *) data;
4810 		syscallarg(size_t) nbytes;
4811 	} */ *uap;
4812 {
4813 	struct file *fp;
4814 	char attrname[EXTATTR_MAXNAMELEN];
4815 	int error;
4816 
4817 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4818 	if (error)
4819 		return (error);
4820 
4821 	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4822 		return (error);
4823 
4824 	error = extattr_get_vp((struct vnode *)fp->f_data, uap->attrnamespace,
4825 	    attrname, uap->data, uap->nbytes, td);
4826 
4827 	fdrop(fp, td);
4828 	return (error);
4829 }
4830 
4831 /*
4832  * extattr_delete_vp(): Delete a named extended attribute on a file or
4833  *                      directory
4834  *
4835  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4836  *            kernelspace string pointer "attrname", proc "p"
4837  * Returns: 0 on success, an error number otherwise
4838  * Locks: none
4839  * References: vp must be a valid reference for the duration of the call
4840  */
4841 static int
4842 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4843     struct thread *td)
4844 {
4845 	struct mount *mp;
4846 	int error;
4847 
4848 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4849 		return (error);
4850 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4851 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4852 
4853 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred,
4854 	    td);
4855 
4856 	VOP_UNLOCK(vp, 0, td);
4857 	vn_finished_write(mp);
4858 	return (error);
4859 }
4860 
4861 int
4862 extattr_delete_file(td, uap)
4863 	struct thread *td;
4864 	struct extattr_delete_file_args /* {
4865 		syscallarg(const char *) path;
4866 		syscallarg(int) attrnamespace;
4867 		syscallarg(const char *) attrname;
4868 	} */ *uap;
4869 {
4870 	struct nameidata nd;
4871 	char attrname[EXTATTR_MAXNAMELEN];
4872 	int error;
4873 
4874 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4875 	if (error)
4876 		return(error);
4877 
4878 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4879 	if ((error = namei(&nd)) != 0)
4880 		return(error);
4881 	NDFREE(&nd, NDF_ONLY_PNBUF);
4882 
4883 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4884 
4885 	vrele(nd.ni_vp);
4886 	return(error);
4887 }
4888 
4889 int
4890 extattr_delete_fd(td, uap)
4891 	struct thread *td;
4892 	struct extattr_delete_fd_args /* {
4893 		syscallarg(int) fd;
4894 		syscallarg(int) attrnamespace;
4895 		syscallarg(const char *) attrname;
4896 	} */ *uap;
4897 {
4898 	struct file *fp;
4899 	struct vnode *vp;
4900 	char attrname[EXTATTR_MAXNAMELEN];
4901 	int error;
4902 
4903 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4904 	if (error)
4905 		return (error);
4906 
4907 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
4908 		return (error);
4909 	vp = (struct vnode *)fp->f_data;
4910 
4911 	error = extattr_delete_vp((struct vnode *)fp->f_data,
4912 	    uap->attrnamespace, attrname, td);
4913 
4914 	fdrop(fp, td);
4915 	return (error);
4916 }
4917