xref: /freebsd/sys/kern/vfs_syscalls.c (revision 94942af266ac119ede0ca836f9aa5a5ac0582938)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/sysent.h>
48 #include <sys/malloc.h>
49 #include <sys/mount.h>
50 #include <sys/mutex.h>
51 #include <sys/sysproto.h>
52 #include <sys/namei.h>
53 #include <sys/filedesc.h>
54 #include <sys/kernel.h>
55 #include <sys/fcntl.h>
56 #include <sys/file.h>
57 #include <sys/filio.h>
58 #include <sys/limits.h>
59 #include <sys/linker.h>
60 #include <sys/stat.h>
61 #include <sys/sx.h>
62 #include <sys/unistd.h>
63 #include <sys/vnode.h>
64 #include <sys/priv.h>
65 #include <sys/proc.h>
66 #include <sys/dirent.h>
67 #include <sys/jail.h>
68 #include <sys/syscallsubr.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/stdarg.h>
72 
73 #include <security/audit/audit.h>
74 #include <security/mac/mac_framework.h>
75 
76 #include <vm/vm.h>
77 #include <vm/vm_object.h>
78 #include <vm/vm_page.h>
79 #include <vm/uma.h>
80 
81 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
82 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
83 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
84 static int setfmode(struct thread *td, struct vnode *, int);
85 static int setfflags(struct thread *td, struct vnode *, int);
86 static int setutimes(struct thread *td, struct vnode *,
87     const struct timespec *, int, int);
88 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
89     struct thread *td);
90 
91 /*
92  * The module initialization routine for POSIX asynchronous I/O will
93  * set this to the version of AIO that it implements.  (Zero means
94  * that it is not implemented.)  This value is used here by pathconf()
95  * and in kern_descrip.c by fpathconf().
96  */
97 int async_io_version;
98 
99 #ifdef DEBUG
100 static int syncprt = 0;
101 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
102 #endif
103 
104 /*
105  * Sync each mounted filesystem.
106  */
107 #ifndef _SYS_SYSPROTO_H_
108 struct sync_args {
109 	int     dummy;
110 };
111 #endif
112 /* ARGSUSED */
113 int
114 sync(td, uap)
115 	struct thread *td;
116 	struct sync_args *uap;
117 {
118 	struct mount *mp, *nmp;
119 	int vfslocked;
120 
121 	mtx_lock(&mountlist_mtx);
122 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
123 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
124 			nmp = TAILQ_NEXT(mp, mnt_list);
125 			continue;
126 		}
127 		vfslocked = VFS_LOCK_GIANT(mp);
128 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
129 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
130 			MNT_ILOCK(mp);
131 			mp->mnt_noasync++;
132 			mp->mnt_kern_flag &= ~MNTK_ASYNC;
133 			MNT_IUNLOCK(mp);
134 			vfs_msync(mp, MNT_NOWAIT);
135 			VFS_SYNC(mp, MNT_NOWAIT, td);
136 			MNT_ILOCK(mp);
137 			mp->mnt_noasync--;
138 			if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
139 			    mp->mnt_noasync == 0)
140 				mp->mnt_kern_flag |= MNTK_ASYNC;
141 			MNT_IUNLOCK(mp);
142 			vn_finished_write(mp);
143 		}
144 		VFS_UNLOCK_GIANT(vfslocked);
145 		mtx_lock(&mountlist_mtx);
146 		nmp = TAILQ_NEXT(mp, mnt_list);
147 		vfs_unbusy(mp, td);
148 	}
149 	mtx_unlock(&mountlist_mtx);
150 	return (0);
151 }
152 
153 /* XXX PRISON: could be per prison flag */
154 static int prison_quotas;
155 #if 0
156 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
157 #endif
158 
159 /*
160  * Change filesystem quotas.
161  */
162 #ifndef _SYS_SYSPROTO_H_
163 struct quotactl_args {
164 	char *path;
165 	int cmd;
166 	int uid;
167 	caddr_t arg;
168 };
169 #endif
170 int
171 quotactl(td, uap)
172 	struct thread *td;
173 	register struct quotactl_args /* {
174 		char *path;
175 		int cmd;
176 		int uid;
177 		caddr_t arg;
178 	} */ *uap;
179 {
180 	struct mount *mp;
181 	int vfslocked;
182 	int error;
183 	struct nameidata nd;
184 
185 	AUDIT_ARG(cmd, uap->cmd);
186 	AUDIT_ARG(uid, uap->uid);
187 	if (jailed(td->td_ucred) && !prison_quotas)
188 		return (EPERM);
189 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1,
190 	   UIO_USERSPACE, uap->path, td);
191 	if ((error = namei(&nd)) != 0)
192 		return (error);
193 	vfslocked = NDHASGIANT(&nd);
194 	NDFREE(&nd, NDF_ONLY_PNBUF);
195 	mp = nd.ni_vp->v_mount;
196 	if ((error = vfs_busy(mp, 0, NULL, td))) {
197 		vrele(nd.ni_vp);
198 		VFS_UNLOCK_GIANT(vfslocked);
199 		return (error);
200 	}
201 	vrele(nd.ni_vp);
202 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
203 	vfs_unbusy(mp, td);
204 	VFS_UNLOCK_GIANT(vfslocked);
205 	return (error);
206 }
207 
208 /*
209  * Get filesystem statistics.
210  */
211 #ifndef _SYS_SYSPROTO_H_
212 struct statfs_args {
213 	char *path;
214 	struct statfs *buf;
215 };
216 #endif
217 int
218 statfs(td, uap)
219 	struct thread *td;
220 	register struct statfs_args /* {
221 		char *path;
222 		struct statfs *buf;
223 	} */ *uap;
224 {
225 	struct statfs sf;
226 	int error;
227 
228 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
229 	if (error == 0)
230 		error = copyout(&sf, uap->buf, sizeof(sf));
231 	return (error);
232 }
233 
234 int
235 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
236     struct statfs *buf)
237 {
238 	struct mount *mp;
239 	struct statfs *sp, sb;
240 	int vfslocked;
241 	int error;
242 	struct nameidata nd;
243 
244 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
245 	    pathseg, path, td);
246 	error = namei(&nd);
247 	if (error)
248 		return (error);
249 	vfslocked = NDHASGIANT(&nd);
250 	mp = nd.ni_vp->v_mount;
251 	vfs_ref(mp);
252 	NDFREE(&nd, NDF_ONLY_PNBUF);
253 	vput(nd.ni_vp);
254 #ifdef MAC
255 	error = mac_check_mount_stat(td->td_ucred, mp);
256 	if (error)
257 		goto out;
258 #endif
259 	/*
260 	 * Set these in case the underlying filesystem fails to do so.
261 	 */
262 	sp = &mp->mnt_stat;
263 	sp->f_version = STATFS_VERSION;
264 	sp->f_namemax = NAME_MAX;
265 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
266 	error = VFS_STATFS(mp, sp, td);
267 	if (error)
268 		goto out;
269 	if (priv_check(td, PRIV_VFS_GENERATION)) {
270 		bcopy(sp, &sb, sizeof(sb));
271 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
272 		prison_enforce_statfs(td->td_ucred, mp, &sb);
273 		sp = &sb;
274 	}
275 	*buf = *sp;
276 out:
277 	vfs_rel(mp);
278 	VFS_UNLOCK_GIANT(vfslocked);
279 	if (mtx_owned(&Giant))
280 		printf("statfs(%d): %s: %d\n", vfslocked, path, error);
281 	return (error);
282 }
283 
284 /*
285  * Get filesystem statistics.
286  */
287 #ifndef _SYS_SYSPROTO_H_
288 struct fstatfs_args {
289 	int fd;
290 	struct statfs *buf;
291 };
292 #endif
293 int
294 fstatfs(td, uap)
295 	struct thread *td;
296 	register struct fstatfs_args /* {
297 		int fd;
298 		struct statfs *buf;
299 	} */ *uap;
300 {
301 	struct statfs sf;
302 	int error;
303 
304 	error = kern_fstatfs(td, uap->fd, &sf);
305 	if (error == 0)
306 		error = copyout(&sf, uap->buf, sizeof(sf));
307 	return (error);
308 }
309 
310 int
311 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
312 {
313 	struct file *fp;
314 	struct mount *mp;
315 	struct statfs *sp, sb;
316 	int vfslocked;
317 	struct vnode *vp;
318 	int error;
319 
320 	AUDIT_ARG(fd, fd);
321 	error = getvnode(td->td_proc->p_fd, fd, &fp);
322 	if (error)
323 		return (error);
324 	vp = fp->f_vnode;
325 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
326 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
327 #ifdef AUDIT
328 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
329 #endif
330 	mp = vp->v_mount;
331 	if (mp)
332 		vfs_ref(mp);
333 	VOP_UNLOCK(vp, 0, td);
334 	fdrop(fp, td);
335 	if (vp->v_iflag & VI_DOOMED) {
336 		error = EBADF;
337 		goto out;
338 	}
339 #ifdef MAC
340 	error = mac_check_mount_stat(td->td_ucred, mp);
341 	if (error)
342 		goto out;
343 #endif
344 	/*
345 	 * Set these in case the underlying filesystem fails to do so.
346 	 */
347 	sp = &mp->mnt_stat;
348 	sp->f_version = STATFS_VERSION;
349 	sp->f_namemax = NAME_MAX;
350 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
351 	error = VFS_STATFS(mp, sp, td);
352 	if (error)
353 		goto out;
354 	if (priv_check(td, PRIV_VFS_GENERATION)) {
355 		bcopy(sp, &sb, sizeof(sb));
356 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
357 		prison_enforce_statfs(td->td_ucred, mp, &sb);
358 		sp = &sb;
359 	}
360 	*buf = *sp;
361 out:
362 	if (mp)
363 		vfs_rel(mp);
364 	VFS_UNLOCK_GIANT(vfslocked);
365 	return (error);
366 }
367 
368 /*
369  * Get statistics on all filesystems.
370  */
371 #ifndef _SYS_SYSPROTO_H_
372 struct getfsstat_args {
373 	struct statfs *buf;
374 	long bufsize;
375 	int flags;
376 };
377 #endif
378 int
379 getfsstat(td, uap)
380 	struct thread *td;
381 	register struct getfsstat_args /* {
382 		struct statfs *buf;
383 		long bufsize;
384 		int flags;
385 	} */ *uap;
386 {
387 
388 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
389 	    uap->flags));
390 }
391 
392 /*
393  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
394  * 	The caller is responsible for freeing memory which will be allocated
395  *	in '*buf'.
396  */
397 int
398 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
399     enum uio_seg bufseg, int flags)
400 {
401 	struct mount *mp, *nmp;
402 	struct statfs *sfsp, *sp, sb;
403 	size_t count, maxcount;
404 	int vfslocked;
405 	int error;
406 
407 	maxcount = bufsize / sizeof(struct statfs);
408 	if (bufsize == 0)
409 		sfsp = NULL;
410 	else if (bufseg == UIO_USERSPACE)
411 		sfsp = *buf;
412 	else /* if (bufseg == UIO_SYSSPACE) */ {
413 		count = 0;
414 		mtx_lock(&mountlist_mtx);
415 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
416 			count++;
417 		}
418 		mtx_unlock(&mountlist_mtx);
419 		if (maxcount > count)
420 			maxcount = count;
421 		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
422 		    M_WAITOK);
423 	}
424 	count = 0;
425 	mtx_lock(&mountlist_mtx);
426 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
427 		if (prison_canseemount(td->td_ucred, mp) != 0) {
428 			nmp = TAILQ_NEXT(mp, mnt_list);
429 			continue;
430 		}
431 #ifdef MAC
432 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
433 			nmp = TAILQ_NEXT(mp, mnt_list);
434 			continue;
435 		}
436 #endif
437 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
438 			nmp = TAILQ_NEXT(mp, mnt_list);
439 			continue;
440 		}
441 		vfslocked = VFS_LOCK_GIANT(mp);
442 		if (sfsp && count < maxcount) {
443 			sp = &mp->mnt_stat;
444 			/*
445 			 * Set these in case the underlying filesystem
446 			 * fails to do so.
447 			 */
448 			sp->f_version = STATFS_VERSION;
449 			sp->f_namemax = NAME_MAX;
450 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
451 			/*
452 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
453 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
454 			 * overrides MNT_WAIT.
455 			 */
456 			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
457 			    (flags & MNT_WAIT)) &&
458 			    (error = VFS_STATFS(mp, sp, td))) {
459 				VFS_UNLOCK_GIANT(vfslocked);
460 				mtx_lock(&mountlist_mtx);
461 				nmp = TAILQ_NEXT(mp, mnt_list);
462 				vfs_unbusy(mp, td);
463 				continue;
464 			}
465 			if (priv_check(td, PRIV_VFS_GENERATION)) {
466 				bcopy(sp, &sb, sizeof(sb));
467 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
468 				prison_enforce_statfs(td->td_ucred, mp, &sb);
469 				sp = &sb;
470 			}
471 			if (bufseg == UIO_SYSSPACE)
472 				bcopy(sp, sfsp, sizeof(*sp));
473 			else /* if (bufseg == UIO_USERSPACE) */ {
474 				error = copyout(sp, sfsp, sizeof(*sp));
475 				if (error) {
476 					vfs_unbusy(mp, td);
477 					VFS_UNLOCK_GIANT(vfslocked);
478 					return (error);
479 				}
480 			}
481 			sfsp++;
482 		}
483 		VFS_UNLOCK_GIANT(vfslocked);
484 		count++;
485 		mtx_lock(&mountlist_mtx);
486 		nmp = TAILQ_NEXT(mp, mnt_list);
487 		vfs_unbusy(mp, td);
488 	}
489 	mtx_unlock(&mountlist_mtx);
490 	if (sfsp && count > maxcount)
491 		td->td_retval[0] = maxcount;
492 	else
493 		td->td_retval[0] = count;
494 	return (0);
495 }
496 
497 #ifdef COMPAT_FREEBSD4
498 /*
499  * Get old format filesystem statistics.
500  */
501 static void cvtstatfs(struct statfs *, struct ostatfs *);
502 
503 #ifndef _SYS_SYSPROTO_H_
504 struct freebsd4_statfs_args {
505 	char *path;
506 	struct ostatfs *buf;
507 };
508 #endif
509 int
510 freebsd4_statfs(td, uap)
511 	struct thread *td;
512 	struct freebsd4_statfs_args /* {
513 		char *path;
514 		struct ostatfs *buf;
515 	} */ *uap;
516 {
517 	struct ostatfs osb;
518 	struct statfs sf;
519 	int error;
520 
521 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
522 	if (error)
523 		return (error);
524 	cvtstatfs(&sf, &osb);
525 	return (copyout(&osb, uap->buf, sizeof(osb)));
526 }
527 
528 /*
529  * Get filesystem statistics.
530  */
531 #ifndef _SYS_SYSPROTO_H_
532 struct freebsd4_fstatfs_args {
533 	int fd;
534 	struct ostatfs *buf;
535 };
536 #endif
537 int
538 freebsd4_fstatfs(td, uap)
539 	struct thread *td;
540 	struct freebsd4_fstatfs_args /* {
541 		int fd;
542 		struct ostatfs *buf;
543 	} */ *uap;
544 {
545 	struct ostatfs osb;
546 	struct statfs sf;
547 	int error;
548 
549 	error = kern_fstatfs(td, uap->fd, &sf);
550 	if (error)
551 		return (error);
552 	cvtstatfs(&sf, &osb);
553 	return (copyout(&osb, uap->buf, sizeof(osb)));
554 }
555 
556 /*
557  * Get statistics on all filesystems.
558  */
559 #ifndef _SYS_SYSPROTO_H_
560 struct freebsd4_getfsstat_args {
561 	struct ostatfs *buf;
562 	long bufsize;
563 	int flags;
564 };
565 #endif
566 int
567 freebsd4_getfsstat(td, uap)
568 	struct thread *td;
569 	register struct freebsd4_getfsstat_args /* {
570 		struct ostatfs *buf;
571 		long bufsize;
572 		int flags;
573 	} */ *uap;
574 {
575 	struct statfs *buf, *sp;
576 	struct ostatfs osb;
577 	size_t count, size;
578 	int error;
579 
580 	count = uap->bufsize / sizeof(struct ostatfs);
581 	size = count * sizeof(struct statfs);
582 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
583 	if (size > 0) {
584 		count = td->td_retval[0];
585 		sp = buf;
586 		while (count > 0 && error == 0) {
587 			cvtstatfs(sp, &osb);
588 			error = copyout(&osb, uap->buf, sizeof(osb));
589 			sp++;
590 			uap->buf++;
591 			count--;
592 		}
593 		free(buf, M_TEMP);
594 	}
595 	return (error);
596 }
597 
598 /*
599  * Implement fstatfs() for (NFS) file handles.
600  */
601 #ifndef _SYS_SYSPROTO_H_
602 struct freebsd4_fhstatfs_args {
603 	struct fhandle *u_fhp;
604 	struct ostatfs *buf;
605 };
606 #endif
607 int
608 freebsd4_fhstatfs(td, uap)
609 	struct thread *td;
610 	struct freebsd4_fhstatfs_args /* {
611 		struct fhandle *u_fhp;
612 		struct ostatfs *buf;
613 	} */ *uap;
614 {
615 	struct ostatfs osb;
616 	struct statfs sf;
617 	fhandle_t fh;
618 	int error;
619 
620 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
621 	if (error)
622 		return (error);
623 	error = kern_fhstatfs(td, fh, &sf);
624 	if (error)
625 		return (error);
626 	cvtstatfs(&sf, &osb);
627 	return (copyout(&osb, uap->buf, sizeof(osb)));
628 }
629 
630 /*
631  * Convert a new format statfs structure to an old format statfs structure.
632  */
633 static void
634 cvtstatfs(nsp, osp)
635 	struct statfs *nsp;
636 	struct ostatfs *osp;
637 {
638 
639 	bzero(osp, sizeof(*osp));
640 	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
641 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
642 	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
643 	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
644 	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
645 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
646 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
647 	osp->f_owner = nsp->f_owner;
648 	osp->f_type = nsp->f_type;
649 	osp->f_flags = nsp->f_flags;
650 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
651 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
652 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
653 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
654 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
655 	    MIN(MFSNAMELEN, OMFSNAMELEN));
656 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
657 	    MIN(MNAMELEN, OMNAMELEN));
658 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
659 	    MIN(MNAMELEN, OMNAMELEN));
660 	osp->f_fsid = nsp->f_fsid;
661 }
662 #endif /* COMPAT_FREEBSD4 */
663 
664 /*
665  * Change current working directory to a given file descriptor.
666  */
667 #ifndef _SYS_SYSPROTO_H_
668 struct fchdir_args {
669 	int	fd;
670 };
671 #endif
672 int
673 fchdir(td, uap)
674 	struct thread *td;
675 	struct fchdir_args /* {
676 		int fd;
677 	} */ *uap;
678 {
679 	register struct filedesc *fdp = td->td_proc->p_fd;
680 	struct vnode *vp, *tdp, *vpold;
681 	struct mount *mp;
682 	struct file *fp;
683 	int vfslocked;
684 	int error;
685 
686 	AUDIT_ARG(fd, uap->fd);
687 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
688 		return (error);
689 	vp = fp->f_vnode;
690 	VREF(vp);
691 	fdrop(fp, td);
692 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
693 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
694 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
695 	error = change_dir(vp, td);
696 	while (!error && (mp = vp->v_mountedhere) != NULL) {
697 		int tvfslocked;
698 		if (vfs_busy(mp, 0, 0, td))
699 			continue;
700 		tvfslocked = VFS_LOCK_GIANT(mp);
701 		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
702 		vfs_unbusy(mp, td);
703 		if (error) {
704 			VFS_UNLOCK_GIANT(tvfslocked);
705 			break;
706 		}
707 		vput(vp);
708 		VFS_UNLOCK_GIANT(vfslocked);
709 		vp = tdp;
710 		vfslocked = tvfslocked;
711 	}
712 	if (error) {
713 		vput(vp);
714 		VFS_UNLOCK_GIANT(vfslocked);
715 		return (error);
716 	}
717 	VOP_UNLOCK(vp, 0, td);
718 	VFS_UNLOCK_GIANT(vfslocked);
719 	FILEDESC_XLOCK(fdp);
720 	vpold = fdp->fd_cdir;
721 	fdp->fd_cdir = vp;
722 	FILEDESC_XUNLOCK(fdp);
723 	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
724 	vrele(vpold);
725 	VFS_UNLOCK_GIANT(vfslocked);
726 	return (0);
727 }
728 
729 /*
730  * Change current working directory (``.'').
731  */
732 #ifndef _SYS_SYSPROTO_H_
733 struct chdir_args {
734 	char	*path;
735 };
736 #endif
737 int
738 chdir(td, uap)
739 	struct thread *td;
740 	struct chdir_args /* {
741 		char *path;
742 	} */ *uap;
743 {
744 
745 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
746 }
747 
748 int
749 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
750 {
751 	register struct filedesc *fdp = td->td_proc->p_fd;
752 	int error;
753 	struct nameidata nd;
754 	struct vnode *vp;
755 	int vfslocked;
756 
757 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
758 	    pathseg, path, td);
759 	if ((error = namei(&nd)) != 0)
760 		return (error);
761 	vfslocked = NDHASGIANT(&nd);
762 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
763 		vput(nd.ni_vp);
764 		VFS_UNLOCK_GIANT(vfslocked);
765 		NDFREE(&nd, NDF_ONLY_PNBUF);
766 		return (error);
767 	}
768 	VOP_UNLOCK(nd.ni_vp, 0, td);
769 	VFS_UNLOCK_GIANT(vfslocked);
770 	NDFREE(&nd, NDF_ONLY_PNBUF);
771 	FILEDESC_XLOCK(fdp);
772 	vp = fdp->fd_cdir;
773 	fdp->fd_cdir = nd.ni_vp;
774 	FILEDESC_XUNLOCK(fdp);
775 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
776 	vrele(vp);
777 	VFS_UNLOCK_GIANT(vfslocked);
778 	return (0);
779 }
780 
781 /*
782  * Helper function for raised chroot(2) security function:  Refuse if
783  * any filedescriptors are open directories.
784  */
785 static int
786 chroot_refuse_vdir_fds(fdp)
787 	struct filedesc *fdp;
788 {
789 	struct vnode *vp;
790 	struct file *fp;
791 	int fd;
792 
793 	FILEDESC_LOCK_ASSERT(fdp);
794 
795 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
796 		fp = fget_locked(fdp, fd);
797 		if (fp == NULL)
798 			continue;
799 		if (fp->f_type == DTYPE_VNODE) {
800 			vp = fp->f_vnode;
801 			if (vp->v_type == VDIR)
802 				return (EPERM);
803 		}
804 	}
805 	return (0);
806 }
807 
808 /*
809  * This sysctl determines if we will allow a process to chroot(2) if it
810  * has a directory open:
811  *	0: disallowed for all processes.
812  *	1: allowed for processes that were not already chroot(2)'ed.
813  *	2: allowed for all processes.
814  */
815 
816 static int chroot_allow_open_directories = 1;
817 
818 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
819      &chroot_allow_open_directories, 0, "");
820 
821 /*
822  * Change notion of root (``/'') directory.
823  */
824 #ifndef _SYS_SYSPROTO_H_
825 struct chroot_args {
826 	char	*path;
827 };
828 #endif
829 int
830 chroot(td, uap)
831 	struct thread *td;
832 	struct chroot_args /* {
833 		char *path;
834 	} */ *uap;
835 {
836 	int error;
837 	struct nameidata nd;
838 	int vfslocked;
839 
840 	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT,
841 	    SUSER_ALLOWJAIL);
842 	if (error)
843 		return (error);
844 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
845 	    UIO_USERSPACE, uap->path, td);
846 	error = namei(&nd);
847 	if (error)
848 		goto error;
849 	vfslocked = NDHASGIANT(&nd);
850 	if ((error = change_dir(nd.ni_vp, td)) != 0)
851 		goto e_vunlock;
852 #ifdef MAC
853 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
854 		goto e_vunlock;
855 #endif
856 	VOP_UNLOCK(nd.ni_vp, 0, td);
857 	error = change_root(nd.ni_vp, td);
858 	vrele(nd.ni_vp);
859 	VFS_UNLOCK_GIANT(vfslocked);
860 	NDFREE(&nd, NDF_ONLY_PNBUF);
861 	return (error);
862 e_vunlock:
863 	vput(nd.ni_vp);
864 	VFS_UNLOCK_GIANT(vfslocked);
865 error:
866 	NDFREE(&nd, NDF_ONLY_PNBUF);
867 	return (error);
868 }
869 
870 /*
871  * Common routine for chroot and chdir.  Callers must provide a locked vnode
872  * instance.
873  */
874 int
875 change_dir(vp, td)
876 	struct vnode *vp;
877 	struct thread *td;
878 {
879 	int error;
880 
881 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
882 	if (vp->v_type != VDIR)
883 		return (ENOTDIR);
884 #ifdef MAC
885 	error = mac_check_vnode_chdir(td->td_ucred, vp);
886 	if (error)
887 		return (error);
888 #endif
889 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
890 	return (error);
891 }
892 
893 /*
894  * Common routine for kern_chroot() and jail_attach().  The caller is
895  * responsible for invoking priv_check() and mac_check_chroot() to authorize
896  * this operation.
897  */
898 int
899 change_root(vp, td)
900 	struct vnode *vp;
901 	struct thread *td;
902 {
903 	struct filedesc *fdp;
904 	struct vnode *oldvp;
905 	int vfslocked;
906 	int error;
907 
908 	VFS_ASSERT_GIANT(vp->v_mount);
909 	fdp = td->td_proc->p_fd;
910 	FILEDESC_XLOCK(fdp);
911 	if (chroot_allow_open_directories == 0 ||
912 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
913 		error = chroot_refuse_vdir_fds(fdp);
914 		if (error) {
915 			FILEDESC_XUNLOCK(fdp);
916 			return (error);
917 		}
918 	}
919 	oldvp = fdp->fd_rdir;
920 	fdp->fd_rdir = vp;
921 	VREF(fdp->fd_rdir);
922 	if (!fdp->fd_jdir) {
923 		fdp->fd_jdir = vp;
924 		VREF(fdp->fd_jdir);
925 	}
926 	FILEDESC_XUNLOCK(fdp);
927 	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
928 	vrele(oldvp);
929 	VFS_UNLOCK_GIANT(vfslocked);
930 	return (0);
931 }
932 
933 /*
934  * Check permissions, allocate an open file structure, and call the device
935  * open routine if any.
936  */
937 #ifndef _SYS_SYSPROTO_H_
938 struct open_args {
939 	char	*path;
940 	int	flags;
941 	int	mode;
942 };
943 #endif
944 int
945 open(td, uap)
946 	struct thread *td;
947 	register struct open_args /* {
948 		char *path;
949 		int flags;
950 		int mode;
951 	} */ *uap;
952 {
953 
954 	return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
955 }
956 
957 int
958 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
959     int mode)
960 {
961 	struct proc *p = td->td_proc;
962 	struct filedesc *fdp = p->p_fd;
963 	struct file *fp;
964 	struct vnode *vp;
965 	struct vattr vat;
966 	struct mount *mp;
967 	int cmode;
968 	struct file *nfp;
969 	int type, indx, error;
970 	struct flock lf;
971 	struct nameidata nd;
972 	int vfslocked;
973 
974 	AUDIT_ARG(fflags, flags);
975 	AUDIT_ARG(mode, mode);
976 	if ((flags & O_ACCMODE) == O_ACCMODE)
977 		return (EINVAL);
978 	flags = FFLAGS(flags);
979 	error = falloc(td, &nfp, &indx);
980 	if (error)
981 		return (error);
982 	/* An extra reference on `nfp' has been held for us by falloc(). */
983 	fp = nfp;
984 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
985 	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
986 	td->td_dupfd = -1;		/* XXX check for fdopen */
987 	FILEDESC_XLOCK(fdp);
988 	if (fp != fdp->fd_ofiles[indx]) {
989 		FILEDESC_XUNLOCK(fdp);
990 		fdrop(fp, td);
991 		td->td_retval[0] = indx;
992 		return (0);
993 	}
994 	fdp->fd_ofileflags[indx] |= UF_OPENING;
995 	FILEDESC_XUNLOCK(fdp);
996 	error = vn_open(&nd, &flags, cmode, indx);
997 	FILEDESC_XLOCK(fdp);
998 	fdp->fd_ofileflags[indx] &= ~UF_OPENING;
999 	FILEDESC_XUNLOCK(fdp);
1000 	if (error) {
1001 		/*
1002 		 * If the vn_open replaced the method vector, something
1003 		 * wonderous happened deep below and we just pass it up
1004 		 * pretending we know what we do.
1005 		 */
1006 		if (error == ENXIO && fp->f_ops != &badfileops) {
1007 			fdrop(fp, td);
1008 			td->td_retval[0] = indx;
1009 			return (0);
1010 		}
1011 
1012 		/*
1013 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1014 		 * responsible for dropping the old contents of ofiles[indx]
1015 		 * if it succeeds.
1016 		 */
1017 		if ((error == ENODEV || error == ENXIO) &&
1018 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1019 		    (error =
1020 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1021 			td->td_retval[0] = indx;
1022 			fdrop(fp, td);
1023 			return (0);
1024 		}
1025 		/*
1026 		 * Clean up the descriptor, but only if another thread hadn't
1027 		 * replaced or closed it.
1028 		 */
1029 		fdclose(fdp, fp, indx, td);
1030 		fdrop(fp, td);
1031 
1032 		if (error == ERESTART)
1033 			error = EINTR;
1034 		return (error);
1035 	}
1036 	td->td_dupfd = 0;
1037 	vfslocked = NDHASGIANT(&nd);
1038 	NDFREE(&nd, NDF_ONLY_PNBUF);
1039 	vp = nd.ni_vp;
1040 
1041 	FILE_LOCK(fp);
1042 	fp->f_vnode = vp;
1043 	if (fp->f_data == NULL)
1044 		fp->f_data = vp;
1045 	fp->f_flag = flags & FMASK;
1046 	fp->f_seqcount = 1;
1047 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1048 	if (fp->f_ops == &badfileops)
1049 		fp->f_ops = &vnops;
1050 	FILE_UNLOCK(fp);
1051 
1052 	VOP_UNLOCK(vp, 0, td);
1053 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1054 		lf.l_whence = SEEK_SET;
1055 		lf.l_start = 0;
1056 		lf.l_len = 0;
1057 		if (flags & O_EXLOCK)
1058 			lf.l_type = F_WRLCK;
1059 		else
1060 			lf.l_type = F_RDLCK;
1061 		type = F_FLOCK;
1062 		if ((flags & FNONBLOCK) == 0)
1063 			type |= F_WAIT;
1064 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1065 			    type)) != 0)
1066 			goto bad;
1067 		fp->f_flag |= FHASLOCK;
1068 	}
1069 	if (flags & O_TRUNC) {
1070 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1071 			goto bad;
1072 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1073 		VATTR_NULL(&vat);
1074 		vat.va_size = 0;
1075 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1076 #ifdef MAC
1077 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1078 		if (error == 0)
1079 #endif
1080 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1081 		VOP_UNLOCK(vp, 0, td);
1082 		vn_finished_write(mp);
1083 		if (error)
1084 			goto bad;
1085 	}
1086 	VFS_UNLOCK_GIANT(vfslocked);
1087 	/*
1088 	 * Release our private reference, leaving the one associated with
1089 	 * the descriptor table intact.
1090 	 */
1091 	fdrop(fp, td);
1092 	td->td_retval[0] = indx;
1093 	return (0);
1094 bad:
1095 	VFS_UNLOCK_GIANT(vfslocked);
1096 	fdclose(fdp, fp, indx, td);
1097 	fdrop(fp, td);
1098 	return (error);
1099 }
1100 
#ifdef COMPAT_43
/*
 * Old creat(2): create a file.
 *
 * Equivalent to kern_open() on a user-space path with
 * O_WRONLY | O_CREAT | O_TRUNC and the supplied mode.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	const int creat_flags = O_WRONLY | O_CREAT | O_TRUNC;

	return (kern_open(td, uap->path, UIO_USERSPACE, creat_flags,
	    uap->mode));
}
#endif /* COMPAT_43 */
1124 
1125 /*
1126  * Create a special file.
1127  */
1128 #ifndef _SYS_SYSPROTO_H_
1129 struct mknod_args {
1130 	char	*path;
1131 	int	mode;
1132 	int	dev;
1133 };
1134 #endif
1135 int
1136 mknod(td, uap)
1137 	struct thread *td;
1138 	register struct mknod_args /* {
1139 		char *path;
1140 		int mode;
1141 		int dev;
1142 	} */ *uap;
1143 {
1144 
1145 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1146 }
1147 
1148 int
1149 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1150     int dev)
1151 {
1152 	struct vnode *vp;
1153 	struct mount *mp;
1154 	struct vattr vattr;
1155 	int error;
1156 	int whiteout = 0;
1157 	struct nameidata nd;
1158 	int vfslocked;
1159 
1160 	AUDIT_ARG(mode, mode);
1161 	AUDIT_ARG(dev, dev);
1162 	switch (mode & S_IFMT) {
1163 	case S_IFCHR:
1164 	case S_IFBLK:
1165 		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
1166 		break;
1167 	case S_IFMT:
1168 		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
1169 		break;
1170 	case S_IFWHT:
1171 		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
1172 		break;
1173 	default:
1174 		error = EINVAL;
1175 		break;
1176 	}
1177 	if (error)
1178 		return (error);
1179 restart:
1180 	bwillwrite();
1181 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1182 	    pathseg, path, td);
1183 	if ((error = namei(&nd)) != 0)
1184 		return (error);
1185 	vfslocked = NDHASGIANT(&nd);
1186 	vp = nd.ni_vp;
1187 	if (vp != NULL) {
1188 		NDFREE(&nd, NDF_ONLY_PNBUF);
1189 		if (vp == nd.ni_dvp)
1190 			vrele(nd.ni_dvp);
1191 		else
1192 			vput(nd.ni_dvp);
1193 		vrele(vp);
1194 		VFS_UNLOCK_GIANT(vfslocked);
1195 		return (EEXIST);
1196 	} else {
1197 		VATTR_NULL(&vattr);
1198 		FILEDESC_SLOCK(td->td_proc->p_fd);
1199 		vattr.va_mode = (mode & ALLPERMS) &
1200 		    ~td->td_proc->p_fd->fd_cmask;
1201 		FILEDESC_SUNLOCK(td->td_proc->p_fd);
1202 		vattr.va_rdev = dev;
1203 		whiteout = 0;
1204 
1205 		switch (mode & S_IFMT) {
1206 		case S_IFMT:	/* used by badsect to flag bad sectors */
1207 			vattr.va_type = VBAD;
1208 			break;
1209 		case S_IFCHR:
1210 			vattr.va_type = VCHR;
1211 			break;
1212 		case S_IFBLK:
1213 			vattr.va_type = VBLK;
1214 			break;
1215 		case S_IFWHT:
1216 			whiteout = 1;
1217 			break;
1218 		default:
1219 			panic("kern_mknod: invalid mode");
1220 		}
1221 	}
1222 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1223 		NDFREE(&nd, NDF_ONLY_PNBUF);
1224 		vput(nd.ni_dvp);
1225 		VFS_UNLOCK_GIANT(vfslocked);
1226 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1227 			return (error);
1228 		goto restart;
1229 	}
1230 #ifdef MAC
1231 	if (error == 0 && !whiteout)
1232 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1233 		    &nd.ni_cnd, &vattr);
1234 #endif
1235 	if (!error) {
1236 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1237 		if (whiteout)
1238 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1239 		else {
1240 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1241 						&nd.ni_cnd, &vattr);
1242 			if (error == 0)
1243 				vput(nd.ni_vp);
1244 		}
1245 	}
1246 	NDFREE(&nd, NDF_ONLY_PNBUF);
1247 	vput(nd.ni_dvp);
1248 	vn_finished_write(mp);
1249 	VFS_UNLOCK_GIANT(vfslocked);
1250 	return (error);
1251 }
1252 
1253 /*
1254  * Create a named pipe.
1255  */
1256 #ifndef _SYS_SYSPROTO_H_
1257 struct mkfifo_args {
1258 	char	*path;
1259 	int	mode;
1260 };
1261 #endif
1262 int
1263 mkfifo(td, uap)
1264 	struct thread *td;
1265 	register struct mkfifo_args /* {
1266 		char *path;
1267 		int mode;
1268 	} */ *uap;
1269 {
1270 
1271 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1272 }
1273 
1274 int
1275 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1276 {
1277 	struct mount *mp;
1278 	struct vattr vattr;
1279 	int error;
1280 	struct nameidata nd;
1281 	int vfslocked;
1282 
1283 	AUDIT_ARG(mode, mode);
1284 restart:
1285 	bwillwrite();
1286 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1287 	    pathseg, path, td);
1288 	if ((error = namei(&nd)) != 0)
1289 		return (error);
1290 	vfslocked = NDHASGIANT(&nd);
1291 	if (nd.ni_vp != NULL) {
1292 		NDFREE(&nd, NDF_ONLY_PNBUF);
1293 		if (nd.ni_vp == nd.ni_dvp)
1294 			vrele(nd.ni_dvp);
1295 		else
1296 			vput(nd.ni_dvp);
1297 		vrele(nd.ni_vp);
1298 		VFS_UNLOCK_GIANT(vfslocked);
1299 		return (EEXIST);
1300 	}
1301 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1302 		NDFREE(&nd, NDF_ONLY_PNBUF);
1303 		vput(nd.ni_dvp);
1304 		VFS_UNLOCK_GIANT(vfslocked);
1305 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1306 			return (error);
1307 		goto restart;
1308 	}
1309 	VATTR_NULL(&vattr);
1310 	vattr.va_type = VFIFO;
1311 	FILEDESC_SLOCK(td->td_proc->p_fd);
1312 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1313 	FILEDESC_SUNLOCK(td->td_proc->p_fd);
1314 #ifdef MAC
1315 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1316 	    &vattr);
1317 	if (error)
1318 		goto out;
1319 #endif
1320 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1321 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1322 	if (error == 0)
1323 		vput(nd.ni_vp);
1324 #ifdef MAC
1325 out:
1326 #endif
1327 	vput(nd.ni_dvp);
1328 	vn_finished_write(mp);
1329 	VFS_UNLOCK_GIANT(vfslocked);
1330 	NDFREE(&nd, NDF_ONLY_PNBUF);
1331 	return (error);
1332 }
1333 
1334 /*
1335  * Make a hard file link.
1336  */
1337 #ifndef _SYS_SYSPROTO_H_
1338 struct link_args {
1339 	char	*path;
1340 	char	*link;
1341 };
1342 #endif
1343 int
1344 link(td, uap)
1345 	struct thread *td;
1346 	register struct link_args /* {
1347 		char *path;
1348 		char *link;
1349 	} */ *uap;
1350 {
1351 	int error;
1352 
1353 	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1354 	return (error);
1355 }
1356 
1357 static int hardlink_check_uid = 0;
1358 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1359     &hardlink_check_uid, 0,
1360     "Unprivileged processes cannot create hard links to files owned by other "
1361     "users");
1362 static int hardlink_check_gid = 0;
1363 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1364     &hardlink_check_gid, 0,
1365     "Unprivileged processes cannot create hard links to files owned by other "
1366     "groups");
1367 
1368 static int
1369 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1370 {
1371 	struct vattr va;
1372 	int error;
1373 
1374 	if (!hardlink_check_uid && !hardlink_check_gid)
1375 		return (0);
1376 
1377 	error = VOP_GETATTR(vp, &va, cred, td);
1378 	if (error != 0)
1379 		return (error);
1380 
1381 	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
1382 		error = priv_check_cred(cred, PRIV_VFS_LINK,
1383 		    SUSER_ALLOWJAIL);
1384 		if (error)
1385 			return (error);
1386 	}
1387 
1388 	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
1389 		error = priv_check_cred(cred, PRIV_VFS_LINK,
1390 		    SUSER_ALLOWJAIL);
1391 		if (error)
1392 			return (error);
1393 	}
1394 
1395 	return (0);
1396 }
1397 
1398 int
1399 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1400 {
1401 	struct vnode *vp;
1402 	struct mount *mp;
1403 	struct nameidata nd;
1404 	int vfslocked;
1405 	int lvfslocked;
1406 	int error;
1407 
1408 	bwillwrite();
1409 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
1410 	if ((error = namei(&nd)) != 0)
1411 		return (error);
1412 	vfslocked = NDHASGIANT(&nd);
1413 	NDFREE(&nd, NDF_ONLY_PNBUF);
1414 	vp = nd.ni_vp;
1415 	if (vp->v_type == VDIR) {
1416 		vrele(vp);
1417 		VFS_UNLOCK_GIANT(vfslocked);
1418 		return (EPERM);		/* POSIX */
1419 	}
1420 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1421 		vrele(vp);
1422 		VFS_UNLOCK_GIANT(vfslocked);
1423 		return (error);
1424 	}
1425 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1426 	    segflg, link, td);
1427 	if ((error = namei(&nd)) == 0) {
1428 		lvfslocked = NDHASGIANT(&nd);
1429 		if (nd.ni_vp != NULL) {
1430 			if (nd.ni_dvp == nd.ni_vp)
1431 				vrele(nd.ni_dvp);
1432 			else
1433 				vput(nd.ni_dvp);
1434 			vrele(nd.ni_vp);
1435 			error = EEXIST;
1436 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1437 		    == 0) {
1438 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1439 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1440 			error = can_hardlink(vp, td, td->td_ucred);
1441 			if (error == 0)
1442 #ifdef MAC
1443 				error = mac_check_vnode_link(td->td_ucred,
1444 				    nd.ni_dvp, vp, &nd.ni_cnd);
1445 			if (error == 0)
1446 #endif
1447 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1448 			VOP_UNLOCK(vp, 0, td);
1449 			vput(nd.ni_dvp);
1450 		}
1451 		NDFREE(&nd, NDF_ONLY_PNBUF);
1452 		VFS_UNLOCK_GIANT(lvfslocked);
1453 	}
1454 	vrele(vp);
1455 	vn_finished_write(mp);
1456 	VFS_UNLOCK_GIANT(vfslocked);
1457 	return (error);
1458 }
1459 
1460 /*
1461  * Make a symbolic link.
1462  */
1463 #ifndef _SYS_SYSPROTO_H_
1464 struct symlink_args {
1465 	char	*path;
1466 	char	*link;
1467 };
1468 #endif
1469 int
1470 symlink(td, uap)
1471 	struct thread *td;
1472 	register struct symlink_args /* {
1473 		char *path;
1474 		char *link;
1475 	} */ *uap;
1476 {
1477 
1478 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1479 }
1480 
1481 int
1482 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1483 {
1484 	struct mount *mp;
1485 	struct vattr vattr;
1486 	char *syspath;
1487 	int error;
1488 	struct nameidata nd;
1489 	int vfslocked;
1490 
1491 	if (segflg == UIO_SYSSPACE) {
1492 		syspath = path;
1493 	} else {
1494 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1495 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1496 			goto out;
1497 	}
1498 	AUDIT_ARG(text, syspath);
1499 restart:
1500 	bwillwrite();
1501 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1502 	    segflg, link, td);
1503 	if ((error = namei(&nd)) != 0)
1504 		goto out;
1505 	vfslocked = NDHASGIANT(&nd);
1506 	if (nd.ni_vp) {
1507 		NDFREE(&nd, NDF_ONLY_PNBUF);
1508 		if (nd.ni_vp == nd.ni_dvp)
1509 			vrele(nd.ni_dvp);
1510 		else
1511 			vput(nd.ni_dvp);
1512 		vrele(nd.ni_vp);
1513 		VFS_UNLOCK_GIANT(vfslocked);
1514 		error = EEXIST;
1515 		goto out;
1516 	}
1517 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1518 		NDFREE(&nd, NDF_ONLY_PNBUF);
1519 		vput(nd.ni_dvp);
1520 		VFS_UNLOCK_GIANT(vfslocked);
1521 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1522 			goto out;
1523 		goto restart;
1524 	}
1525 	VATTR_NULL(&vattr);
1526 	FILEDESC_SLOCK(td->td_proc->p_fd);
1527 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1528 	FILEDESC_SUNLOCK(td->td_proc->p_fd);
1529 #ifdef MAC
1530 	vattr.va_type = VLNK;
1531 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1532 	    &vattr);
1533 	if (error)
1534 		goto out2;
1535 #endif
1536 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1537 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1538 	if (error == 0)
1539 		vput(nd.ni_vp);
1540 #ifdef MAC
1541 out2:
1542 #endif
1543 	NDFREE(&nd, NDF_ONLY_PNBUF);
1544 	vput(nd.ni_dvp);
1545 	vn_finished_write(mp);
1546 	VFS_UNLOCK_GIANT(vfslocked);
1547 out:
1548 	if (segflg != UIO_SYSSPACE)
1549 		uma_zfree(namei_zone, syspath);
1550 	return (error);
1551 }
1552 
1553 /*
1554  * Delete a whiteout from the filesystem.
1555  */
1556 int
1557 undelete(td, uap)
1558 	struct thread *td;
1559 	register struct undelete_args /* {
1560 		char *path;
1561 	} */ *uap;
1562 {
1563 	int error;
1564 	struct mount *mp;
1565 	struct nameidata nd;
1566 	int vfslocked;
1567 
1568 restart:
1569 	bwillwrite();
1570 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1571 	    UIO_USERSPACE, uap->path, td);
1572 	error = namei(&nd);
1573 	if (error)
1574 		return (error);
1575 	vfslocked = NDHASGIANT(&nd);
1576 
1577 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1578 		NDFREE(&nd, NDF_ONLY_PNBUF);
1579 		if (nd.ni_vp == nd.ni_dvp)
1580 			vrele(nd.ni_dvp);
1581 		else
1582 			vput(nd.ni_dvp);
1583 		if (nd.ni_vp)
1584 			vrele(nd.ni_vp);
1585 		VFS_UNLOCK_GIANT(vfslocked);
1586 		return (EEXIST);
1587 	}
1588 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1589 		NDFREE(&nd, NDF_ONLY_PNBUF);
1590 		vput(nd.ni_dvp);
1591 		VFS_UNLOCK_GIANT(vfslocked);
1592 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1593 			return (error);
1594 		goto restart;
1595 	}
1596 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1597 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1598 	NDFREE(&nd, NDF_ONLY_PNBUF);
1599 	vput(nd.ni_dvp);
1600 	vn_finished_write(mp);
1601 	VFS_UNLOCK_GIANT(vfslocked);
1602 	return (error);
1603 }
1604 
1605 /*
1606  * Delete a name from the filesystem.
1607  */
1608 #ifndef _SYS_SYSPROTO_H_
1609 struct unlink_args {
1610 	char	*path;
1611 };
1612 #endif
1613 int
1614 unlink(td, uap)
1615 	struct thread *td;
1616 	struct unlink_args /* {
1617 		char *path;
1618 	} */ *uap;
1619 {
1620 	int error;
1621 
1622 	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1623 	return (error);
1624 }
1625 
1626 int
1627 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1628 {
1629 	struct mount *mp;
1630 	struct vnode *vp;
1631 	int error;
1632 	struct nameidata nd;
1633 	int vfslocked;
1634 
1635 restart:
1636 	bwillwrite();
1637 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1638 	    pathseg, path, td);
1639 	if ((error = namei(&nd)) != 0)
1640 		return (error == EINVAL ? EPERM : error);
1641 	vfslocked = NDHASGIANT(&nd);
1642 	vp = nd.ni_vp;
1643 	if (vp->v_type == VDIR)
1644 		error = EPERM;		/* POSIX */
1645 	else {
1646 		/*
1647 		 * The root of a mounted filesystem cannot be deleted.
1648 		 *
1649 		 * XXX: can this only be a VDIR case?
1650 		 */
1651 		if (vp->v_vflag & VV_ROOT)
1652 			error = EBUSY;
1653 	}
1654 	if (error == 0) {
1655 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1656 			NDFREE(&nd, NDF_ONLY_PNBUF);
1657 			vput(nd.ni_dvp);
1658 			if (vp == nd.ni_dvp)
1659 				vrele(vp);
1660 			else
1661 				vput(vp);
1662 			VFS_UNLOCK_GIANT(vfslocked);
1663 			if ((error = vn_start_write(NULL, &mp,
1664 			    V_XSLEEP | PCATCH)) != 0)
1665 				return (error);
1666 			goto restart;
1667 		}
1668 #ifdef MAC
1669 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1670 		    &nd.ni_cnd);
1671 		if (error)
1672 			goto out;
1673 #endif
1674 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1675 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1676 #ifdef MAC
1677 out:
1678 #endif
1679 		vn_finished_write(mp);
1680 	}
1681 	NDFREE(&nd, NDF_ONLY_PNBUF);
1682 	vput(nd.ni_dvp);
1683 	if (vp == nd.ni_dvp)
1684 		vrele(vp);
1685 	else
1686 		vput(vp);
1687 	VFS_UNLOCK_GIANT(vfslocked);
1688 	return (error);
1689 }
1690 
1691 /*
1692  * Reposition read/write file offset.
1693  */
1694 #ifndef _SYS_SYSPROTO_H_
1695 struct lseek_args {
1696 	int	fd;
1697 	int	pad;
1698 	off_t	offset;
1699 	int	whence;
1700 };
1701 #endif
1702 int
1703 lseek(td, uap)
1704 	struct thread *td;
1705 	register struct lseek_args /* {
1706 		int fd;
1707 		int pad;
1708 		off_t offset;
1709 		int whence;
1710 	} */ *uap;
1711 {
1712 	struct ucred *cred = td->td_ucred;
1713 	struct file *fp;
1714 	struct vnode *vp;
1715 	struct vattr vattr;
1716 	off_t offset;
1717 	int error, noneg;
1718 	int vfslocked;
1719 
1720 	if ((error = fget(td, uap->fd, &fp)) != 0)
1721 		return (error);
1722 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1723 		fdrop(fp, td);
1724 		return (ESPIPE);
1725 	}
1726 	vp = fp->f_vnode;
1727 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1728 	noneg = (vp->v_type != VCHR);
1729 	offset = uap->offset;
1730 	switch (uap->whence) {
1731 	case L_INCR:
1732 		if (noneg &&
1733 		    (fp->f_offset < 0 ||
1734 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1735 			error = EOVERFLOW;
1736 			break;
1737 		}
1738 		offset += fp->f_offset;
1739 		break;
1740 	case L_XTND:
1741 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1742 		error = VOP_GETATTR(vp, &vattr, cred, td);
1743 		VOP_UNLOCK(vp, 0, td);
1744 		if (error)
1745 			break;
1746 		if (noneg &&
1747 		    (vattr.va_size > OFF_MAX ||
1748 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1749 			error = EOVERFLOW;
1750 			break;
1751 		}
1752 		offset += vattr.va_size;
1753 		break;
1754 	case L_SET:
1755 		break;
1756 	case SEEK_DATA:
1757 		error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td);
1758 		break;
1759 	case SEEK_HOLE:
1760 		error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td);
1761 		break;
1762 	default:
1763 		error = EINVAL;
1764 	}
1765 	if (error == 0 && noneg && offset < 0)
1766 		error = EINVAL;
1767 	if (error != 0)
1768 		goto drop;
1769 	fp->f_offset = offset;
1770 	*(off_t *)(td->td_retval) = fp->f_offset;
1771 drop:
1772 	fdrop(fp, td);
1773 	VFS_UNLOCK_GIANT(vfslocked);
1774 	return (error);
1775 }
1776 
1777 #if defined(COMPAT_43)
1778 /*
1779  * Reposition read/write file offset.
1780  */
1781 #ifndef _SYS_SYSPROTO_H_
1782 struct olseek_args {
1783 	int	fd;
1784 	long	offset;
1785 	int	whence;
1786 };
1787 #endif
1788 int
1789 olseek(td, uap)
1790 	struct thread *td;
1791 	register struct olseek_args /* {
1792 		int fd;
1793 		long offset;
1794 		int whence;
1795 	} */ *uap;
1796 {
1797 	struct lseek_args /* {
1798 		int fd;
1799 		int pad;
1800 		off_t offset;
1801 		int whence;
1802 	} */ nuap;
1803 	int error;
1804 
1805 	nuap.fd = uap->fd;
1806 	nuap.offset = uap->offset;
1807 	nuap.whence = uap->whence;
1808 	error = lseek(td, &nuap);
1809 	return (error);
1810 }
1811 #endif /* COMPAT_43 */
1812 
1813 /*
1814  * Check access permissions using passed credentials.
1815  */
1816 static int
1817 vn_access(vp, user_flags, cred, td)
1818 	struct vnode	*vp;
1819 	int		user_flags;
1820 	struct ucred	*cred;
1821 	struct thread	*td;
1822 {
1823 	int error, flags;
1824 
1825 	/* Flags == 0 means only check for existence. */
1826 	error = 0;
1827 	if (user_flags) {
1828 		flags = 0;
1829 		if (user_flags & R_OK)
1830 			flags |= VREAD;
1831 		if (user_flags & W_OK)
1832 			flags |= VWRITE;
1833 		if (user_flags & X_OK)
1834 			flags |= VEXEC;
1835 #ifdef MAC
1836 		error = mac_check_vnode_access(cred, vp, flags);
1837 		if (error)
1838 			return (error);
1839 #endif
1840 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1841 			error = VOP_ACCESS(vp, flags, cred, td);
1842 	}
1843 	return (error);
1844 }
1845 
1846 /*
1847  * Check access permissions using "real" credentials.
1848  */
1849 #ifndef _SYS_SYSPROTO_H_
1850 struct access_args {
1851 	char	*path;
1852 	int	flags;
1853 };
1854 #endif
1855 int
1856 access(td, uap)
1857 	struct thread *td;
1858 	register struct access_args /* {
1859 		char *path;
1860 		int flags;
1861 	} */ *uap;
1862 {
1863 
1864 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1865 }
1866 
1867 int
1868 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1869 {
1870 	struct ucred *cred, *tmpcred;
1871 	register struct vnode *vp;
1872 	struct nameidata nd;
1873 	int vfslocked;
1874 	int error;
1875 
1876 	/*
1877 	 * Create and modify a temporary credential instead of one that
1878 	 * is potentially shared.  This could also mess up socket
1879 	 * buffer accounting which can run in an interrupt context.
1880 	 */
1881 	cred = td->td_ucred;
1882 	tmpcred = crdup(cred);
1883 	tmpcred->cr_uid = cred->cr_ruid;
1884 	tmpcred->cr_groups[0] = cred->cr_rgid;
1885 	td->td_ucred = tmpcred;
1886 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1887 	    pathseg, path, td);
1888 	if ((error = namei(&nd)) != 0)
1889 		goto out1;
1890 	vfslocked = NDHASGIANT(&nd);
1891 	vp = nd.ni_vp;
1892 
1893 	error = vn_access(vp, flags, tmpcred, td);
1894 	NDFREE(&nd, NDF_ONLY_PNBUF);
1895 	vput(vp);
1896 	VFS_UNLOCK_GIANT(vfslocked);
1897 out1:
1898 	td->td_ucred = cred;
1899 	crfree(tmpcred);
1900 	return (error);
1901 }
1902 
1903 /*
1904  * Check access permissions using "effective" credentials.
1905  */
1906 #ifndef _SYS_SYSPROTO_H_
1907 struct eaccess_args {
1908 	char	*path;
1909 	int	flags;
1910 };
1911 #endif
1912 int
1913 eaccess(td, uap)
1914 	struct thread *td;
1915 	register struct eaccess_args /* {
1916 		char *path;
1917 		int flags;
1918 	} */ *uap;
1919 {
1920 
1921 	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1922 }
1923 
1924 int
1925 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1926 {
1927 	struct nameidata nd;
1928 	struct vnode *vp;
1929 	int vfslocked;
1930 	int error;
1931 
1932 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1933 	    pathseg, path, td);
1934 	if ((error = namei(&nd)) != 0)
1935 		return (error);
1936 	vp = nd.ni_vp;
1937 	vfslocked = NDHASGIANT(&nd);
1938 	error = vn_access(vp, flags, td->td_ucred, td);
1939 	NDFREE(&nd, NDF_ONLY_PNBUF);
1940 	vput(vp);
1941 	VFS_UNLOCK_GIANT(vfslocked);
1942 	return (error);
1943 }
1944 
1945 #if defined(COMPAT_43)
1946 /*
1947  * Get file status; this version follows links.
1948  */
1949 #ifndef _SYS_SYSPROTO_H_
1950 struct ostat_args {
1951 	char	*path;
1952 	struct ostat *ub;
1953 };
1954 #endif
1955 int
1956 ostat(td, uap)
1957 	struct thread *td;
1958 	register struct ostat_args /* {
1959 		char *path;
1960 		struct ostat *ub;
1961 	} */ *uap;
1962 {
1963 	struct stat sb;
1964 	struct ostat osb;
1965 	int error;
1966 
1967 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1968 	if (error)
1969 		return (error);
1970 	cvtstat(&sb, &osb);
1971 	error = copyout(&osb, uap->ub, sizeof (osb));
1972 	return (error);
1973 }
1974 
1975 /*
1976  * Get file status; this version does not follow links.
1977  */
1978 #ifndef _SYS_SYSPROTO_H_
1979 struct olstat_args {
1980 	char	*path;
1981 	struct ostat *ub;
1982 };
1983 #endif
1984 int
1985 olstat(td, uap)
1986 	struct thread *td;
1987 	register struct olstat_args /* {
1988 		char *path;
1989 		struct ostat *ub;
1990 	} */ *uap;
1991 {
1992 	struct stat sb;
1993 	struct ostat osb;
1994 	int error;
1995 
1996 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
1997 	if (error)
1998 		return (error);
1999 	cvtstat(&sb, &osb);
2000 	error = copyout(&osb, uap->ub, sizeof (osb));
2001 	return (error);
2002 }
2003 
2004 /*
2005  * Convert from an old to a new stat structure.
2006  */
2007 void
2008 cvtstat(st, ost)
2009 	struct stat *st;
2010 	struct ostat *ost;
2011 {
2012 
2013 	ost->st_dev = st->st_dev;
2014 	ost->st_ino = st->st_ino;
2015 	ost->st_mode = st->st_mode;
2016 	ost->st_nlink = st->st_nlink;
2017 	ost->st_uid = st->st_uid;
2018 	ost->st_gid = st->st_gid;
2019 	ost->st_rdev = st->st_rdev;
2020 	if (st->st_size < (quad_t)1 << 32)
2021 		ost->st_size = st->st_size;
2022 	else
2023 		ost->st_size = -2;
2024 	ost->st_atime = st->st_atime;
2025 	ost->st_mtime = st->st_mtime;
2026 	ost->st_ctime = st->st_ctime;
2027 	ost->st_blksize = st->st_blksize;
2028 	ost->st_blocks = st->st_blocks;
2029 	ost->st_flags = st->st_flags;
2030 	ost->st_gen = st->st_gen;
2031 }
2032 #endif /* COMPAT_43 */
2033 
2034 /*
2035  * Get file status; this version follows links.
2036  */
2037 #ifndef _SYS_SYSPROTO_H_
2038 struct stat_args {
2039 	char	*path;
2040 	struct stat *ub;
2041 };
2042 #endif
2043 int
2044 stat(td, uap)
2045 	struct thread *td;
2046 	register struct stat_args /* {
2047 		char *path;
2048 		struct stat *ub;
2049 	} */ *uap;
2050 {
2051 	struct stat sb;
2052 	int error;
2053 
2054 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2055 	if (error == 0)
2056 		error = copyout(&sb, uap->ub, sizeof (sb));
2057 	return (error);
2058 }
2059 
2060 int
2061 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2062 {
2063 	struct nameidata nd;
2064 	struct stat sb;
2065 	int error, vfslocked;
2066 
2067 	NDINIT(&nd, LOOKUP,
2068 	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2069 	    pathseg, path, td);
2070 	if ((error = namei(&nd)) != 0)
2071 		return (error);
2072 	vfslocked = NDHASGIANT(&nd);
2073 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2074 	NDFREE(&nd, NDF_ONLY_PNBUF);
2075 	vput(nd.ni_vp);
2076 	VFS_UNLOCK_GIANT(vfslocked);
2077 	if (mtx_owned(&Giant))
2078 		printf("stat(%d): %s\n", vfslocked, path);
2079 	if (error)
2080 		return (error);
2081 	*sbp = sb;
2082 	return (0);
2083 }
2084 
2085 /*
2086  * Get file status; this version does not follow links.
2087  */
2088 #ifndef _SYS_SYSPROTO_H_
2089 struct lstat_args {
2090 	char	*path;
2091 	struct stat *ub;
2092 };
2093 #endif
2094 int
2095 lstat(td, uap)
2096 	struct thread *td;
2097 	register struct lstat_args /* {
2098 		char *path;
2099 		struct stat *ub;
2100 	} */ *uap;
2101 {
2102 	struct stat sb;
2103 	int error;
2104 
2105 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2106 	if (error == 0)
2107 		error = copyout(&sb, uap->ub, sizeof (sb));
2108 	return (error);
2109 }
2110 
2111 int
2112 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2113 {
2114 	struct vnode *vp;
2115 	struct stat sb;
2116 	struct nameidata nd;
2117 	int error, vfslocked;
2118 
2119 	NDINIT(&nd, LOOKUP,
2120 	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2121 	    pathseg, path, td);
2122 	if ((error = namei(&nd)) != 0)
2123 		return (error);
2124 	vfslocked = NDHASGIANT(&nd);
2125 	vp = nd.ni_vp;
2126 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2127 	NDFREE(&nd, NDF_ONLY_PNBUF);
2128 	vput(vp);
2129 	VFS_UNLOCK_GIANT(vfslocked);
2130 	if (error)
2131 		return (error);
2132 	*sbp = sb;
2133 	return (0);
2134 }
2135 
2136 /*
2137  * Implementation of the NetBSD [l]stat() functions.
2138  */
2139 void
2140 cvtnstat(sb, nsb)
2141 	struct stat *sb;
2142 	struct nstat *nsb;
2143 {
2144 	bzero(nsb, sizeof *nsb);
2145 	nsb->st_dev = sb->st_dev;
2146 	nsb->st_ino = sb->st_ino;
2147 	nsb->st_mode = sb->st_mode;
2148 	nsb->st_nlink = sb->st_nlink;
2149 	nsb->st_uid = sb->st_uid;
2150 	nsb->st_gid = sb->st_gid;
2151 	nsb->st_rdev = sb->st_rdev;
2152 	nsb->st_atimespec = sb->st_atimespec;
2153 	nsb->st_mtimespec = sb->st_mtimespec;
2154 	nsb->st_ctimespec = sb->st_ctimespec;
2155 	nsb->st_size = sb->st_size;
2156 	nsb->st_blocks = sb->st_blocks;
2157 	nsb->st_blksize = sb->st_blksize;
2158 	nsb->st_flags = sb->st_flags;
2159 	nsb->st_gen = sb->st_gen;
2160 	nsb->st_birthtimespec = sb->st_birthtimespec;
2161 }
2162 
2163 #ifndef _SYS_SYSPROTO_H_
2164 struct nstat_args {
2165 	char	*path;
2166 	struct nstat *ub;
2167 };
2168 #endif
2169 int
2170 nstat(td, uap)
2171 	struct thread *td;
2172 	register struct nstat_args /* {
2173 		char *path;
2174 		struct nstat *ub;
2175 	} */ *uap;
2176 {
2177 	struct stat sb;
2178 	struct nstat nsb;
2179 	int error;
2180 
2181 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2182 	if (error)
2183 		return (error);
2184 	cvtnstat(&sb, &nsb);
2185 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2186 	return (error);
2187 }
2188 
2189 /*
2190  * NetBSD lstat.  Get file status; this version does not follow links.
2191  */
2192 #ifndef _SYS_SYSPROTO_H_
2193 struct lstat_args {
2194 	char	*path;
2195 	struct stat *ub;
2196 };
2197 #endif
2198 int
2199 nlstat(td, uap)
2200 	struct thread *td;
2201 	register struct nlstat_args /* {
2202 		char *path;
2203 		struct nstat *ub;
2204 	} */ *uap;
2205 {
2206 	struct stat sb;
2207 	struct nstat nsb;
2208 	int error;
2209 
2210 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2211 	if (error)
2212 		return (error);
2213 	cvtnstat(&sb, &nsb);
2214 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2215 	return (error);
2216 }
2217 
2218 /*
2219  * Get configurable pathname variables.
2220  */
2221 #ifndef _SYS_SYSPROTO_H_
2222 struct pathconf_args {
2223 	char	*path;
2224 	int	name;
2225 };
2226 #endif
2227 int
2228 pathconf(td, uap)
2229 	struct thread *td;
2230 	register struct pathconf_args /* {
2231 		char *path;
2232 		int name;
2233 	} */ *uap;
2234 {
2235 
2236 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2237 }
2238 
2239 int
2240 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2241 {
2242 	struct nameidata nd;
2243 	int error, vfslocked;
2244 
2245 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2246 	    pathseg, path, td);
2247 	if ((error = namei(&nd)) != 0)
2248 		return (error);
2249 	vfslocked = NDHASGIANT(&nd);
2250 	NDFREE(&nd, NDF_ONLY_PNBUF);
2251 
2252 	/* If asynchronous I/O is available, it works for all files. */
2253 	if (name == _PC_ASYNC_IO)
2254 		td->td_retval[0] = async_io_version;
2255 	else
2256 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2257 	vput(nd.ni_vp);
2258 	VFS_UNLOCK_GIANT(vfslocked);
2259 	return (error);
2260 }
2261 
2262 /*
2263  * Return target name of a symbolic link.
2264  */
2265 #ifndef _SYS_SYSPROTO_H_
2266 struct readlink_args {
2267 	char	*path;
2268 	char	*buf;
2269 	int	count;
2270 };
2271 #endif
2272 int
2273 readlink(td, uap)
2274 	struct thread *td;
2275 	register struct readlink_args /* {
2276 		char *path;
2277 		char *buf;
2278 		int count;
2279 	} */ *uap;
2280 {
2281 
2282 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2283 	    UIO_USERSPACE, uap->count));
2284 }
2285 
2286 int
2287 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2288     enum uio_seg bufseg, int count)
2289 {
2290 	register struct vnode *vp;
2291 	struct iovec aiov;
2292 	struct uio auio;
2293 	int error;
2294 	struct nameidata nd;
2295 	int vfslocked;
2296 
2297 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2298 	    pathseg, path, td);
2299 	if ((error = namei(&nd)) != 0)
2300 		return (error);
2301 	NDFREE(&nd, NDF_ONLY_PNBUF);
2302 	vfslocked = NDHASGIANT(&nd);
2303 	vp = nd.ni_vp;
2304 #ifdef MAC
2305 	error = mac_check_vnode_readlink(td->td_ucred, vp);
2306 	if (error) {
2307 		vput(vp);
2308 		VFS_UNLOCK_GIANT(vfslocked);
2309 		return (error);
2310 	}
2311 #endif
2312 	if (vp->v_type != VLNK)
2313 		error = EINVAL;
2314 	else {
2315 		aiov.iov_base = buf;
2316 		aiov.iov_len = count;
2317 		auio.uio_iov = &aiov;
2318 		auio.uio_iovcnt = 1;
2319 		auio.uio_offset = 0;
2320 		auio.uio_rw = UIO_READ;
2321 		auio.uio_segflg = bufseg;
2322 		auio.uio_td = td;
2323 		auio.uio_resid = count;
2324 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2325 	}
2326 	vput(vp);
2327 	VFS_UNLOCK_GIANT(vfslocked);
2328 	td->td_retval[0] = count - auio.uio_resid;
2329 	return (error);
2330 }
2331 
2332 /*
2333  * Common implementation code for chflags() and fchflags().
2334  */
2335 static int
2336 setfflags(td, vp, flags)
2337 	struct thread *td;
2338 	struct vnode *vp;
2339 	int flags;
2340 {
2341 	int error;
2342 	struct mount *mp;
2343 	struct vattr vattr;
2344 
2345 	/*
2346 	 * Prevent non-root users from setting flags on devices.  When
2347 	 * a device is reused, users can retain ownership of the device
2348 	 * if they are allowed to set flags and programs assume that
2349 	 * chown can't fail when done as root.
2350 	 */
2351 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2352 		error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV,
2353 		    SUSER_ALLOWJAIL);
2354 		if (error)
2355 			return (error);
2356 	}
2357 
2358 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2359 		return (error);
2360 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2361 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2362 	VATTR_NULL(&vattr);
2363 	vattr.va_flags = flags;
2364 #ifdef MAC
2365 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2366 	if (error == 0)
2367 #endif
2368 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2369 	VOP_UNLOCK(vp, 0, td);
2370 	vn_finished_write(mp);
2371 	return (error);
2372 }
2373 
2374 /*
2375  * Change flags of a file given a path name.
2376  */
2377 #ifndef _SYS_SYSPROTO_H_
2378 struct chflags_args {
2379 	char	*path;
2380 	int	flags;
2381 };
2382 #endif
2383 int
2384 chflags(td, uap)
2385 	struct thread *td;
2386 	register struct chflags_args /* {
2387 		char *path;
2388 		int flags;
2389 	} */ *uap;
2390 {
2391 	int error;
2392 	struct nameidata nd;
2393 	int vfslocked;
2394 
2395 	AUDIT_ARG(fflags, uap->flags);
2396 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2397 	    uap->path, td);
2398 	if ((error = namei(&nd)) != 0)
2399 		return (error);
2400 	NDFREE(&nd, NDF_ONLY_PNBUF);
2401 	vfslocked = NDHASGIANT(&nd);
2402 	error = setfflags(td, nd.ni_vp, uap->flags);
2403 	vrele(nd.ni_vp);
2404 	VFS_UNLOCK_GIANT(vfslocked);
2405 	return (error);
2406 }
2407 
2408 /*
2409  * Same as chflags() but doesn't follow symlinks.
2410  */
2411 int
2412 lchflags(td, uap)
2413 	struct thread *td;
2414 	register struct lchflags_args /* {
2415 		char *path;
2416 		int flags;
2417 	} */ *uap;
2418 {
2419 	int error;
2420 	struct nameidata nd;
2421 	int vfslocked;
2422 
2423 	AUDIT_ARG(fflags, uap->flags);
2424 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2425 	    uap->path, td);
2426 	if ((error = namei(&nd)) != 0)
2427 		return (error);
2428 	vfslocked = NDHASGIANT(&nd);
2429 	NDFREE(&nd, NDF_ONLY_PNBUF);
2430 	error = setfflags(td, nd.ni_vp, uap->flags);
2431 	vrele(nd.ni_vp);
2432 	VFS_UNLOCK_GIANT(vfslocked);
2433 	return (error);
2434 }
2435 
2436 /*
2437  * Change flags of a file given a file descriptor.
2438  */
2439 #ifndef _SYS_SYSPROTO_H_
2440 struct fchflags_args {
2441 	int	fd;
2442 	int	flags;
2443 };
2444 #endif
2445 int
2446 fchflags(td, uap)
2447 	struct thread *td;
2448 	register struct fchflags_args /* {
2449 		int fd;
2450 		int flags;
2451 	} */ *uap;
2452 {
2453 	struct file *fp;
2454 	int vfslocked;
2455 	int error;
2456 
2457 	AUDIT_ARG(fd, uap->fd);
2458 	AUDIT_ARG(fflags, uap->flags);
2459 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2460 		return (error);
2461 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2462 #ifdef AUDIT
2463 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2464 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2465 	VOP_UNLOCK(fp->f_vnode, 0, td);
2466 #endif
2467 	error = setfflags(td, fp->f_vnode, uap->flags);
2468 	VFS_UNLOCK_GIANT(vfslocked);
2469 	fdrop(fp, td);
2470 	return (error);
2471 }
2472 
2473 /*
2474  * Common implementation code for chmod(), lchmod() and fchmod().
2475  */
2476 static int
2477 setfmode(td, vp, mode)
2478 	struct thread *td;
2479 	struct vnode *vp;
2480 	int mode;
2481 {
2482 	int error;
2483 	struct mount *mp;
2484 	struct vattr vattr;
2485 
2486 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2487 		return (error);
2488 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2489 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2490 	VATTR_NULL(&vattr);
2491 	vattr.va_mode = mode & ALLPERMS;
2492 #ifdef MAC
2493 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2494 	if (error == 0)
2495 #endif
2496 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2497 	VOP_UNLOCK(vp, 0, td);
2498 	vn_finished_write(mp);
2499 	return (error);
2500 }
2501 
2502 /*
2503  * Change mode of a file given path name.
2504  */
2505 #ifndef _SYS_SYSPROTO_H_
2506 struct chmod_args {
2507 	char	*path;
2508 	int	mode;
2509 };
2510 #endif
2511 int
2512 chmod(td, uap)
2513 	struct thread *td;
2514 	register struct chmod_args /* {
2515 		char *path;
2516 		int mode;
2517 	} */ *uap;
2518 {
2519 
2520 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2521 }
2522 
2523 int
2524 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2525 {
2526 	int error;
2527 	struct nameidata nd;
2528 	int vfslocked;
2529 
2530 	AUDIT_ARG(mode, mode);
2531 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2532 	if ((error = namei(&nd)) != 0)
2533 		return (error);
2534 	vfslocked = NDHASGIANT(&nd);
2535 	NDFREE(&nd, NDF_ONLY_PNBUF);
2536 	error = setfmode(td, nd.ni_vp, mode);
2537 	vrele(nd.ni_vp);
2538 	VFS_UNLOCK_GIANT(vfslocked);
2539 	return (error);
2540 }
2541 
2542 /*
2543  * Change mode of a file given path name (don't follow links.)
2544  */
2545 #ifndef _SYS_SYSPROTO_H_
2546 struct lchmod_args {
2547 	char	*path;
2548 	int	mode;
2549 };
2550 #endif
2551 int
2552 lchmod(td, uap)
2553 	struct thread *td;
2554 	register struct lchmod_args /* {
2555 		char *path;
2556 		int mode;
2557 	} */ *uap;
2558 {
2559 	int error;
2560 	struct nameidata nd;
2561 	int vfslocked;
2562 
2563 	AUDIT_ARG(mode, (mode_t)uap->mode);
2564 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2565 	    uap->path, td);
2566 	if ((error = namei(&nd)) != 0)
2567 		return (error);
2568 	vfslocked = NDHASGIANT(&nd);
2569 	NDFREE(&nd, NDF_ONLY_PNBUF);
2570 	error = setfmode(td, nd.ni_vp, uap->mode);
2571 	vrele(nd.ni_vp);
2572 	VFS_UNLOCK_GIANT(vfslocked);
2573 	return (error);
2574 }
2575 
2576 /*
2577  * Change mode of a file given a file descriptor.
2578  */
2579 #ifndef _SYS_SYSPROTO_H_
2580 struct fchmod_args {
2581 	int	fd;
2582 	int	mode;
2583 };
2584 #endif
2585 int
2586 fchmod(td, uap)
2587 	struct thread *td;
2588 	register struct fchmod_args /* {
2589 		int fd;
2590 		int mode;
2591 	} */ *uap;
2592 {
2593 	struct file *fp;
2594 	int vfslocked;
2595 	int error;
2596 
2597 	AUDIT_ARG(fd, uap->fd);
2598 	AUDIT_ARG(mode, uap->mode);
2599 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2600 		return (error);
2601 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2602 #ifdef AUDIT
2603 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2604 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2605 	VOP_UNLOCK(fp->f_vnode, 0, td);
2606 #endif
2607 	error = setfmode(td, fp->f_vnode, uap->mode);
2608 	VFS_UNLOCK_GIANT(vfslocked);
2609 	fdrop(fp, td);
2610 	return (error);
2611 }
2612 
2613 /*
2614  * Common implementation for chown(), lchown(), and fchown()
2615  */
2616 static int
2617 setfown(td, vp, uid, gid)
2618 	struct thread *td;
2619 	struct vnode *vp;
2620 	uid_t uid;
2621 	gid_t gid;
2622 {
2623 	int error;
2624 	struct mount *mp;
2625 	struct vattr vattr;
2626 
2627 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2628 		return (error);
2629 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2630 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2631 	VATTR_NULL(&vattr);
2632 	vattr.va_uid = uid;
2633 	vattr.va_gid = gid;
2634 #ifdef MAC
2635 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2636 	    vattr.va_gid);
2637 	if (error == 0)
2638 #endif
2639 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2640 	VOP_UNLOCK(vp, 0, td);
2641 	vn_finished_write(mp);
2642 	return (error);
2643 }
2644 
2645 /*
2646  * Set ownership given a path name.
2647  */
2648 #ifndef _SYS_SYSPROTO_H_
2649 struct chown_args {
2650 	char	*path;
2651 	int	uid;
2652 	int	gid;
2653 };
2654 #endif
2655 int
2656 chown(td, uap)
2657 	struct thread *td;
2658 	register struct chown_args /* {
2659 		char *path;
2660 		int uid;
2661 		int gid;
2662 	} */ *uap;
2663 {
2664 
2665 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2666 }
2667 
2668 int
2669 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2670     int gid)
2671 {
2672 	int error;
2673 	struct nameidata nd;
2674 	int vfslocked;
2675 
2676 	AUDIT_ARG(owner, uid, gid);
2677 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2678 	if ((error = namei(&nd)) != 0)
2679 		return (error);
2680 	vfslocked = NDHASGIANT(&nd);
2681 	NDFREE(&nd, NDF_ONLY_PNBUF);
2682 	error = setfown(td, nd.ni_vp, uid, gid);
2683 	vrele(nd.ni_vp);
2684 	VFS_UNLOCK_GIANT(vfslocked);
2685 	return (error);
2686 }
2687 
2688 /*
2689  * Set ownership given a path name, do not cross symlinks.
2690  */
2691 #ifndef _SYS_SYSPROTO_H_
2692 struct lchown_args {
2693 	char	*path;
2694 	int	uid;
2695 	int	gid;
2696 };
2697 #endif
2698 int
2699 lchown(td, uap)
2700 	struct thread *td;
2701 	register struct lchown_args /* {
2702 		char *path;
2703 		int uid;
2704 		int gid;
2705 	} */ *uap;
2706 {
2707 
2708 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2709 }
2710 
2711 int
2712 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2713     int gid)
2714 {
2715 	int error;
2716 	struct nameidata nd;
2717 	int vfslocked;
2718 
2719 	AUDIT_ARG(owner, uid, gid);
2720 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2721 	if ((error = namei(&nd)) != 0)
2722 		return (error);
2723 	vfslocked = NDHASGIANT(&nd);
2724 	NDFREE(&nd, NDF_ONLY_PNBUF);
2725 	error = setfown(td, nd.ni_vp, uid, gid);
2726 	vrele(nd.ni_vp);
2727 	VFS_UNLOCK_GIANT(vfslocked);
2728 	return (error);
2729 }
2730 
2731 /*
2732  * Set ownership given a file descriptor.
2733  */
2734 #ifndef _SYS_SYSPROTO_H_
2735 struct fchown_args {
2736 	int	fd;
2737 	int	uid;
2738 	int	gid;
2739 };
2740 #endif
2741 int
2742 fchown(td, uap)
2743 	struct thread *td;
2744 	register struct fchown_args /* {
2745 		int fd;
2746 		int uid;
2747 		int gid;
2748 	} */ *uap;
2749 {
2750 	struct file *fp;
2751 	int vfslocked;
2752 	int error;
2753 
2754 	AUDIT_ARG(fd, uap->fd);
2755 	AUDIT_ARG(owner, uap->uid, uap->gid);
2756 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2757 		return (error);
2758 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2759 #ifdef AUDIT
2760 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2761 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2762 	VOP_UNLOCK(fp->f_vnode, 0, td);
2763 #endif
2764 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2765 	VFS_UNLOCK_GIANT(vfslocked);
2766 	fdrop(fp, td);
2767 	return (error);
2768 }
2769 
2770 /*
2771  * Common implementation code for utimes(), lutimes(), and futimes().
2772  */
2773 static int
2774 getutimes(usrtvp, tvpseg, tsp)
2775 	const struct timeval *usrtvp;
2776 	enum uio_seg tvpseg;
2777 	struct timespec *tsp;
2778 {
2779 	struct timeval tv[2];
2780 	const struct timeval *tvp;
2781 	int error;
2782 
2783 	if (usrtvp == NULL) {
2784 		microtime(&tv[0]);
2785 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2786 		tsp[1] = tsp[0];
2787 	} else {
2788 		if (tvpseg == UIO_SYSSPACE) {
2789 			tvp = usrtvp;
2790 		} else {
2791 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2792 				return (error);
2793 			tvp = tv;
2794 		}
2795 
2796 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2797 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2798 			return (EINVAL);
2799 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2800 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2801 	}
2802 	return (0);
2803 }
2804 
2805 /*
2806  * Common implementation code for utimes(), lutimes(), and futimes().
2807  */
2808 static int
2809 setutimes(td, vp, ts, numtimes, nullflag)
2810 	struct thread *td;
2811 	struct vnode *vp;
2812 	const struct timespec *ts;
2813 	int numtimes;
2814 	int nullflag;
2815 {
2816 	int error, setbirthtime;
2817 	struct mount *mp;
2818 	struct vattr vattr;
2819 
2820 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2821 		return (error);
2822 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2823 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2824 	setbirthtime = 0;
2825 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2826 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2827 		setbirthtime = 1;
2828 	VATTR_NULL(&vattr);
2829 	vattr.va_atime = ts[0];
2830 	vattr.va_mtime = ts[1];
2831 	if (setbirthtime)
2832 		vattr.va_birthtime = ts[1];
2833 	if (numtimes > 2)
2834 		vattr.va_birthtime = ts[2];
2835 	if (nullflag)
2836 		vattr.va_vaflags |= VA_UTIMES_NULL;
2837 #ifdef MAC
2838 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2839 	    vattr.va_mtime);
2840 #endif
2841 	if (error == 0)
2842 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2843 	VOP_UNLOCK(vp, 0, td);
2844 	vn_finished_write(mp);
2845 	return (error);
2846 }
2847 
2848 /*
2849  * Set the access and modification times of a file.
2850  */
2851 #ifndef _SYS_SYSPROTO_H_
2852 struct utimes_args {
2853 	char	*path;
2854 	struct	timeval *tptr;
2855 };
2856 #endif
2857 int
2858 utimes(td, uap)
2859 	struct thread *td;
2860 	register struct utimes_args /* {
2861 		char *path;
2862 		struct timeval *tptr;
2863 	} */ *uap;
2864 {
2865 
2866 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2867 	    UIO_USERSPACE));
2868 }
2869 
2870 int
2871 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2872     struct timeval *tptr, enum uio_seg tptrseg)
2873 {
2874 	struct timespec ts[2];
2875 	int error;
2876 	struct nameidata nd;
2877 	int vfslocked;
2878 
2879 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2880 		return (error);
2881 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2882 	if ((error = namei(&nd)) != 0)
2883 		return (error);
2884 	vfslocked = NDHASGIANT(&nd);
2885 	NDFREE(&nd, NDF_ONLY_PNBUF);
2886 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2887 	vrele(nd.ni_vp);
2888 	VFS_UNLOCK_GIANT(vfslocked);
2889 	return (error);
2890 }
2891 
2892 /*
2893  * Set the access and modification times of a file.
2894  */
2895 #ifndef _SYS_SYSPROTO_H_
2896 struct lutimes_args {
2897 	char	*path;
2898 	struct	timeval *tptr;
2899 };
2900 #endif
2901 int
2902 lutimes(td, uap)
2903 	struct thread *td;
2904 	register struct lutimes_args /* {
2905 		char *path;
2906 		struct timeval *tptr;
2907 	} */ *uap;
2908 {
2909 
2910 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2911 	    UIO_USERSPACE));
2912 }
2913 
2914 int
2915 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2916     struct timeval *tptr, enum uio_seg tptrseg)
2917 {
2918 	struct timespec ts[2];
2919 	int error;
2920 	struct nameidata nd;
2921 	int vfslocked;
2922 
2923 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2924 		return (error);
2925 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2926 	if ((error = namei(&nd)) != 0)
2927 		return (error);
2928 	vfslocked = NDHASGIANT(&nd);
2929 	NDFREE(&nd, NDF_ONLY_PNBUF);
2930 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2931 	vrele(nd.ni_vp);
2932 	VFS_UNLOCK_GIANT(vfslocked);
2933 	return (error);
2934 }
2935 
2936 /*
2937  * Set the access and modification times of a file.
2938  */
2939 #ifndef _SYS_SYSPROTO_H_
2940 struct futimes_args {
2941 	int	fd;
2942 	struct	timeval *tptr;
2943 };
2944 #endif
2945 int
2946 futimes(td, uap)
2947 	struct thread *td;
2948 	register struct futimes_args /* {
2949 		int  fd;
2950 		struct timeval *tptr;
2951 	} */ *uap;
2952 {
2953 
2954 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2955 }
2956 
2957 int
2958 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2959     enum uio_seg tptrseg)
2960 {
2961 	struct timespec ts[2];
2962 	struct file *fp;
2963 	int vfslocked;
2964 	int error;
2965 
2966 	AUDIT_ARG(fd, fd);
2967 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2968 		return (error);
2969 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2970 		return (error);
2971 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2972 #ifdef AUDIT
2973 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2974 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2975 	VOP_UNLOCK(fp->f_vnode, 0, td);
2976 #endif
2977 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2978 	VFS_UNLOCK_GIANT(vfslocked);
2979 	fdrop(fp, td);
2980 	return (error);
2981 }
2982 
2983 /*
2984  * Truncate a file given its path name.
2985  */
2986 #ifndef _SYS_SYSPROTO_H_
2987 struct truncate_args {
2988 	char	*path;
2989 	int	pad;
2990 	off_t	length;
2991 };
2992 #endif
2993 int
2994 truncate(td, uap)
2995 	struct thread *td;
2996 	register struct truncate_args /* {
2997 		char *path;
2998 		int pad;
2999 		off_t length;
3000 	} */ *uap;
3001 {
3002 
3003 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3004 }
3005 
3006 int
3007 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3008 {
3009 	struct mount *mp;
3010 	struct vnode *vp;
3011 	struct vattr vattr;
3012 	int error;
3013 	struct nameidata nd;
3014 	int vfslocked;
3015 
3016 	if (length < 0)
3017 		return(EINVAL);
3018 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3019 	if ((error = namei(&nd)) != 0)
3020 		return (error);
3021 	vfslocked = NDHASGIANT(&nd);
3022 	vp = nd.ni_vp;
3023 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3024 		vrele(vp);
3025 		VFS_UNLOCK_GIANT(vfslocked);
3026 		return (error);
3027 	}
3028 	NDFREE(&nd, NDF_ONLY_PNBUF);
3029 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3030 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3031 	if (vp->v_type == VDIR)
3032 		error = EISDIR;
3033 #ifdef MAC
3034 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
3035 	}
3036 #endif
3037 	else if ((error = vn_writechk(vp)) == 0 &&
3038 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3039 		VATTR_NULL(&vattr);
3040 		vattr.va_size = length;
3041 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3042 	}
3043 	vput(vp);
3044 	vn_finished_write(mp);
3045 	VFS_UNLOCK_GIANT(vfslocked);
3046 	return (error);
3047 }
3048 
3049 /*
3050  * Truncate a file given a file descriptor.
3051  */
3052 #ifndef _SYS_SYSPROTO_H_
3053 struct ftruncate_args {
3054 	int	fd;
3055 	int	pad;
3056 	off_t	length;
3057 };
3058 #endif
3059 int
3060 ftruncate(td, uap)
3061 	struct thread *td;
3062 	register struct ftruncate_args /* {
3063 		int fd;
3064 		int pad;
3065 		off_t length;
3066 	} */ *uap;
3067 {
3068 	struct mount *mp;
3069 	struct vattr vattr;
3070 	struct vnode *vp;
3071 	struct file *fp;
3072 	int vfslocked;
3073 	int error;
3074 
3075 	AUDIT_ARG(fd, uap->fd);
3076 	if (uap->length < 0)
3077 		return(EINVAL);
3078 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3079 		return (error);
3080 	if ((fp->f_flag & FWRITE) == 0) {
3081 		fdrop(fp, td);
3082 		return (EINVAL);
3083 	}
3084 	vp = fp->f_vnode;
3085 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3086 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3087 		goto drop;
3088 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3089 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3090 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3091 	if (vp->v_type == VDIR)
3092 		error = EISDIR;
3093 #ifdef MAC
3094 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3095 	    vp))) {
3096 	}
3097 #endif
3098 	else if ((error = vn_writechk(vp)) == 0) {
3099 		VATTR_NULL(&vattr);
3100 		vattr.va_size = uap->length;
3101 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3102 	}
3103 	VOP_UNLOCK(vp, 0, td);
3104 	vn_finished_write(mp);
3105 drop:
3106 	VFS_UNLOCK_GIANT(vfslocked);
3107 	fdrop(fp, td);
3108 	return (error);
3109 }
3110 
3111 #if defined(COMPAT_43)
3112 /*
3113  * Truncate a file given its path name.
3114  */
3115 #ifndef _SYS_SYSPROTO_H_
3116 struct otruncate_args {
3117 	char	*path;
3118 	long	length;
3119 };
3120 #endif
3121 int
3122 otruncate(td, uap)
3123 	struct thread *td;
3124 	register struct otruncate_args /* {
3125 		char *path;
3126 		long length;
3127 	} */ *uap;
3128 {
3129 	struct truncate_args /* {
3130 		char *path;
3131 		int pad;
3132 		off_t length;
3133 	} */ nuap;
3134 
3135 	nuap.path = uap->path;
3136 	nuap.length = uap->length;
3137 	return (truncate(td, &nuap));
3138 }
3139 
3140 /*
3141  * Truncate a file given a file descriptor.
3142  */
3143 #ifndef _SYS_SYSPROTO_H_
3144 struct oftruncate_args {
3145 	int	fd;
3146 	long	length;
3147 };
3148 #endif
3149 int
3150 oftruncate(td, uap)
3151 	struct thread *td;
3152 	register struct oftruncate_args /* {
3153 		int fd;
3154 		long length;
3155 	} */ *uap;
3156 {
3157 	struct ftruncate_args /* {
3158 		int fd;
3159 		int pad;
3160 		off_t length;
3161 	} */ nuap;
3162 
3163 	nuap.fd = uap->fd;
3164 	nuap.length = uap->length;
3165 	return (ftruncate(td, &nuap));
3166 }
3167 #endif /* COMPAT_43 */
3168 
3169 /*
3170  * Sync an open file.
3171  */
3172 #ifndef _SYS_SYSPROTO_H_
3173 struct fsync_args {
3174 	int	fd;
3175 };
3176 #endif
3177 int
3178 fsync(td, uap)
3179 	struct thread *td;
3180 	struct fsync_args /* {
3181 		int fd;
3182 	} */ *uap;
3183 {
3184 	struct vnode *vp;
3185 	struct mount *mp;
3186 	struct file *fp;
3187 	int vfslocked;
3188 	int error;
3189 
3190 	AUDIT_ARG(fd, uap->fd);
3191 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3192 		return (error);
3193 	vp = fp->f_vnode;
3194 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3195 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3196 		goto drop;
3197 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3198 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3199 	if (vp->v_object != NULL) {
3200 		VM_OBJECT_LOCK(vp->v_object);
3201 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3202 		VM_OBJECT_UNLOCK(vp->v_object);
3203 	}
3204 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3205 
3206 	VOP_UNLOCK(vp, 0, td);
3207 	vn_finished_write(mp);
3208 drop:
3209 	VFS_UNLOCK_GIANT(vfslocked);
3210 	fdrop(fp, td);
3211 	return (error);
3212 }
3213 
3214 /*
3215  * Rename files.  Source and destination must either both be directories, or
3216  * both not be directories.  If target is a directory, it must be empty.
3217  */
3218 #ifndef _SYS_SYSPROTO_H_
3219 struct rename_args {
3220 	char	*from;
3221 	char	*to;
3222 };
3223 #endif
3224 int
3225 rename(td, uap)
3226 	struct thread *td;
3227 	register struct rename_args /* {
3228 		char *from;
3229 		char *to;
3230 	} */ *uap;
3231 {
3232 
3233 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3234 }
3235 
3236 int
3237 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3238 {
3239 	struct mount *mp = NULL;
3240 	struct vnode *tvp, *fvp, *tdvp;
3241 	struct nameidata fromnd, tond;
3242 	int tvfslocked;
3243 	int fvfslocked;
3244 	int error;
3245 
3246 	bwillwrite();
3247 #ifdef MAC
3248 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3249 	    AUDITVNODE1, pathseg, from, td);
3250 #else
3251 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3252 	    AUDITVNODE1, pathseg, from, td);
3253 #endif
3254 	if ((error = namei(&fromnd)) != 0)
3255 		return (error);
3256 	fvfslocked = NDHASGIANT(&fromnd);
3257 	tvfslocked = 0;
3258 #ifdef MAC
3259 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3260 	    fromnd.ni_vp, &fromnd.ni_cnd);
3261 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3262 	if (fromnd.ni_dvp != fromnd.ni_vp)
3263 		VOP_UNLOCK(fromnd.ni_vp, 0, td);
3264 #endif
3265 	fvp = fromnd.ni_vp;
3266 	if (error == 0)
3267 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3268 	if (error != 0) {
3269 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3270 		vrele(fromnd.ni_dvp);
3271 		vrele(fvp);
3272 		goto out1;
3273 	}
3274 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3275 	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3276 	if (fromnd.ni_vp->v_type == VDIR)
3277 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3278 	if ((error = namei(&tond)) != 0) {
3279 		/* Translate error code for rename("dir1", "dir2/."). */
3280 		if (error == EISDIR && fvp->v_type == VDIR)
3281 			error = EINVAL;
3282 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3283 		vrele(fromnd.ni_dvp);
3284 		vrele(fvp);
3285 		vn_finished_write(mp);
3286 		goto out1;
3287 	}
3288 	tvfslocked = NDHASGIANT(&tond);
3289 	tdvp = tond.ni_dvp;
3290 	tvp = tond.ni_vp;
3291 	if (tvp != NULL) {
3292 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3293 			error = ENOTDIR;
3294 			goto out;
3295 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3296 			error = EISDIR;
3297 			goto out;
3298 		}
3299 	}
3300 	if (fvp == tdvp)
3301 		error = EINVAL;
3302 	/*
3303 	 * If the source is the same as the destination (that is, if they
3304 	 * are links to the same vnode), then there is nothing to do.
3305 	 */
3306 	if (fvp == tvp)
3307 		error = -1;
3308 #ifdef MAC
3309 	else
3310 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3311 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3312 #endif
3313 out:
3314 	if (!error) {
3315 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3316 		if (fromnd.ni_dvp != tdvp) {
3317 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3318 		}
3319 		if (tvp) {
3320 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3321 		}
3322 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3323 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3324 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3325 		NDFREE(&tond, NDF_ONLY_PNBUF);
3326 	} else {
3327 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3328 		NDFREE(&tond, NDF_ONLY_PNBUF);
3329 		if (tvp)
3330 			vput(tvp);
3331 		if (tdvp == tvp)
3332 			vrele(tdvp);
3333 		else
3334 			vput(tdvp);
3335 		vrele(fromnd.ni_dvp);
3336 		vrele(fvp);
3337 	}
3338 	vrele(tond.ni_startdir);
3339 	vn_finished_write(mp);
3340 out1:
3341 	if (fromnd.ni_startdir)
3342 		vrele(fromnd.ni_startdir);
3343 	VFS_UNLOCK_GIANT(fvfslocked);
3344 	VFS_UNLOCK_GIANT(tvfslocked);
3345 	if (error == -1)
3346 		return (0);
3347 	return (error);
3348 }
3349 
3350 /*
3351  * Make a directory file.
3352  */
3353 #ifndef _SYS_SYSPROTO_H_
3354 struct mkdir_args {
3355 	char	*path;
3356 	int	mode;
3357 };
3358 #endif
3359 int
3360 mkdir(td, uap)
3361 	struct thread *td;
3362 	register struct mkdir_args /* {
3363 		char *path;
3364 		int mode;
3365 	} */ *uap;
3366 {
3367 
3368 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3369 }
3370 
3371 int
3372 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3373 {
3374 	struct mount *mp;
3375 	struct vnode *vp;
3376 	struct vattr vattr;
3377 	int error;
3378 	struct nameidata nd;
3379 	int vfslocked;
3380 
3381 	AUDIT_ARG(mode, mode);
3382 restart:
3383 	bwillwrite();
3384 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3385 	    segflg, path, td);
3386 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3387 	if ((error = namei(&nd)) != 0)
3388 		return (error);
3389 	vfslocked = NDHASGIANT(&nd);
3390 	vp = nd.ni_vp;
3391 	if (vp != NULL) {
3392 		NDFREE(&nd, NDF_ONLY_PNBUF);
3393 		/*
3394 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3395 		 * the strange behaviour of leaving the vnode unlocked
3396 		 * if the target is the same vnode as the parent.
3397 		 */
3398 		if (vp == nd.ni_dvp)
3399 			vrele(nd.ni_dvp);
3400 		else
3401 			vput(nd.ni_dvp);
3402 		vrele(vp);
3403 		VFS_UNLOCK_GIANT(vfslocked);
3404 		return (EEXIST);
3405 	}
3406 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3407 		NDFREE(&nd, NDF_ONLY_PNBUF);
3408 		vput(nd.ni_dvp);
3409 		VFS_UNLOCK_GIANT(vfslocked);
3410 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3411 			return (error);
3412 		goto restart;
3413 	}
3414 	VATTR_NULL(&vattr);
3415 	vattr.va_type = VDIR;
3416 	FILEDESC_SLOCK(td->td_proc->p_fd);
3417 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3418 	FILEDESC_SUNLOCK(td->td_proc->p_fd);
3419 #ifdef MAC
3420 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3421 	    &vattr);
3422 	if (error)
3423 		goto out;
3424 #endif
3425 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3426 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3427 #ifdef MAC
3428 out:
3429 #endif
3430 	NDFREE(&nd, NDF_ONLY_PNBUF);
3431 	vput(nd.ni_dvp);
3432 	if (!error)
3433 		vput(nd.ni_vp);
3434 	vn_finished_write(mp);
3435 	VFS_UNLOCK_GIANT(vfslocked);
3436 	return (error);
3437 }
3438 
3439 /*
3440  * Remove a directory file.
3441  */
3442 #ifndef _SYS_SYSPROTO_H_
3443 struct rmdir_args {
3444 	char	*path;
3445 };
3446 #endif
3447 int
3448 rmdir(td, uap)
3449 	struct thread *td;
3450 	struct rmdir_args /* {
3451 		char *path;
3452 	} */ *uap;
3453 {
3454 
3455 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3456 }
3457 
3458 int
3459 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3460 {
3461 	struct mount *mp;
3462 	struct vnode *vp;
3463 	int error;
3464 	struct nameidata nd;
3465 	int vfslocked;
3466 
3467 restart:
3468 	bwillwrite();
3469 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3470 	    pathseg, path, td);
3471 	if ((error = namei(&nd)) != 0)
3472 		return (error);
3473 	vfslocked = NDHASGIANT(&nd);
3474 	vp = nd.ni_vp;
3475 	if (vp->v_type != VDIR) {
3476 		error = ENOTDIR;
3477 		goto out;
3478 	}
3479 	/*
3480 	 * No rmdir "." please.
3481 	 */
3482 	if (nd.ni_dvp == vp) {
3483 		error = EINVAL;
3484 		goto out;
3485 	}
3486 	/*
3487 	 * The root of a mounted filesystem cannot be deleted.
3488 	 */
3489 	if (vp->v_vflag & VV_ROOT) {
3490 		error = EBUSY;
3491 		goto out;
3492 	}
3493 #ifdef MAC
3494 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3495 	    &nd.ni_cnd);
3496 	if (error)
3497 		goto out;
3498 #endif
3499 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3500 		NDFREE(&nd, NDF_ONLY_PNBUF);
3501 		vput(vp);
3502 		if (nd.ni_dvp == vp)
3503 			vrele(nd.ni_dvp);
3504 		else
3505 			vput(nd.ni_dvp);
3506 		VFS_UNLOCK_GIANT(vfslocked);
3507 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3508 			return (error);
3509 		goto restart;
3510 	}
3511 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3512 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3513 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3514 	vn_finished_write(mp);
3515 out:
3516 	NDFREE(&nd, NDF_ONLY_PNBUF);
3517 	vput(vp);
3518 	if (nd.ni_dvp == vp)
3519 		vrele(nd.ni_dvp);
3520 	else
3521 		vput(nd.ni_dvp);
3522 	VFS_UNLOCK_GIANT(vfslocked);
3523 	return (error);
3524 }
3525 
3526 #ifdef COMPAT_43
3527 /*
3528  * Read a block of directory entries in a filesystem independent format.
3529  */
3530 #ifndef _SYS_SYSPROTO_H_
3531 struct ogetdirentries_args {
3532 	int	fd;
3533 	char	*buf;
3534 	u_int	count;
3535 	long	*basep;
3536 };
3537 #endif
3538 int
3539 ogetdirentries(td, uap)
3540 	struct thread *td;
3541 	register struct ogetdirentries_args /* {
3542 		int fd;
3543 		char *buf;
3544 		u_int count;
3545 		long *basep;
3546 	} */ *uap;
3547 {
3548 	struct vnode *vp;
3549 	struct file *fp;
3550 	struct uio auio, kuio;
3551 	struct iovec aiov, kiov;
3552 	struct dirent *dp, *edp;
3553 	caddr_t dirbuf;
3554 	int error, eofflag, readcnt, vfslocked;
3555 	long loff;
3556 
3557 	/* XXX arbitrary sanity limit on `count'. */
3558 	if (uap->count > 64 * 1024)
3559 		return (EINVAL);
3560 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3561 		return (error);
3562 	if ((fp->f_flag & FREAD) == 0) {
3563 		fdrop(fp, td);
3564 		return (EBADF);
3565 	}
3566 	vp = fp->f_vnode;
3567 unionread:
3568 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3569 	if (vp->v_type != VDIR) {
3570 		VFS_UNLOCK_GIANT(vfslocked);
3571 		fdrop(fp, td);
3572 		return (EINVAL);
3573 	}
3574 	aiov.iov_base = uap->buf;
3575 	aiov.iov_len = uap->count;
3576 	auio.uio_iov = &aiov;
3577 	auio.uio_iovcnt = 1;
3578 	auio.uio_rw = UIO_READ;
3579 	auio.uio_segflg = UIO_USERSPACE;
3580 	auio.uio_td = td;
3581 	auio.uio_resid = uap->count;
3582 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3583 	loff = auio.uio_offset = fp->f_offset;
3584 #ifdef MAC
3585 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3586 	if (error) {
3587 		VOP_UNLOCK(vp, 0, td);
3588 		VFS_UNLOCK_GIANT(vfslocked);
3589 		fdrop(fp, td);
3590 		return (error);
3591 	}
3592 #endif
3593 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3594 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3595 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3596 			    NULL, NULL);
3597 			fp->f_offset = auio.uio_offset;
3598 		} else
3599 #	endif
3600 	{
3601 		kuio = auio;
3602 		kuio.uio_iov = &kiov;
3603 		kuio.uio_segflg = UIO_SYSSPACE;
3604 		kiov.iov_len = uap->count;
3605 		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3606 		kiov.iov_base = dirbuf;
3607 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3608 			    NULL, NULL);
3609 		fp->f_offset = kuio.uio_offset;
3610 		if (error == 0) {
3611 			readcnt = uap->count - kuio.uio_resid;
3612 			edp = (struct dirent *)&dirbuf[readcnt];
3613 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3614 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3615 					/*
3616 					 * The expected low byte of
3617 					 * dp->d_namlen is our dp->d_type.
3618 					 * The high MBZ byte of dp->d_namlen
3619 					 * is our dp->d_namlen.
3620 					 */
3621 					dp->d_type = dp->d_namlen;
3622 					dp->d_namlen = 0;
3623 #				else
3624 					/*
3625 					 * The dp->d_type is the high byte
3626 					 * of the expected dp->d_namlen,
3627 					 * so must be zero'ed.
3628 					 */
3629 					dp->d_type = 0;
3630 #				endif
3631 				if (dp->d_reclen > 0) {
3632 					dp = (struct dirent *)
3633 					    ((char *)dp + dp->d_reclen);
3634 				} else {
3635 					error = EIO;
3636 					break;
3637 				}
3638 			}
3639 			if (dp >= edp)
3640 				error = uiomove(dirbuf, readcnt, &auio);
3641 		}
3642 		FREE(dirbuf, M_TEMP);
3643 	}
3644 	if (error) {
3645 		VOP_UNLOCK(vp, 0, td);
3646 		VFS_UNLOCK_GIANT(vfslocked);
3647 		fdrop(fp, td);
3648 		return (error);
3649 	}
3650 	if (uap->count == auio.uio_resid &&
3651 	    (vp->v_vflag & VV_ROOT) &&
3652 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
3653 		struct vnode *tvp = vp;
3654 		vp = vp->v_mount->mnt_vnodecovered;
3655 		VREF(vp);
3656 		fp->f_vnode = vp;
3657 		fp->f_data = vp;
3658 		fp->f_offset = 0;
3659 		vput(tvp);
3660 		VFS_UNLOCK_GIANT(vfslocked);
3661 		goto unionread;
3662 	}
3663 	VOP_UNLOCK(vp, 0, td);
3664 	VFS_UNLOCK_GIANT(vfslocked);
3665 	error = copyout(&loff, uap->basep, sizeof(long));
3666 	fdrop(fp, td);
3667 	td->td_retval[0] = uap->count - auio.uio_resid;
3668 	return (error);
3669 }
3670 #endif /* COMPAT_43 */
3671 
3672 /*
3673  * Read a block of directory entries in a filesystem independent format.
3674  */
3675 #ifndef _SYS_SYSPROTO_H_
3676 struct getdirentries_args {
3677 	int	fd;
3678 	char	*buf;
3679 	u_int	count;
3680 	long	*basep;
3681 };
3682 #endif
3683 int
3684 getdirentries(td, uap)
3685 	struct thread *td;
3686 	register struct getdirentries_args /* {
3687 		int fd;
3688 		char *buf;
3689 		u_int count;
3690 		long *basep;
3691 	} */ *uap;
3692 {
3693 	struct vnode *vp;
3694 	struct file *fp;
3695 	struct uio auio;
3696 	struct iovec aiov;
3697 	int vfslocked;
3698 	long loff;
3699 	int error, eofflag;
3700 
3701 	AUDIT_ARG(fd, uap->fd);
3702 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3703 		return (error);
3704 	if ((fp->f_flag & FREAD) == 0) {
3705 		fdrop(fp, td);
3706 		return (EBADF);
3707 	}
3708 	vp = fp->f_vnode;
3709 unionread:
3710 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3711 	if (vp->v_type != VDIR) {
3712 		VFS_UNLOCK_GIANT(vfslocked);
3713 		error = EINVAL;
3714 		goto fail;
3715 	}
3716 	aiov.iov_base = uap->buf;
3717 	aiov.iov_len = uap->count;
3718 	auio.uio_iov = &aiov;
3719 	auio.uio_iovcnt = 1;
3720 	auio.uio_rw = UIO_READ;
3721 	auio.uio_segflg = UIO_USERSPACE;
3722 	auio.uio_td = td;
3723 	auio.uio_resid = uap->count;
3724 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3725 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3726 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3727 	loff = auio.uio_offset = fp->f_offset;
3728 #ifdef MAC
3729 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3730 	if (error == 0)
3731 #endif
3732 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3733 		    NULL);
3734 	fp->f_offset = auio.uio_offset;
3735 	if (error) {
3736 		VOP_UNLOCK(vp, 0, td);
3737 		VFS_UNLOCK_GIANT(vfslocked);
3738 		goto fail;
3739 	}
3740 	if (uap->count == auio.uio_resid &&
3741 	    (vp->v_vflag & VV_ROOT) &&
3742 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
3743 		struct vnode *tvp = vp;
3744 		vp = vp->v_mount->mnt_vnodecovered;
3745 		VREF(vp);
3746 		fp->f_vnode = vp;
3747 		fp->f_data = vp;
3748 		fp->f_offset = 0;
3749 		vput(tvp);
3750 		VFS_UNLOCK_GIANT(vfslocked);
3751 		goto unionread;
3752 	}
3753 	VOP_UNLOCK(vp, 0, td);
3754 	VFS_UNLOCK_GIANT(vfslocked);
3755 	if (uap->basep != NULL) {
3756 		error = copyout(&loff, uap->basep, sizeof(long));
3757 	}
3758 	td->td_retval[0] = uap->count - auio.uio_resid;
3759 fail:
3760 	fdrop(fp, td);
3761 	return (error);
3762 }
3763 
3764 #ifndef _SYS_SYSPROTO_H_
3765 struct getdents_args {
3766 	int fd;
3767 	char *buf;
3768 	size_t count;
3769 };
3770 #endif
3771 int
3772 getdents(td, uap)
3773 	struct thread *td;
3774 	register struct getdents_args /* {
3775 		int fd;
3776 		char *buf;
3777 		u_int count;
3778 	} */ *uap;
3779 {
3780 	struct getdirentries_args ap;
3781 	ap.fd = uap->fd;
3782 	ap.buf = uap->buf;
3783 	ap.count = uap->count;
3784 	ap.basep = NULL;
3785 	return (getdirentries(td, &ap));
3786 }
3787 
3788 /*
3789  * Set the mode mask for creation of filesystem nodes.
3790  */
3791 #ifndef _SYS_SYSPROTO_H_
3792 struct umask_args {
3793 	int	newmask;
3794 };
3795 #endif
3796 int
3797 umask(td, uap)
3798 	struct thread *td;
3799 	struct umask_args /* {
3800 		int newmask;
3801 	} */ *uap;
3802 {
3803 	register struct filedesc *fdp;
3804 
3805 	FILEDESC_XLOCK(td->td_proc->p_fd);
3806 	fdp = td->td_proc->p_fd;
3807 	td->td_retval[0] = fdp->fd_cmask;
3808 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3809 	FILEDESC_XUNLOCK(td->td_proc->p_fd);
3810 	return (0);
3811 }
3812 
3813 /*
3814  * Void all references to file by ripping underlying filesystem away from
3815  * vnode.
3816  */
3817 #ifndef _SYS_SYSPROTO_H_
3818 struct revoke_args {
3819 	char	*path;
3820 };
3821 #endif
3822 int
3823 revoke(td, uap)
3824 	struct thread *td;
3825 	register struct revoke_args /* {
3826 		char *path;
3827 	} */ *uap;
3828 {
3829 	struct vnode *vp;
3830 	struct vattr vattr;
3831 	int error;
3832 	struct nameidata nd;
3833 	int vfslocked;
3834 
3835 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3836 	    UIO_USERSPACE, uap->path, td);
3837 	if ((error = namei(&nd)) != 0)
3838 		return (error);
3839 	vfslocked = NDHASGIANT(&nd);
3840 	vp = nd.ni_vp;
3841 	NDFREE(&nd, NDF_ONLY_PNBUF);
3842 	if (vp->v_type != VCHR) {
3843 		error = EINVAL;
3844 		goto out;
3845 	}
3846 #ifdef MAC
3847 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3848 	if (error)
3849 		goto out;
3850 #endif
3851 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3852 	if (error)
3853 		goto out;
3854 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3855 		error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN,
3856 		    SUSER_ALLOWJAIL);
3857 		if (error)
3858 			goto out;
3859 	}
3860 	if (vcount(vp) > 1)
3861 		VOP_REVOKE(vp, REVOKEALL);
3862 out:
3863 	vput(vp);
3864 	VFS_UNLOCK_GIANT(vfslocked);
3865 	return (error);
3866 }
3867 
3868 /*
3869  * Convert a user file descriptor to a kernel file entry.
3870  * A reference on the file entry is held upon returning.
3871  */
3872 int
3873 getvnode(fdp, fd, fpp)
3874 	struct filedesc *fdp;
3875 	int fd;
3876 	struct file **fpp;
3877 {
3878 	int error;
3879 	struct file *fp;
3880 
3881 	fp = NULL;
3882 	if (fdp == NULL)
3883 		error = EBADF;
3884 	else {
3885 		FILEDESC_SLOCK(fdp);
3886 		if ((u_int)fd >= fdp->fd_nfiles ||
3887 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3888 			error = EBADF;
3889 		else if (fp->f_vnode == NULL) {
3890 			fp = NULL;
3891 			error = EINVAL;
3892 		} else {
3893 			fhold(fp);
3894 			error = 0;
3895 		}
3896 		FILEDESC_SUNLOCK(fdp);
3897 	}
3898 	*fpp = fp;
3899 	return (error);
3900 }
3901 
3902 /*
3903  * Get an (NFS) file handle.
3904  */
3905 #ifndef _SYS_SYSPROTO_H_
3906 struct lgetfh_args {
3907 	char	*fname;
3908 	fhandle_t *fhp;
3909 };
3910 #endif
3911 int
3912 lgetfh(td, uap)
3913 	struct thread *td;
3914 	register struct lgetfh_args *uap;
3915 {
3916 	struct nameidata nd;
3917 	fhandle_t fh;
3918 	register struct vnode *vp;
3919 	int vfslocked;
3920 	int error;
3921 
3922 	error = priv_check(td, PRIV_VFS_GETFH);
3923 	if (error)
3924 		return (error);
3925 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3926 	    UIO_USERSPACE, uap->fname, td);
3927 	error = namei(&nd);
3928 	if (error)
3929 		return (error);
3930 	vfslocked = NDHASGIANT(&nd);
3931 	NDFREE(&nd, NDF_ONLY_PNBUF);
3932 	vp = nd.ni_vp;
3933 	bzero(&fh, sizeof(fh));
3934 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3935 	error = VOP_VPTOFH(vp, &fh.fh_fid);
3936 	vput(vp);
3937 	VFS_UNLOCK_GIANT(vfslocked);
3938 	if (error)
3939 		return (error);
3940 	error = copyout(&fh, uap->fhp, sizeof (fh));
3941 	return (error);
3942 }
3943 
3944 #ifndef _SYS_SYSPROTO_H_
3945 struct getfh_args {
3946 	char	*fname;
3947 	fhandle_t *fhp;
3948 };
3949 #endif
3950 int
3951 getfh(td, uap)
3952 	struct thread *td;
3953 	register struct getfh_args *uap;
3954 {
3955 	struct nameidata nd;
3956 	fhandle_t fh;
3957 	register struct vnode *vp;
3958 	int vfslocked;
3959 	int error;
3960 
3961 	error = priv_check(td, PRIV_VFS_GETFH);
3962 	if (error)
3963 		return (error);
3964 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3965 	    UIO_USERSPACE, uap->fname, td);
3966 	error = namei(&nd);
3967 	if (error)
3968 		return (error);
3969 	vfslocked = NDHASGIANT(&nd);
3970 	NDFREE(&nd, NDF_ONLY_PNBUF);
3971 	vp = nd.ni_vp;
3972 	bzero(&fh, sizeof(fh));
3973 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3974 	error = VOP_VPTOFH(vp, &fh.fh_fid);
3975 	vput(vp);
3976 	VFS_UNLOCK_GIANT(vfslocked);
3977 	if (error)
3978 		return (error);
3979 	error = copyout(&fh, uap->fhp, sizeof (fh));
3980 	return (error);
3981 }
3982 
3983 /*
3984  * syscall for the rpc.lockd to use to translate a NFS file handle into an
3985  * open descriptor.
3986  *
3987  * warning: do not remove the priv_check() call or this becomes one giant
3988  * security hole.
3989  */
3990 #ifndef _SYS_SYSPROTO_H_
3991 struct fhopen_args {
3992 	const struct fhandle *u_fhp;
3993 	int flags;
3994 };
3995 #endif
3996 int
3997 fhopen(td, uap)
3998 	struct thread *td;
3999 	struct fhopen_args /* {
4000 		const struct fhandle *u_fhp;
4001 		int flags;
4002 	} */ *uap;
4003 {
4004 	struct proc *p = td->td_proc;
4005 	struct mount *mp;
4006 	struct vnode *vp;
4007 	struct fhandle fhp;
4008 	struct vattr vat;
4009 	struct vattr *vap = &vat;
4010 	struct flock lf;
4011 	struct file *fp;
4012 	register struct filedesc *fdp = p->p_fd;
4013 	int fmode, mode, error, type;
4014 	struct file *nfp;
4015 	int vfslocked;
4016 	int indx;
4017 
4018 	error = priv_check(td, PRIV_VFS_FHOPEN);
4019 	if (error)
4020 		return (error);
4021 	fmode = FFLAGS(uap->flags);
4022 	/* why not allow a non-read/write open for our lockd? */
4023 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4024 		return (EINVAL);
4025 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4026 	if (error)
4027 		return(error);
4028 	/* find the mount point */
4029 	mp = vfs_getvfs(&fhp.fh_fsid);
4030 	if (mp == NULL)
4031 		return (ESTALE);
4032 	vfslocked = VFS_LOCK_GIANT(mp);
4033 	/* now give me my vnode, it gets returned to me locked */
4034 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4035 	if (error)
4036 		goto out;
4037 	/*
4038 	 * from now on we have to make sure not
4039 	 * to forget about the vnode
4040 	 * any error that causes an abort must vput(vp)
4041 	 * just set error = err and 'goto bad;'.
4042 	 */
4043 
4044 	/*
4045 	 * from vn_open
4046 	 */
4047 	if (vp->v_type == VLNK) {
4048 		error = EMLINK;
4049 		goto bad;
4050 	}
4051 	if (vp->v_type == VSOCK) {
4052 		error = EOPNOTSUPP;
4053 		goto bad;
4054 	}
4055 	mode = 0;
4056 	if (fmode & (FWRITE | O_TRUNC)) {
4057 		if (vp->v_type == VDIR) {
4058 			error = EISDIR;
4059 			goto bad;
4060 		}
4061 		error = vn_writechk(vp);
4062 		if (error)
4063 			goto bad;
4064 		mode |= VWRITE;
4065 	}
4066 	if (fmode & FREAD)
4067 		mode |= VREAD;
4068 	if (fmode & O_APPEND)
4069 		mode |= VAPPEND;
4070 #ifdef MAC
4071 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4072 	if (error)
4073 		goto bad;
4074 #endif
4075 	if (mode) {
4076 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4077 		if (error)
4078 			goto bad;
4079 	}
4080 	if (fmode & O_TRUNC) {
4081 		VOP_UNLOCK(vp, 0, td);				/* XXX */
4082 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4083 			vrele(vp);
4084 			goto out;
4085 		}
4086 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4087 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4088 #ifdef MAC
4089 		/*
4090 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4091 		 * should be right.
4092 		 */
4093 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4094 		if (error == 0) {
4095 #endif
4096 			VATTR_NULL(vap);
4097 			vap->va_size = 0;
4098 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4099 #ifdef MAC
4100 		}
4101 #endif
4102 		vn_finished_write(mp);
4103 		if (error)
4104 			goto bad;
4105 	}
4106 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
4107 	if (error)
4108 		goto bad;
4109 
4110 	if (fmode & FWRITE)
4111 		vp->v_writecount++;
4112 
4113 	/*
4114 	 * end of vn_open code
4115 	 */
4116 
4117 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4118 		if (fmode & FWRITE)
4119 			vp->v_writecount--;
4120 		goto bad;
4121 	}
4122 	/* An extra reference on `nfp' has been held for us by falloc(). */
4123 	fp = nfp;
4124 
4125 	FILE_LOCK(nfp);
4126 	nfp->f_vnode = vp;
4127 	nfp->f_data = vp;
4128 	nfp->f_flag = fmode & FMASK;
4129 	nfp->f_type = DTYPE_VNODE;
4130 	nfp->f_ops = &vnops;
4131 	FILE_UNLOCK(nfp);
4132 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4133 		lf.l_whence = SEEK_SET;
4134 		lf.l_start = 0;
4135 		lf.l_len = 0;
4136 		if (fmode & O_EXLOCK)
4137 			lf.l_type = F_WRLCK;
4138 		else
4139 			lf.l_type = F_RDLCK;
4140 		type = F_FLOCK;
4141 		if ((fmode & FNONBLOCK) == 0)
4142 			type |= F_WAIT;
4143 		VOP_UNLOCK(vp, 0, td);
4144 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4145 			    type)) != 0) {
4146 			/*
4147 			 * The lock request failed.  Normally close the
4148 			 * descriptor but handle the case where someone might
4149 			 * have dup()d or close()d it when we weren't looking.
4150 			 */
4151 			fdclose(fdp, fp, indx, td);
4152 
4153 			/*
4154 			 * release our private reference
4155 			 */
4156 			fdrop(fp, td);
4157 			goto out;
4158 		}
4159 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4160 		fp->f_flag |= FHASLOCK;
4161 	}
4162 
4163 	VOP_UNLOCK(vp, 0, td);
4164 	fdrop(fp, td);
4165 	vfs_rel(mp);
4166 	VFS_UNLOCK_GIANT(vfslocked);
4167 	td->td_retval[0] = indx;
4168 	return (0);
4169 
4170 bad:
4171 	vput(vp);
4172 out:
4173 	vfs_rel(mp);
4174 	VFS_UNLOCK_GIANT(vfslocked);
4175 	return (error);
4176 }
4177 
4178 /*
4179  * Stat an (NFS) file handle.
4180  */
4181 #ifndef _SYS_SYSPROTO_H_
4182 struct fhstat_args {
4183 	struct fhandle *u_fhp;
4184 	struct stat *sb;
4185 };
4186 #endif
4187 int
4188 fhstat(td, uap)
4189 	struct thread *td;
4190 	register struct fhstat_args /* {
4191 		struct fhandle *u_fhp;
4192 		struct stat *sb;
4193 	} */ *uap;
4194 {
4195 	struct stat sb;
4196 	fhandle_t fh;
4197 	struct mount *mp;
4198 	struct vnode *vp;
4199 	int vfslocked;
4200 	int error;
4201 
4202 	error = priv_check(td, PRIV_VFS_FHSTAT);
4203 	if (error)
4204 		return (error);
4205 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4206 	if (error)
4207 		return (error);
4208 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4209 		return (ESTALE);
4210 	vfslocked = VFS_LOCK_GIANT(mp);
4211 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4212 		vfs_rel(mp);
4213 		VFS_UNLOCK_GIANT(vfslocked);
4214 		return (error);
4215 	}
4216 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4217 	vput(vp);
4218 	vfs_rel(mp);
4219 	VFS_UNLOCK_GIANT(vfslocked);
4220 	if (error)
4221 		return (error);
4222 	error = copyout(&sb, uap->sb, sizeof(sb));
4223 	return (error);
4224 }
4225 
4226 /*
4227  * Implement fstatfs() for (NFS) file handles.
4228  */
4229 #ifndef _SYS_SYSPROTO_H_
4230 struct fhstatfs_args {
4231 	struct fhandle *u_fhp;
4232 	struct statfs *buf;
4233 };
4234 #endif
4235 int
4236 fhstatfs(td, uap)
4237 	struct thread *td;
4238 	struct fhstatfs_args /* {
4239 		struct fhandle *u_fhp;
4240 		struct statfs *buf;
4241 	} */ *uap;
4242 {
4243 	struct statfs sf;
4244 	fhandle_t fh;
4245 	int error;
4246 
4247 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4248 	if (error)
4249 		return (error);
4250 	error = kern_fhstatfs(td, fh, &sf);
4251 	if (error)
4252 		return (error);
4253 	return (copyout(&sf, uap->buf, sizeof(sf)));
4254 }
4255 
4256 int
4257 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4258 {
4259 	struct statfs *sp;
4260 	struct mount *mp;
4261 	struct vnode *vp;
4262 	int vfslocked;
4263 	int error;
4264 
4265 	error = priv_check(td, PRIV_VFS_FHSTATFS);
4266 	if (error)
4267 		return (error);
4268 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4269 		return (ESTALE);
4270 	vfslocked = VFS_LOCK_GIANT(mp);
4271 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4272 	if (error) {
4273 		VFS_UNLOCK_GIANT(vfslocked);
4274 		vfs_rel(mp);
4275 		return (error);
4276 	}
4277 	vput(vp);
4278 	error = prison_canseemount(td->td_ucred, mp);
4279 	if (error)
4280 		goto out;
4281 #ifdef MAC
4282 	error = mac_check_mount_stat(td->td_ucred, mp);
4283 	if (error)
4284 		goto out;
4285 #endif
4286 	/*
4287 	 * Set these in case the underlying filesystem fails to do so.
4288 	 */
4289 	sp = &mp->mnt_stat;
4290 	sp->f_version = STATFS_VERSION;
4291 	sp->f_namemax = NAME_MAX;
4292 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4293 	error = VFS_STATFS(mp, sp, td);
4294 	if (error == 0)
4295 		*buf = *sp;
4296 out:
4297 	vfs_rel(mp);
4298 	VFS_UNLOCK_GIANT(vfslocked);
4299 	return (error);
4300 }
4301