xref: /freebsd/sys/kern/vfs_syscalls.c (revision bfe691b2f75de2224c7ceb304ebcdef2b42d4179)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/sysent.h>
48 #include <sys/malloc.h>
49 #include <sys/mount.h>
50 #include <sys/mutex.h>
51 #include <sys/sysproto.h>
52 #include <sys/namei.h>
53 #include <sys/filedesc.h>
54 #include <sys/kernel.h>
55 #include <sys/fcntl.h>
56 #include <sys/file.h>
57 #include <sys/filio.h>
58 #include <sys/limits.h>
59 #include <sys/linker.h>
60 #include <sys/stat.h>
61 #include <sys/sx.h>
62 #include <sys/unistd.h>
63 #include <sys/vnode.h>
64 #include <sys/priv.h>
65 #include <sys/proc.h>
66 #include <sys/dirent.h>
67 #include <sys/jail.h>
68 #include <sys/syscallsubr.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/stdarg.h>
72 
73 #include <security/audit/audit.h>
74 #include <security/mac/mac_framework.h>
75 
76 #include <vm/vm.h>
77 #include <vm/vm_object.h>
78 #include <vm/vm_page.h>
79 #include <vm/uma.h>
80 
/*
 * Prototypes for helpers that are private to this file (all are declared
 * static).
 */
static int chroot_refuse_vdir_fds(struct filedesc *fdp);
static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
static int setfmode(struct thread *td, struct vnode *, int);
static int setfflags(struct thread *td, struct vnode *, int);
static int setutimes(struct thread *td, struct vnode *,
    const struct timespec *, int, int);
static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
    struct thread *td);
90 
/*
 * The module initialization routine for POSIX asynchronous I/O will
 * set this to the version of AIO that it implements.  (Zero means
 * that it is not implemented.)  This value is used here by pathconf()
 * and in kern_descrip.c by fpathconf().
 */
int async_io_version;

#ifdef DEBUG
/*
 * Debug-only integer exported read/write as the "syncprt" node under the
 * _debug sysctl tree.  Only built when DEBUG is defined.
 */
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif
103 
104 /*
105  * Sync each mounted filesystem.
106  */
107 #ifndef _SYS_SYSPROTO_H_
108 struct sync_args {
109 	int     dummy;
110 };
111 #endif
112 /* ARGSUSED */
113 int
114 sync(td, uap)
115 	struct thread *td;
116 	struct sync_args *uap;
117 {
118 	struct mount *mp, *nmp;
119 	int vfslocked;
120 
121 	mtx_lock(&mountlist_mtx);
122 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
123 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
124 			nmp = TAILQ_NEXT(mp, mnt_list);
125 			continue;
126 		}
127 		vfslocked = VFS_LOCK_GIANT(mp);
128 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
129 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
130 			MNT_ILOCK(mp);
131 			mp->mnt_noasync++;
132 			mp->mnt_kern_flag &= ~MNTK_ASYNC;
133 			MNT_IUNLOCK(mp);
134 			vfs_msync(mp, MNT_NOWAIT);
135 			VFS_SYNC(mp, MNT_NOWAIT, td);
136 			MNT_ILOCK(mp);
137 			mp->mnt_noasync--;
138 			if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
139 			    mp->mnt_noasync == 0)
140 				mp->mnt_kern_flag |= MNTK_ASYNC;
141 			MNT_IUNLOCK(mp);
142 			vn_finished_write(mp);
143 		}
144 		VFS_UNLOCK_GIANT(vfslocked);
145 		mtx_lock(&mountlist_mtx);
146 		nmp = TAILQ_NEXT(mp, mnt_list);
147 		vfs_unbusy(mp, td);
148 	}
149 	mtx_unlock(&mountlist_mtx);
150 	return (0);
151 }
152 
/*
 * XXX PRISON: could be per prison flag
 *
 * When this is zero, quotactl() returns EPERM to callers for which
 * jailed() is true.  The sysctl that would make it tunable is compiled
 * out by the "#if 0" below, so the variable keeps its static initial
 * value of zero unless something else in this file writes it.
 */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif
158 
159 /*
160  * Change filesystem quotas.
161  */
162 #ifndef _SYS_SYSPROTO_H_
163 struct quotactl_args {
164 	char *path;
165 	int cmd;
166 	int uid;
167 	caddr_t arg;
168 };
169 #endif
170 int
171 quotactl(td, uap)
172 	struct thread *td;
173 	register struct quotactl_args /* {
174 		char *path;
175 		int cmd;
176 		int uid;
177 		caddr_t arg;
178 	} */ *uap;
179 {
180 	struct mount *mp;
181 	int vfslocked;
182 	int error;
183 	struct nameidata nd;
184 
185 	AUDIT_ARG(cmd, uap->cmd);
186 	AUDIT_ARG(uid, uap->uid);
187 	if (jailed(td->td_ucred) && !prison_quotas)
188 		return (EPERM);
189 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1,
190 	   UIO_USERSPACE, uap->path, td);
191 	if ((error = namei(&nd)) != 0)
192 		return (error);
193 	vfslocked = NDHASGIANT(&nd);
194 	NDFREE(&nd, NDF_ONLY_PNBUF);
195 	mp = nd.ni_vp->v_mount;
196 	if ((error = vfs_busy(mp, 0, NULL, td))) {
197 		vrele(nd.ni_vp);
198 		VFS_UNLOCK_GIANT(vfslocked);
199 		return (error);
200 	}
201 	vrele(nd.ni_vp);
202 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
203 	vfs_unbusy(mp, td);
204 	VFS_UNLOCK_GIANT(vfslocked);
205 	return (error);
206 }
207 
208 /*
209  * Get filesystem statistics.
210  */
211 #ifndef _SYS_SYSPROTO_H_
212 struct statfs_args {
213 	char *path;
214 	struct statfs *buf;
215 };
216 #endif
217 int
218 statfs(td, uap)
219 	struct thread *td;
220 	register struct statfs_args /* {
221 		char *path;
222 		struct statfs *buf;
223 	} */ *uap;
224 {
225 	struct statfs sf;
226 	int error;
227 
228 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
229 	if (error == 0)
230 		error = copyout(&sf, uap->buf, sizeof(sf));
231 	return (error);
232 }
233 
234 int
235 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
236     struct statfs *buf)
237 {
238 	struct mount *mp;
239 	struct statfs *sp, sb;
240 	int vfslocked;
241 	int error;
242 	struct nameidata nd;
243 
244 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
245 	    pathseg, path, td);
246 	error = namei(&nd);
247 	if (error)
248 		return (error);
249 	vfslocked = NDHASGIANT(&nd);
250 	mp = nd.ni_vp->v_mount;
251 	vfs_ref(mp);
252 	NDFREE(&nd, NDF_ONLY_PNBUF);
253 	vput(nd.ni_vp);
254 #ifdef MAC
255 	error = mac_check_mount_stat(td->td_ucred, mp);
256 	if (error)
257 		goto out;
258 #endif
259 	/*
260 	 * Set these in case the underlying filesystem fails to do so.
261 	 */
262 	sp = &mp->mnt_stat;
263 	sp->f_version = STATFS_VERSION;
264 	sp->f_namemax = NAME_MAX;
265 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
266 	error = VFS_STATFS(mp, sp, td);
267 	if (error)
268 		goto out;
269 	if (priv_check(td, PRIV_VFS_GENERATION)) {
270 		bcopy(sp, &sb, sizeof(sb));
271 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
272 		prison_enforce_statfs(td->td_ucred, mp, &sb);
273 		sp = &sb;
274 	}
275 	*buf = *sp;
276 out:
277 	vfs_rel(mp);
278 	VFS_UNLOCK_GIANT(vfslocked);
279 	if (mtx_owned(&Giant))
280 		printf("statfs(%d): %s: %d\n", vfslocked, path, error);
281 	return (error);
282 }
283 
284 /*
285  * Get filesystem statistics.
286  */
287 #ifndef _SYS_SYSPROTO_H_
288 struct fstatfs_args {
289 	int fd;
290 	struct statfs *buf;
291 };
292 #endif
293 int
294 fstatfs(td, uap)
295 	struct thread *td;
296 	register struct fstatfs_args /* {
297 		int fd;
298 		struct statfs *buf;
299 	} */ *uap;
300 {
301 	struct statfs sf;
302 	int error;
303 
304 	error = kern_fstatfs(td, uap->fd, &sf);
305 	if (error == 0)
306 		error = copyout(&sf, uap->buf, sizeof(sf));
307 	return (error);
308 }
309 
310 int
311 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
312 {
313 	struct file *fp;
314 	struct mount *mp;
315 	struct statfs *sp, sb;
316 	int vfslocked;
317 	struct vnode *vp;
318 	int error;
319 
320 	AUDIT_ARG(fd, fd);
321 	error = getvnode(td->td_proc->p_fd, fd, &fp);
322 	if (error)
323 		return (error);
324 	vp = fp->f_vnode;
325 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
326 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
327 #ifdef AUDIT
328 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
329 #endif
330 	mp = vp->v_mount;
331 	if (mp)
332 		vfs_ref(mp);
333 	VOP_UNLOCK(vp, 0, td);
334 	fdrop(fp, td);
335 	if (vp->v_iflag & VI_DOOMED) {
336 		error = EBADF;
337 		goto out;
338 	}
339 #ifdef MAC
340 	error = mac_check_mount_stat(td->td_ucred, mp);
341 	if (error)
342 		goto out;
343 #endif
344 	/*
345 	 * Set these in case the underlying filesystem fails to do so.
346 	 */
347 	sp = &mp->mnt_stat;
348 	sp->f_version = STATFS_VERSION;
349 	sp->f_namemax = NAME_MAX;
350 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
351 	error = VFS_STATFS(mp, sp, td);
352 	if (error)
353 		goto out;
354 	if (priv_check(td, PRIV_VFS_GENERATION)) {
355 		bcopy(sp, &sb, sizeof(sb));
356 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
357 		prison_enforce_statfs(td->td_ucred, mp, &sb);
358 		sp = &sb;
359 	}
360 	*buf = *sp;
361 out:
362 	if (mp)
363 		vfs_rel(mp);
364 	VFS_UNLOCK_GIANT(vfslocked);
365 	return (error);
366 }
367 
368 /*
369  * Get statistics on all filesystems.
370  */
371 #ifndef _SYS_SYSPROTO_H_
372 struct getfsstat_args {
373 	struct statfs *buf;
374 	long bufsize;
375 	int flags;
376 };
377 #endif
378 int
379 getfsstat(td, uap)
380 	struct thread *td;
381 	register struct getfsstat_args /* {
382 		struct statfs *buf;
383 		long bufsize;
384 		int flags;
385 	} */ *uap;
386 {
387 
388 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
389 	    uap->flags));
390 }
391 
/*
 * Collect statistics for the mounted filesystems visible to the caller.
 *
 * At most bufsize / sizeof(struct statfs) entries are stored.
 *  - bufsize == 0: nothing is stored; the mounts are only counted.
 *  - bufseg == UIO_USERSPACE: entries are copied out to the user address
 *    held in '*buf'.
 *  - otherwise (UIO_SYSSPACE): an array is allocated here from M_TEMP,
 *    sized for the smaller of the requested entry count and the number of
 *    mounts currently on the list, and returned through '*buf'.
 *
 * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
 * 	The caller is responsible for freeing memory which will be allocated
 *	in '*buf'.
 *
 * On return td->td_retval[0] holds the number of mounts counted, clamped
 * to the entry limit when a buffer was supplied.  Returns 0, or the error
 * from copyout() when writing to a user buffer fails.
 */
int
kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
    enum uio_seg bufseg, int flags)
{
	struct mount *mp, *nmp;
	struct statfs *sfsp, *sp, sb;
	size_t count, maxcount;
	int vfslocked;
	int error;

	maxcount = bufsize / sizeof(struct statfs);
	if (bufsize == 0)
		sfsp = NULL;
	else if (bufseg == UIO_USERSPACE)
		sfsp = *buf;
	else /* if (bufseg == UIO_SYSSPACE) */ {
		/* Size the kernel buffer from the current mount count. */
		count = 0;
		mtx_lock(&mountlist_mtx);
		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
			count++;
		}
		mtx_unlock(&mountlist_mtx);
		if (maxcount > count)
			maxcount = count;
		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
		    M_WAITOK);
	}
	count = 0;
	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/* Skip mounts the caller's credentials may not see. */
		if (prison_canseemount(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#ifdef MAC
		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#endif
		/* Skip mounts that cannot be busied (LK_NOWAIT). */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		vfslocked = VFS_LOCK_GIANT(mp);
		if (sfsp && count < maxcount) {
			sp = &mp->mnt_stat;
			/*
			 * Set these in case the underlying filesystem
			 * fails to do so.
			 */
			sp->f_version = STATFS_VERSION;
			sp->f_namemax = NAME_MAX;
			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
			/*
			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
			 * overrides MNT_WAIT.
			 */
			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
			    (flags & MNT_WAIT)) &&
			    (error = VFS_STATFS(mp, sp, td))) {
				/*
				 * Refresh failed: drop this mount without
				 * counting it and continue with the next.
				 */
				VFS_UNLOCK_GIANT(vfslocked);
				mtx_lock(&mountlist_mtx);
				nmp = TAILQ_NEXT(mp, mnt_list);
				vfs_unbusy(mp, td);
				continue;
			}
			if (priv_check(td, PRIV_VFS_GENERATION)) {
				/*
				 * priv_check() returned non-zero: hand out a
				 * copy with the fsid zeroed and filtered by
				 * prison_enforce_statfs().
				 */
				bcopy(sp, &sb, sizeof(sb));
				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
				prison_enforce_statfs(td->td_ucred, mp, &sb);
				sp = &sb;
			}
			if (bufseg == UIO_SYSSPACE)
				bcopy(sp, sfsp, sizeof(*sp));
			else /* if (bufseg == UIO_USERSPACE) */ {
				error = copyout(sp, sfsp, sizeof(*sp));
				if (error) {
					/*
					 * mountlist_mtx is not re-taken on
					 * this path; only the busy hold and
					 * Giant are released before leaving.
					 */
					vfs_unbusy(mp, td);
					VFS_UNLOCK_GIANT(vfslocked);
					return (error);
				}
			}
			sfsp++;
		}
		VFS_UNLOCK_GIANT(vfslocked);
		count++;
		/*
		 * Re-take the list mutex before reading the successor and
		 * before releasing the busy hold on this mount.
		 */
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, td);
	}
	mtx_unlock(&mountlist_mtx);
	/* Report no more entries than could have been stored. */
	if (sfsp && count > maxcount)
		td->td_retval[0] = maxcount;
	else
		td->td_retval[0] = count;
	return (0);
}
496 
497 #ifdef COMPAT_FREEBSD4
498 /*
499  * Get old format filesystem statistics.
500  */
501 static void cvtstatfs(struct statfs *, struct ostatfs *);
502 
503 #ifndef _SYS_SYSPROTO_H_
504 struct freebsd4_statfs_args {
505 	char *path;
506 	struct ostatfs *buf;
507 };
508 #endif
509 int
510 freebsd4_statfs(td, uap)
511 	struct thread *td;
512 	struct freebsd4_statfs_args /* {
513 		char *path;
514 		struct ostatfs *buf;
515 	} */ *uap;
516 {
517 	struct ostatfs osb;
518 	struct statfs sf;
519 	int error;
520 
521 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
522 	if (error)
523 		return (error);
524 	cvtstatfs(&sf, &osb);
525 	return (copyout(&osb, uap->buf, sizeof(osb)));
526 }
527 
528 /*
529  * Get filesystem statistics.
530  */
531 #ifndef _SYS_SYSPROTO_H_
532 struct freebsd4_fstatfs_args {
533 	int fd;
534 	struct ostatfs *buf;
535 };
536 #endif
537 int
538 freebsd4_fstatfs(td, uap)
539 	struct thread *td;
540 	struct freebsd4_fstatfs_args /* {
541 		int fd;
542 		struct ostatfs *buf;
543 	} */ *uap;
544 {
545 	struct ostatfs osb;
546 	struct statfs sf;
547 	int error;
548 
549 	error = kern_fstatfs(td, uap->fd, &sf);
550 	if (error)
551 		return (error);
552 	cvtstatfs(&sf, &osb);
553 	return (copyout(&osb, uap->buf, sizeof(osb)));
554 }
555 
556 /*
557  * Get statistics on all filesystems.
558  */
559 #ifndef _SYS_SYSPROTO_H_
560 struct freebsd4_getfsstat_args {
561 	struct ostatfs *buf;
562 	long bufsize;
563 	int flags;
564 };
565 #endif
566 int
567 freebsd4_getfsstat(td, uap)
568 	struct thread *td;
569 	register struct freebsd4_getfsstat_args /* {
570 		struct ostatfs *buf;
571 		long bufsize;
572 		int flags;
573 	} */ *uap;
574 {
575 	struct statfs *buf, *sp;
576 	struct ostatfs osb;
577 	size_t count, size;
578 	int error;
579 
580 	count = uap->bufsize / sizeof(struct ostatfs);
581 	size = count * sizeof(struct statfs);
582 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
583 	if (size > 0) {
584 		count = td->td_retval[0];
585 		sp = buf;
586 		while (count > 0 && error == 0) {
587 			cvtstatfs(sp, &osb);
588 			error = copyout(&osb, uap->buf, sizeof(osb));
589 			sp++;
590 			uap->buf++;
591 			count--;
592 		}
593 		free(buf, M_TEMP);
594 	}
595 	return (error);
596 }
597 
598 /*
599  * Implement fstatfs() for (NFS) file handles.
600  */
601 #ifndef _SYS_SYSPROTO_H_
602 struct freebsd4_fhstatfs_args {
603 	struct fhandle *u_fhp;
604 	struct ostatfs *buf;
605 };
606 #endif
607 int
608 freebsd4_fhstatfs(td, uap)
609 	struct thread *td;
610 	struct freebsd4_fhstatfs_args /* {
611 		struct fhandle *u_fhp;
612 		struct ostatfs *buf;
613 	} */ *uap;
614 {
615 	struct ostatfs osb;
616 	struct statfs sf;
617 	fhandle_t fh;
618 	int error;
619 
620 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
621 	if (error)
622 		return (error);
623 	error = kern_fhstatfs(td, fh, &sf);
624 	if (error)
625 		return (error);
626 	cvtstatfs(&sf, &osb);
627 	return (copyout(&osb, uap->buf, sizeof(osb)));
628 }
629 
630 /*
631  * Convert a new format statfs structure to an old format statfs structure.
632  */
633 static void
634 cvtstatfs(nsp, osp)
635 	struct statfs *nsp;
636 	struct ostatfs *osp;
637 {
638 
639 	bzero(osp, sizeof(*osp));
640 	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
641 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
642 	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
643 	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
644 	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
645 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
646 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
647 	osp->f_owner = nsp->f_owner;
648 	osp->f_type = nsp->f_type;
649 	osp->f_flags = nsp->f_flags;
650 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
651 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
652 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
653 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
654 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
655 	    MIN(MFSNAMELEN, OMFSNAMELEN));
656 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
657 	    MIN(MNAMELEN, OMNAMELEN));
658 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
659 	    MIN(MNAMELEN, OMNAMELEN));
660 	osp->f_fsid = nsp->f_fsid;
661 }
662 #endif /* COMPAT_FREEBSD4 */
663 
664 /*
665  * Change current working directory to a given file descriptor.
666  */
667 #ifndef _SYS_SYSPROTO_H_
668 struct fchdir_args {
669 	int	fd;
670 };
671 #endif
672 int
673 fchdir(td, uap)
674 	struct thread *td;
675 	struct fchdir_args /* {
676 		int fd;
677 	} */ *uap;
678 {
679 	register struct filedesc *fdp = td->td_proc->p_fd;
680 	struct vnode *vp, *tdp, *vpold;
681 	struct mount *mp;
682 	struct file *fp;
683 	int vfslocked;
684 	int error;
685 
686 	AUDIT_ARG(fd, uap->fd);
687 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
688 		return (error);
689 	vp = fp->f_vnode;
690 	VREF(vp);
691 	fdrop(fp, td);
692 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
693 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
694 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
695 	error = change_dir(vp, td);
696 	while (!error && (mp = vp->v_mountedhere) != NULL) {
697 		int tvfslocked;
698 		if (vfs_busy(mp, 0, 0, td))
699 			continue;
700 		tvfslocked = VFS_LOCK_GIANT(mp);
701 		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
702 		vfs_unbusy(mp, td);
703 		if (error) {
704 			VFS_UNLOCK_GIANT(tvfslocked);
705 			break;
706 		}
707 		vput(vp);
708 		VFS_UNLOCK_GIANT(vfslocked);
709 		vp = tdp;
710 		vfslocked = tvfslocked;
711 	}
712 	if (error) {
713 		vput(vp);
714 		VFS_UNLOCK_GIANT(vfslocked);
715 		return (error);
716 	}
717 	VOP_UNLOCK(vp, 0, td);
718 	VFS_UNLOCK_GIANT(vfslocked);
719 	FILEDESC_XLOCK(fdp);
720 	vpold = fdp->fd_cdir;
721 	fdp->fd_cdir = vp;
722 	FILEDESC_XUNLOCK(fdp);
723 	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
724 	vrele(vpold);
725 	VFS_UNLOCK_GIANT(vfslocked);
726 	return (0);
727 }
728 
729 /*
730  * Change current working directory (``.'').
731  */
732 #ifndef _SYS_SYSPROTO_H_
733 struct chdir_args {
734 	char	*path;
735 };
736 #endif
737 int
738 chdir(td, uap)
739 	struct thread *td;
740 	struct chdir_args /* {
741 		char *path;
742 	} */ *uap;
743 {
744 
745 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
746 }
747 
748 int
749 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
750 {
751 	register struct filedesc *fdp = td->td_proc->p_fd;
752 	int error;
753 	struct nameidata nd;
754 	struct vnode *vp;
755 	int vfslocked;
756 
757 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
758 	    pathseg, path, td);
759 	if ((error = namei(&nd)) != 0)
760 		return (error);
761 	vfslocked = NDHASGIANT(&nd);
762 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
763 		vput(nd.ni_vp);
764 		VFS_UNLOCK_GIANT(vfslocked);
765 		NDFREE(&nd, NDF_ONLY_PNBUF);
766 		return (error);
767 	}
768 	VOP_UNLOCK(nd.ni_vp, 0, td);
769 	VFS_UNLOCK_GIANT(vfslocked);
770 	NDFREE(&nd, NDF_ONLY_PNBUF);
771 	FILEDESC_XLOCK(fdp);
772 	vp = fdp->fd_cdir;
773 	fdp->fd_cdir = nd.ni_vp;
774 	FILEDESC_XUNLOCK(fdp);
775 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
776 	vrele(vp);
777 	VFS_UNLOCK_GIANT(vfslocked);
778 	return (0);
779 }
780 
781 /*
782  * Helper function for raised chroot(2) security function:  Refuse if
783  * any filedescriptors are open directories.
784  */
785 static int
786 chroot_refuse_vdir_fds(fdp)
787 	struct filedesc *fdp;
788 {
789 	struct vnode *vp;
790 	struct file *fp;
791 	int fd;
792 
793 	FILEDESC_LOCK_ASSERT(fdp);
794 
795 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
796 		fp = fget_locked(fdp, fd);
797 		if (fp == NULL)
798 			continue;
799 		if (fp->f_type == DTYPE_VNODE) {
800 			vp = fp->f_vnode;
801 			if (vp->v_type == VDIR)
802 				return (EPERM);
803 		}
804 	}
805 	return (0);
806 }
807 
808 /*
809  * This sysctl determines if we will allow a process to chroot(2) if it
810  * has a directory open:
811  *	0: disallowed for all processes.
812  *	1: allowed for processes that were not already chroot(2)'ed.
813  *	2: allowed for all processes.
814  */
815 
816 static int chroot_allow_open_directories = 1;
817 
818 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
819      &chroot_allow_open_directories, 0, "");
820 
821 /*
822  * Change notion of root (``/'') directory.
823  */
824 #ifndef _SYS_SYSPROTO_H_
825 struct chroot_args {
826 	char	*path;
827 };
828 #endif
829 int
830 chroot(td, uap)
831 	struct thread *td;
832 	struct chroot_args /* {
833 		char *path;
834 	} */ *uap;
835 {
836 	int error;
837 	struct nameidata nd;
838 	int vfslocked;
839 
840 	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT,
841 	    SUSER_ALLOWJAIL);
842 	if (error)
843 		return (error);
844 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
845 	    UIO_USERSPACE, uap->path, td);
846 	error = namei(&nd);
847 	if (error)
848 		goto error;
849 	vfslocked = NDHASGIANT(&nd);
850 	if ((error = change_dir(nd.ni_vp, td)) != 0)
851 		goto e_vunlock;
852 #ifdef MAC
853 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
854 		goto e_vunlock;
855 #endif
856 	VOP_UNLOCK(nd.ni_vp, 0, td);
857 	error = change_root(nd.ni_vp, td);
858 	vrele(nd.ni_vp);
859 	VFS_UNLOCK_GIANT(vfslocked);
860 	NDFREE(&nd, NDF_ONLY_PNBUF);
861 	return (error);
862 e_vunlock:
863 	vput(nd.ni_vp);
864 	VFS_UNLOCK_GIANT(vfslocked);
865 error:
866 	NDFREE(&nd, NDF_ONLY_PNBUF);
867 	return (error);
868 }
869 
870 /*
871  * Common routine for chroot and chdir.  Callers must provide a locked vnode
872  * instance.
873  */
874 int
875 change_dir(vp, td)
876 	struct vnode *vp;
877 	struct thread *td;
878 {
879 	int error;
880 
881 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
882 	if (vp->v_type != VDIR)
883 		return (ENOTDIR);
884 #ifdef MAC
885 	error = mac_check_vnode_chdir(td->td_ucred, vp);
886 	if (error)
887 		return (error);
888 #endif
889 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
890 	return (error);
891 }
892 
893 /*
894  * Common routine for kern_chroot() and jail_attach().  The caller is
895  * responsible for invoking priv_check() and mac_check_chroot() to authorize
896  * this operation.
897  */
898 int
899 change_root(vp, td)
900 	struct vnode *vp;
901 	struct thread *td;
902 {
903 	struct filedesc *fdp;
904 	struct vnode *oldvp;
905 	int vfslocked;
906 	int error;
907 
908 	VFS_ASSERT_GIANT(vp->v_mount);
909 	fdp = td->td_proc->p_fd;
910 	FILEDESC_XLOCK(fdp);
911 	if (chroot_allow_open_directories == 0 ||
912 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
913 		error = chroot_refuse_vdir_fds(fdp);
914 		if (error) {
915 			FILEDESC_XUNLOCK(fdp);
916 			return (error);
917 		}
918 	}
919 	oldvp = fdp->fd_rdir;
920 	fdp->fd_rdir = vp;
921 	VREF(fdp->fd_rdir);
922 	if (!fdp->fd_jdir) {
923 		fdp->fd_jdir = vp;
924 		VREF(fdp->fd_jdir);
925 	}
926 	FILEDESC_XUNLOCK(fdp);
927 	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
928 	vrele(oldvp);
929 	VFS_UNLOCK_GIANT(vfslocked);
930 	return (0);
931 }
932 
933 /*
934  * Check permissions, allocate an open file structure, and call the device
935  * open routine if any.
936  */
937 #ifndef _SYS_SYSPROTO_H_
938 struct open_args {
939 	char	*path;
940 	int	flags;
941 	int	mode;
942 };
943 #endif
944 int
945 open(td, uap)
946 	struct thread *td;
947 	register struct open_args /* {
948 		char *path;
949 		int flags;
950 		int mode;
951 	} */ *uap;
952 {
953 
954 	return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
955 }
956 
/*
 * Kernel-internal open: open (and possibly create) the file named by
 * `path' in address space `pathseg' and install it in a new descriptor.
 *
 * `flags' are the open(2) flags; a value with both access-mode bits set
 * (O_ACCMODE) is rejected with EINVAL.  `mode' is the creation mode, to
 * which the process's fd_cmask is applied and from which S_ISTXT is
 * removed.
 *
 * On success the new descriptor index is stored in td->td_retval[0] and 0
 * is returned.  On failure the descriptor slot is cleaned up with
 * fdclose() and an errno value is returned; ERESTART from vn_open() is
 * reported as EINTR.
 */
int
kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
    int mode)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	struct vattr vat;
	struct mount *mp;
	int cmode;
	struct file *nfp;
	int type, indx, error;
	struct flock lf;
	struct nameidata nd;
	int vfslocked;

	AUDIT_ARG(fflags, flags);
	AUDIT_ARG(mode, mode);
	if ((flags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(flags);
	error = falloc(td, &nfp, &indx);
	if (error)
		return (error);
	/* An extra reference on `nfp' has been held for us by falloc(). */
	fp = nfp;
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
	/*
	 * td_dupfd is preset to -1 here and tested for >= 0 in the error
	 * path below to recognise the fdopen case.
	 */
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open(&nd, &flags, cmode, indx);
	if (error) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wonderous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops) {
			fdrop(fp, td);
			td->td_retval[0] = indx;
			return (0);
		}

		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 */
		if ((error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
		    (error =
			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
			td->td_retval[0] = indx;
			fdrop(fp, td);
			return (0);
		}
		/*
		 * Clean up the descriptor, but only if another thread hadn't
		 * replaced or closed it.
		 */
		fdclose(fdp, fp, indx, td);
		fdrop(fp, td);

		if (error == ERESTART)
			error = EINTR;
		return (error);
	}
	td->td_dupfd = 0;
	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/*
	 * Bind the vnode to the file.  f_data and f_ops are only filled in
	 * if still at their initial values (NULL / badfileops), so settings
	 * made during vn_open() are kept.
	 */
	FILE_LOCK(fp);
	fp->f_vnode = vp;
	if (fp->f_data == NULL)
		fp->f_data = vp;
	fp->f_flag = flags & FMASK;
	fp->f_seqcount = 1;
	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
	if (fp->f_ops == &badfileops)
		fp->f_ops = &vnops;
	FILE_UNLOCK(fp);

	VOP_UNLOCK(vp, 0, td);
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/*
		 * Whole-file advisory lock requested at open time: a write
		 * lock for O_EXLOCK, a read lock otherwise.  F_WAIT is added
		 * unless FNONBLOCK is set.
		 */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0)
			goto bad;
		fp->f_flag |= FHASLOCK;
	}
	if (flags & O_TRUNC) {
		/* Truncate by setting the size attribute to zero. */
		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
			goto bad;
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		VATTR_NULL(&vat);
		vat.va_size = 0;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
#ifdef MAC
		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
		if (error == 0)
#endif
			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
		VOP_UNLOCK(vp, 0, td);
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	VFS_UNLOCK_GIANT(vfslocked);
	/*
	 * Release our private reference, leaving the one associated with
	 * the descriptor table intact.
	 */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);
bad:
	/* Failure after vn_open() succeeded: undo the descriptor. */
	VFS_UNLOCK_GIANT(vfslocked);
	fdclose(fdp, fp, indx, td);
	fdrop(fp, td);
	return (error);
}
1088 
#ifdef COMPAT_43
/*
 * Old creat(2) compatibility entry point: create a file.
 *
 * Implemented as kern_open() on the user-space path with the fixed flags
 * O_WRONLY | O_CREAT | O_TRUNC and the mode from uap.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	int error;

	error = kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode);
	return (error);
}
#endif /* COMPAT_43 */
1112 
1113 /*
1114  * Create a special file.
1115  */
1116 #ifndef _SYS_SYSPROTO_H_
1117 struct mknod_args {
1118 	char	*path;
1119 	int	mode;
1120 	int	dev;
1121 };
1122 #endif
1123 int
1124 mknod(td, uap)
1125 	struct thread *td;
1126 	register struct mknod_args /* {
1127 		char *path;
1128 		int mode;
1129 		int dev;
1130 	} */ *uap;
1131 {
1132 
1133 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1134 }
1135 
/*
 * Kernel-internal mknod: create the special node named by `path' (in
 * address space `pathseg').
 *
 * Only four file types in `mode' are accepted, each gated by its own
 * privilege: character and block devices (PRIV_VFS_MKNOD_DEV), the S_IFMT
 * "bad sector" type (PRIV_VFS_MKNOD_BAD) and whiteouts
 * (PRIV_VFS_MKNOD_WHT).  Any other type yields EINVAL.  `dev' becomes the
 * node's va_rdev.
 *
 * Returns 0, EEXIST if the name already exists, or an errno value from
 * the privilege check, namei(), vn_start_write(), the MAC check,
 * VOP_WHITEOUT() or VOP_MKNOD().
 */
int
kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
    int dev)
{
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	int error;
	int whiteout = 0;
	struct nameidata nd;
	int vfslocked;

	AUDIT_ARG(mode, mode);
	AUDIT_ARG(dev, dev);
	/* Authorize by requested file type before touching the namespace. */
	switch (mode & S_IFMT) {
	case S_IFCHR:
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		break;
	case S_IFMT:
		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
		break;
	case S_IFWHT:
		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
		break;
	default:
		error = EINVAL;
		break;
	}
	if (error)
		return (error);
restart:
	bwillwrite();
	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
	    pathseg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;
	if (vp != NULL) {
		/*
		 * The name already exists.  When the target is the parent
		 * itself only a reference is dropped on ni_dvp (vrele);
		 * otherwise the parent is unlocked and released (vput).
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (EEXIST);
	} else {
		VATTR_NULL(&vattr);
		/* Apply the process umask under the filedesc lock. */
		FILEDESC_SLOCK(td->td_proc->p_fd);
		vattr.va_mode = (mode & ALLPERMS) &
		    ~td->td_proc->p_fd->fd_cmask;
		FILEDESC_SUNLOCK(td->td_proc->p_fd);
		vattr.va_rdev = dev;
		whiteout = 0;

		switch (mode & S_IFMT) {
		case S_IFMT:	/* used by badsect to flag bad sectors */
			vattr.va_type = VBAD;
			break;
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			whiteout = 1;
			break;
		default:
			/* Unreachable: other types were rejected above. */
			panic("kern_mknod: invalid mode");
		}
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * Write access could not be started without waiting.  Undo
		 * the lookup, call vn_start_write() again with V_XSLEEP |
		 * PCATCH, and redo everything from the lookup onwards.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
#ifdef MAC
	/* Whiteouts are not subject to the MAC create check. */
	if (error == 0 && !whiteout)
		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
		    &nd.ni_cnd, &vattr);
#endif
	if (!error) {
		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
		if (whiteout)
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
		else {
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
						&nd.ni_cnd, &vattr);
			/* The new vnode itself is not needed here. */
			if (error == 0)
				vput(nd.ni_vp);
		}
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
1240 
1241 /*
1242  * Create a named pipe.
1243  */
1244 #ifndef _SYS_SYSPROTO_H_
1245 struct mkfifo_args {
1246 	char	*path;
1247 	int	mode;
1248 };
1249 #endif
1250 int
1251 mkfifo(td, uap)
1252 	struct thread *td;
1253 	register struct mkfifo_args /* {
1254 		char *path;
1255 		int mode;
1256 	} */ *uap;
1257 {
1258 
1259 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1260 }
1261 
1262 int
1263 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1264 {
1265 	struct mount *mp;
1266 	struct vattr vattr;
1267 	int error;
1268 	struct nameidata nd;
1269 	int vfslocked;
1270 
1271 	AUDIT_ARG(mode, mode);
1272 restart:
1273 	bwillwrite();
1274 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1275 	    pathseg, path, td);
1276 	if ((error = namei(&nd)) != 0)
1277 		return (error);
1278 	vfslocked = NDHASGIANT(&nd);
1279 	if (nd.ni_vp != NULL) {
1280 		NDFREE(&nd, NDF_ONLY_PNBUF);
1281 		if (nd.ni_vp == nd.ni_dvp)
1282 			vrele(nd.ni_dvp);
1283 		else
1284 			vput(nd.ni_dvp);
1285 		vrele(nd.ni_vp);
1286 		VFS_UNLOCK_GIANT(vfslocked);
1287 		return (EEXIST);
1288 	}
1289 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1290 		NDFREE(&nd, NDF_ONLY_PNBUF);
1291 		vput(nd.ni_dvp);
1292 		VFS_UNLOCK_GIANT(vfslocked);
1293 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1294 			return (error);
1295 		goto restart;
1296 	}
1297 	VATTR_NULL(&vattr);
1298 	vattr.va_type = VFIFO;
1299 	FILEDESC_SLOCK(td->td_proc->p_fd);
1300 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1301 	FILEDESC_SUNLOCK(td->td_proc->p_fd);
1302 #ifdef MAC
1303 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1304 	    &vattr);
1305 	if (error)
1306 		goto out;
1307 #endif
1308 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1309 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1310 	if (error == 0)
1311 		vput(nd.ni_vp);
1312 #ifdef MAC
1313 out:
1314 #endif
1315 	vput(nd.ni_dvp);
1316 	vn_finished_write(mp);
1317 	VFS_UNLOCK_GIANT(vfslocked);
1318 	NDFREE(&nd, NDF_ONLY_PNBUF);
1319 	return (error);
1320 }
1321 
1322 /*
1323  * Make a hard file link.
1324  */
1325 #ifndef _SYS_SYSPROTO_H_
1326 struct link_args {
1327 	char	*path;
1328 	char	*link;
1329 };
1330 #endif
1331 int
1332 link(td, uap)
1333 	struct thread *td;
1334 	register struct link_args /* {
1335 		char *path;
1336 		char *link;
1337 	} */ *uap;
1338 {
1339 	int error;
1340 
1341 	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1342 	return (error);
1343 }
1344 
/*
 * Policy knob consulted by can_hardlink(): when non-zero, linking to a
 * file whose owner differs from the caller's cr_uid requires
 * PRIV_VFS_LINK.  Off (0) by default; exported read-write via sysctl.
 */
static int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
/*
 * Policy knob consulted by can_hardlink(): when non-zero, linking to a
 * file whose group the caller is not a member of requires PRIV_VFS_LINK.
 * Off (0) by default; exported read-write via sysctl.
 */
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");
1355 
1356 static int
1357 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1358 {
1359 	struct vattr va;
1360 	int error;
1361 
1362 	if (!hardlink_check_uid && !hardlink_check_gid)
1363 		return (0);
1364 
1365 	error = VOP_GETATTR(vp, &va, cred, td);
1366 	if (error != 0)
1367 		return (error);
1368 
1369 	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
1370 		error = priv_check_cred(cred, PRIV_VFS_LINK,
1371 		    SUSER_ALLOWJAIL);
1372 		if (error)
1373 			return (error);
1374 	}
1375 
1376 	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
1377 		error = priv_check_cred(cred, PRIV_VFS_LINK,
1378 		    SUSER_ALLOWJAIL);
1379 		if (error)
1380 			return (error);
1381 	}
1382 
1383 	return (0);
1384 }
1385 
/*
 * Create the hard link "link" referring to the existing file "path".
 * Both names are interpreted according to segflg.
 *
 * Returns 0 on success, EPERM if path names a directory, EEXIST if link
 * already exists, or the error from namei(), vn_start_write(), vn_lock(),
 * can_hardlink(), the MAC hook or VOP_LINK().
 */
int
kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
{
	struct vnode *vp;
	struct mount *mp;
	struct nameidata nd;
	int vfslocked;
	int lvfslocked;
	int error;

	bwillwrite();
	/* First lookup: the existing file, following symbolic links. */
	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;
	if (vp->v_type == VDIR) {
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (EPERM);		/* POSIX */
	}
	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (error);
	}
	/*
	 * Second lookup: the new name.  nd is reused, so the Giant state of
	 * this lookup is tracked separately in lvfslocked.
	 */
	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
	    segflg, link, td);
	if ((error = namei(&nd)) == 0) {
		lvfslocked = NDHASGIANT(&nd);
		if (nd.ni_vp != NULL) {
			/* The new name is already taken. */
			if (nd.ni_dvp == nd.ni_vp)
				vrele(nd.ni_dvp);
			else
				vput(nd.ni_dvp);
			vrele(nd.ni_vp);
			error = EEXIST;
		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
		    == 0) {
			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
			/*
			 * Policy check, then (with MAC) the MAC check, then
			 * the link itself; each step runs only if the
			 * previous one succeeded.
			 */
			error = can_hardlink(vp, td, td->td_ucred);
			if (error == 0)
#ifdef MAC
				error = mac_check_vnode_link(td->td_ucred,
				    nd.ni_dvp, vp, &nd.ni_cnd);
			if (error == 0)
#endif
				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
			VOP_UNLOCK(vp, 0, td);
			vput(nd.ni_dvp);
		}
		NDFREE(&nd, NDF_ONLY_PNBUF);
		VFS_UNLOCK_GIANT(lvfslocked);
	}
	/* Common exit: undo the first lookup and the write start. */
	vrele(vp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
1447 
1448 /*
1449  * Make a symbolic link.
1450  */
1451 #ifndef _SYS_SYSPROTO_H_
1452 struct symlink_args {
1453 	char	*path;
1454 	char	*link;
1455 };
1456 #endif
1457 int
1458 symlink(td, uap)
1459 	struct thread *td;
1460 	register struct symlink_args /* {
1461 		char *path;
1462 		char *link;
1463 	} */ *uap;
1464 {
1465 
1466 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1467 }
1468 
/*
 * Create the symbolic link "link" whose contents are the string "path".
 * Both arguments are interpreted according to segflg.
 *
 * Returns 0 on success, EEXIST if link already exists, or the error from
 * copyinstr(), namei(), vn_start_write(), the MAC hook or VOP_SYMLINK().
 */
int
kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
{
	struct mount *mp;
	struct vattr vattr;
	char *syspath;
	int error;
	struct nameidata nd;
	int vfslocked;

	/*
	 * VOP_SYMLINK() is handed syspath: either path itself when it is
	 * already a kernel address, or a copy made in a namei_zone buffer.
	 */
	if (segflg == UIO_SYSSPACE) {
		syspath = path;
	} else {
		syspath = uma_zalloc(namei_zone, M_WAITOK);
		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
			goto out;
	}
	AUDIT_ARG(text, syspath);
	/* The lookup is redone from here whenever the write could not start. */
restart:
	bwillwrite();
	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
	    segflg, link, td);
	if ((error = namei(&nd)) != 0)
		goto out;
	vfslocked = NDHASGIANT(&nd);
	if (nd.ni_vp) {
		/* The link name is already in use. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		VFS_UNLOCK_GIANT(vfslocked);
		error = EEXIST;
		goto out;
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * The V_NOWAIT attempt failed: release what the lookup
		 * returned, retry with V_XSLEEP | PCATCH and start over.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			goto out;
		goto restart;
	}
	VATTR_NULL(&vattr);
	/* Access permission bits minus the process file-creation mask. */
	FILEDESC_SLOCK(td->td_proc->p_fd);
	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
	FILEDESC_SUNLOCK(td->td_proc->p_fd);
#ifdef MAC
	/* va_type is only filled in for the benefit of the MAC check. */
	vattr.va_type = VLNK;
	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error)
		goto out2;
#endif
	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
	if (error == 0)
		vput(nd.ni_vp);
#ifdef MAC
out2:
#endif
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
out:
	/* Free the copy of path, if one was made above. */
	if (segflg != UIO_SYSSPACE)
		uma_zfree(namei_zone, syspath);
	return (error);
}
1540 
1541 /*
1542  * Delete a whiteout from the filesystem.
1543  */
1544 int
1545 undelete(td, uap)
1546 	struct thread *td;
1547 	register struct undelete_args /* {
1548 		char *path;
1549 	} */ *uap;
1550 {
1551 	int error;
1552 	struct mount *mp;
1553 	struct nameidata nd;
1554 	int vfslocked;
1555 
1556 restart:
1557 	bwillwrite();
1558 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1559 	    UIO_USERSPACE, uap->path, td);
1560 	error = namei(&nd);
1561 	if (error)
1562 		return (error);
1563 	vfslocked = NDHASGIANT(&nd);
1564 
1565 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1566 		NDFREE(&nd, NDF_ONLY_PNBUF);
1567 		if (nd.ni_vp == nd.ni_dvp)
1568 			vrele(nd.ni_dvp);
1569 		else
1570 			vput(nd.ni_dvp);
1571 		if (nd.ni_vp)
1572 			vrele(nd.ni_vp);
1573 		VFS_UNLOCK_GIANT(vfslocked);
1574 		return (EEXIST);
1575 	}
1576 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1577 		NDFREE(&nd, NDF_ONLY_PNBUF);
1578 		vput(nd.ni_dvp);
1579 		VFS_UNLOCK_GIANT(vfslocked);
1580 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1581 			return (error);
1582 		goto restart;
1583 	}
1584 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1585 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1586 	NDFREE(&nd, NDF_ONLY_PNBUF);
1587 	vput(nd.ni_dvp);
1588 	vn_finished_write(mp);
1589 	VFS_UNLOCK_GIANT(vfslocked);
1590 	return (error);
1591 }
1592 
1593 /*
1594  * Delete a name from the filesystem.
1595  */
1596 #ifndef _SYS_SYSPROTO_H_
1597 struct unlink_args {
1598 	char	*path;
1599 };
1600 #endif
1601 int
1602 unlink(td, uap)
1603 	struct thread *td;
1604 	struct unlink_args /* {
1605 		char *path;
1606 	} */ *uap;
1607 {
1608 	int error;
1609 
1610 	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1611 	return (error);
1612 }
1613 
/*
 * Remove the directory entry named by path (interpreted according to
 * pathseg).
 *
 * Returns 0 on success, EPERM if the name refers to a directory (or if
 * namei() failed with EINVAL), EBUSY if it is the root of a mounted
 * filesystem, or the error from namei(), vn_start_write(), the MAC hook
 * or VOP_REMOVE().
 */
int
kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
{
	struct mount *mp;
	struct vnode *vp;
	int error;
	struct nameidata nd;
	int vfslocked;

	/* The lookup is redone from here whenever the write could not start. */
restart:
	bwillwrite();
	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
	    pathseg, path, td);
	/* An EINVAL from the lookup is reported to the caller as EPERM. */
	if ((error = namei(&nd)) != 0)
		return (error == EINVAL ? EPERM : error);
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;
	if (vp->v_type == VDIR)
		error = EPERM;		/* POSIX */
	else {
		/*
		 * The root of a mounted filesystem cannot be deleted.
		 *
		 * XXX: can this only be a VDIR case?
		 */
		if (vp->v_vflag & VV_ROOT)
			error = EBUSY;
	}
	if (error == 0) {
		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
			/*
			 * The V_NOWAIT attempt failed: release both vnodes,
			 * retry with V_XSLEEP | PCATCH and start over.
			 */
			NDFREE(&nd, NDF_ONLY_PNBUF);
			vput(nd.ni_dvp);
			if (vp == nd.ni_dvp)
				vrele(vp);
			else
				vput(vp);
			VFS_UNLOCK_GIANT(vfslocked);
			if ((error = vn_start_write(NULL, &mp,
			    V_XSLEEP | PCATCH)) != 0)
				return (error);
			goto restart;
		}
#ifdef MAC
		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
		    &nd.ni_cnd);
		if (error)
			goto out;
#endif
		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
#ifdef MAC
out:
#endif
		vn_finished_write(mp);
	}
	/* Common exit for success and for the EPERM/EBUSY/VOP error cases. */
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	if (vp == nd.ni_dvp)
		vrele(vp);
	else
		vput(vp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
1678 
1679 /*
1680  * Reposition read/write file offset.
1681  */
1682 #ifndef _SYS_SYSPROTO_H_
1683 struct lseek_args {
1684 	int	fd;
1685 	int	pad;
1686 	off_t	offset;
1687 	int	whence;
1688 };
1689 #endif
1690 int
1691 lseek(td, uap)
1692 	struct thread *td;
1693 	register struct lseek_args /* {
1694 		int fd;
1695 		int pad;
1696 		off_t offset;
1697 		int whence;
1698 	} */ *uap;
1699 {
1700 	struct ucred *cred = td->td_ucred;
1701 	struct file *fp;
1702 	struct vnode *vp;
1703 	struct vattr vattr;
1704 	off_t offset;
1705 	int error, noneg;
1706 	int vfslocked;
1707 
1708 	if ((error = fget(td, uap->fd, &fp)) != 0)
1709 		return (error);
1710 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1711 		fdrop(fp, td);
1712 		return (ESPIPE);
1713 	}
1714 	vp = fp->f_vnode;
1715 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1716 	noneg = (vp->v_type != VCHR);
1717 	offset = uap->offset;
1718 	switch (uap->whence) {
1719 	case L_INCR:
1720 		if (noneg &&
1721 		    (fp->f_offset < 0 ||
1722 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1723 			error = EOVERFLOW;
1724 			break;
1725 		}
1726 		offset += fp->f_offset;
1727 		break;
1728 	case L_XTND:
1729 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1730 		error = VOP_GETATTR(vp, &vattr, cred, td);
1731 		VOP_UNLOCK(vp, 0, td);
1732 		if (error)
1733 			break;
1734 		if (noneg &&
1735 		    (vattr.va_size > OFF_MAX ||
1736 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1737 			error = EOVERFLOW;
1738 			break;
1739 		}
1740 		offset += vattr.va_size;
1741 		break;
1742 	case L_SET:
1743 		break;
1744 	case SEEK_DATA:
1745 		error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td);
1746 		break;
1747 	case SEEK_HOLE:
1748 		error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td);
1749 		break;
1750 	default:
1751 		error = EINVAL;
1752 	}
1753 	if (error == 0 && noneg && offset < 0)
1754 		error = EINVAL;
1755 	if (error != 0)
1756 		goto drop;
1757 	fp->f_offset = offset;
1758 	*(off_t *)(td->td_retval) = fp->f_offset;
1759 drop:
1760 	fdrop(fp, td);
1761 	VFS_UNLOCK_GIANT(vfslocked);
1762 	return (error);
1763 }
1764 
1765 #if defined(COMPAT_43)
1766 /*
1767  * Reposition read/write file offset.
1768  */
1769 #ifndef _SYS_SYSPROTO_H_
1770 struct olseek_args {
1771 	int	fd;
1772 	long	offset;
1773 	int	whence;
1774 };
1775 #endif
1776 int
1777 olseek(td, uap)
1778 	struct thread *td;
1779 	register struct olseek_args /* {
1780 		int fd;
1781 		long offset;
1782 		int whence;
1783 	} */ *uap;
1784 {
1785 	struct lseek_args /* {
1786 		int fd;
1787 		int pad;
1788 		off_t offset;
1789 		int whence;
1790 	} */ nuap;
1791 	int error;
1792 
1793 	nuap.fd = uap->fd;
1794 	nuap.offset = uap->offset;
1795 	nuap.whence = uap->whence;
1796 	error = lseek(td, &nuap);
1797 	return (error);
1798 }
1799 #endif /* COMPAT_43 */
1800 
1801 /*
1802  * Check access permissions using passed credentials.
1803  */
1804 static int
1805 vn_access(vp, user_flags, cred, td)
1806 	struct vnode	*vp;
1807 	int		user_flags;
1808 	struct ucred	*cred;
1809 	struct thread	*td;
1810 {
1811 	int error, flags;
1812 
1813 	/* Flags == 0 means only check for existence. */
1814 	error = 0;
1815 	if (user_flags) {
1816 		flags = 0;
1817 		if (user_flags & R_OK)
1818 			flags |= VREAD;
1819 		if (user_flags & W_OK)
1820 			flags |= VWRITE;
1821 		if (user_flags & X_OK)
1822 			flags |= VEXEC;
1823 #ifdef MAC
1824 		error = mac_check_vnode_access(cred, vp, flags);
1825 		if (error)
1826 			return (error);
1827 #endif
1828 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1829 			error = VOP_ACCESS(vp, flags, cred, td);
1830 	}
1831 	return (error);
1832 }
1833 
1834 /*
1835  * Check access permissions using "real" credentials.
1836  */
1837 #ifndef _SYS_SYSPROTO_H_
1838 struct access_args {
1839 	char	*path;
1840 	int	flags;
1841 };
1842 #endif
1843 int
1844 access(td, uap)
1845 	struct thread *td;
1846 	register struct access_args /* {
1847 		char *path;
1848 		int flags;
1849 	} */ *uap;
1850 {
1851 
1852 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1853 }
1854 
1855 int
1856 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1857 {
1858 	struct ucred *cred, *tmpcred;
1859 	register struct vnode *vp;
1860 	struct nameidata nd;
1861 	int vfslocked;
1862 	int error;
1863 
1864 	/*
1865 	 * Create and modify a temporary credential instead of one that
1866 	 * is potentially shared.  This could also mess up socket
1867 	 * buffer accounting which can run in an interrupt context.
1868 	 */
1869 	cred = td->td_ucred;
1870 	tmpcred = crdup(cred);
1871 	tmpcred->cr_uid = cred->cr_ruid;
1872 	tmpcred->cr_groups[0] = cred->cr_rgid;
1873 	td->td_ucred = tmpcred;
1874 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1875 	    pathseg, path, td);
1876 	if ((error = namei(&nd)) != 0)
1877 		goto out1;
1878 	vfslocked = NDHASGIANT(&nd);
1879 	vp = nd.ni_vp;
1880 
1881 	error = vn_access(vp, flags, tmpcred, td);
1882 	NDFREE(&nd, NDF_ONLY_PNBUF);
1883 	vput(vp);
1884 	VFS_UNLOCK_GIANT(vfslocked);
1885 out1:
1886 	td->td_ucred = cred;
1887 	crfree(tmpcred);
1888 	return (error);
1889 }
1890 
1891 /*
1892  * Check access permissions using "effective" credentials.
1893  */
1894 #ifndef _SYS_SYSPROTO_H_
1895 struct eaccess_args {
1896 	char	*path;
1897 	int	flags;
1898 };
1899 #endif
1900 int
1901 eaccess(td, uap)
1902 	struct thread *td;
1903 	register struct eaccess_args /* {
1904 		char *path;
1905 		int flags;
1906 	} */ *uap;
1907 {
1908 
1909 	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1910 }
1911 
1912 int
1913 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1914 {
1915 	struct nameidata nd;
1916 	struct vnode *vp;
1917 	int vfslocked;
1918 	int error;
1919 
1920 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1921 	    pathseg, path, td);
1922 	if ((error = namei(&nd)) != 0)
1923 		return (error);
1924 	vp = nd.ni_vp;
1925 	vfslocked = NDHASGIANT(&nd);
1926 	error = vn_access(vp, flags, td->td_ucred, td);
1927 	NDFREE(&nd, NDF_ONLY_PNBUF);
1928 	vput(vp);
1929 	VFS_UNLOCK_GIANT(vfslocked);
1930 	return (error);
1931 }
1932 
1933 #if defined(COMPAT_43)
1934 /*
1935  * Get file status; this version follows links.
1936  */
1937 #ifndef _SYS_SYSPROTO_H_
1938 struct ostat_args {
1939 	char	*path;
1940 	struct ostat *ub;
1941 };
1942 #endif
1943 int
1944 ostat(td, uap)
1945 	struct thread *td;
1946 	register struct ostat_args /* {
1947 		char *path;
1948 		struct ostat *ub;
1949 	} */ *uap;
1950 {
1951 	struct stat sb;
1952 	struct ostat osb;
1953 	int error;
1954 
1955 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1956 	if (error)
1957 		return (error);
1958 	cvtstat(&sb, &osb);
1959 	error = copyout(&osb, uap->ub, sizeof (osb));
1960 	return (error);
1961 }
1962 
1963 /*
1964  * Get file status; this version does not follow links.
1965  */
1966 #ifndef _SYS_SYSPROTO_H_
1967 struct olstat_args {
1968 	char	*path;
1969 	struct ostat *ub;
1970 };
1971 #endif
1972 int
1973 olstat(td, uap)
1974 	struct thread *td;
1975 	register struct olstat_args /* {
1976 		char *path;
1977 		struct ostat *ub;
1978 	} */ *uap;
1979 {
1980 	struct stat sb;
1981 	struct ostat osb;
1982 	int error;
1983 
1984 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
1985 	if (error)
1986 		return (error);
1987 	cvtstat(&sb, &osb);
1988 	error = copyout(&osb, uap->ub, sizeof (osb));
1989 	return (error);
1990 }
1991 
/*
 * Convert from a new (struct stat) to an old (struct ostat) stat structure.
 */
1995 void
1996 cvtstat(st, ost)
1997 	struct stat *st;
1998 	struct ostat *ost;
1999 {
2000 
2001 	ost->st_dev = st->st_dev;
2002 	ost->st_ino = st->st_ino;
2003 	ost->st_mode = st->st_mode;
2004 	ost->st_nlink = st->st_nlink;
2005 	ost->st_uid = st->st_uid;
2006 	ost->st_gid = st->st_gid;
2007 	ost->st_rdev = st->st_rdev;
2008 	if (st->st_size < (quad_t)1 << 32)
2009 		ost->st_size = st->st_size;
2010 	else
2011 		ost->st_size = -2;
2012 	ost->st_atime = st->st_atime;
2013 	ost->st_mtime = st->st_mtime;
2014 	ost->st_ctime = st->st_ctime;
2015 	ost->st_blksize = st->st_blksize;
2016 	ost->st_blocks = st->st_blocks;
2017 	ost->st_flags = st->st_flags;
2018 	ost->st_gen = st->st_gen;
2019 }
2020 #endif /* COMPAT_43 */
2021 
2022 /*
2023  * Get file status; this version follows links.
2024  */
2025 #ifndef _SYS_SYSPROTO_H_
2026 struct stat_args {
2027 	char	*path;
2028 	struct stat *ub;
2029 };
2030 #endif
2031 int
2032 stat(td, uap)
2033 	struct thread *td;
2034 	register struct stat_args /* {
2035 		char *path;
2036 		struct stat *ub;
2037 	} */ *uap;
2038 {
2039 	struct stat sb;
2040 	int error;
2041 
2042 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2043 	if (error == 0)
2044 		error = copyout(&sb, uap->ub, sizeof (sb));
2045 	return (error);
2046 }
2047 
2048 int
2049 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2050 {
2051 	struct nameidata nd;
2052 	struct stat sb;
2053 	int error, vfslocked;
2054 
2055 	NDINIT(&nd, LOOKUP,
2056 	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2057 	    pathseg, path, td);
2058 	if ((error = namei(&nd)) != 0)
2059 		return (error);
2060 	vfslocked = NDHASGIANT(&nd);
2061 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2062 	NDFREE(&nd, NDF_ONLY_PNBUF);
2063 	vput(nd.ni_vp);
2064 	VFS_UNLOCK_GIANT(vfslocked);
2065 	if (mtx_owned(&Giant))
2066 		printf("stat(%d): %s\n", vfslocked, path);
2067 	if (error)
2068 		return (error);
2069 	*sbp = sb;
2070 	return (0);
2071 }
2072 
2073 /*
2074  * Get file status; this version does not follow links.
2075  */
2076 #ifndef _SYS_SYSPROTO_H_
2077 struct lstat_args {
2078 	char	*path;
2079 	struct stat *ub;
2080 };
2081 #endif
2082 int
2083 lstat(td, uap)
2084 	struct thread *td;
2085 	register struct lstat_args /* {
2086 		char *path;
2087 		struct stat *ub;
2088 	} */ *uap;
2089 {
2090 	struct stat sb;
2091 	int error;
2092 
2093 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2094 	if (error == 0)
2095 		error = copyout(&sb, uap->ub, sizeof (sb));
2096 	return (error);
2097 }
2098 
2099 int
2100 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2101 {
2102 	struct vnode *vp;
2103 	struct stat sb;
2104 	struct nameidata nd;
2105 	int error, vfslocked;
2106 
2107 	NDINIT(&nd, LOOKUP,
2108 	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2109 	    pathseg, path, td);
2110 	if ((error = namei(&nd)) != 0)
2111 		return (error);
2112 	vfslocked = NDHASGIANT(&nd);
2113 	vp = nd.ni_vp;
2114 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2115 	NDFREE(&nd, NDF_ONLY_PNBUF);
2116 	vput(vp);
2117 	VFS_UNLOCK_GIANT(vfslocked);
2118 	if (error)
2119 		return (error);
2120 	*sbp = sb;
2121 	return (0);
2122 }
2123 
2124 /*
2125  * Implementation of the NetBSD [l]stat() functions.
2126  */
2127 void
2128 cvtnstat(sb, nsb)
2129 	struct stat *sb;
2130 	struct nstat *nsb;
2131 {
2132 	bzero(nsb, sizeof *nsb);
2133 	nsb->st_dev = sb->st_dev;
2134 	nsb->st_ino = sb->st_ino;
2135 	nsb->st_mode = sb->st_mode;
2136 	nsb->st_nlink = sb->st_nlink;
2137 	nsb->st_uid = sb->st_uid;
2138 	nsb->st_gid = sb->st_gid;
2139 	nsb->st_rdev = sb->st_rdev;
2140 	nsb->st_atimespec = sb->st_atimespec;
2141 	nsb->st_mtimespec = sb->st_mtimespec;
2142 	nsb->st_ctimespec = sb->st_ctimespec;
2143 	nsb->st_size = sb->st_size;
2144 	nsb->st_blocks = sb->st_blocks;
2145 	nsb->st_blksize = sb->st_blksize;
2146 	nsb->st_flags = sb->st_flags;
2147 	nsb->st_gen = sb->st_gen;
2148 	nsb->st_birthtimespec = sb->st_birthtimespec;
2149 }
2150 
2151 #ifndef _SYS_SYSPROTO_H_
2152 struct nstat_args {
2153 	char	*path;
2154 	struct nstat *ub;
2155 };
2156 #endif
2157 int
2158 nstat(td, uap)
2159 	struct thread *td;
2160 	register struct nstat_args /* {
2161 		char *path;
2162 		struct nstat *ub;
2163 	} */ *uap;
2164 {
2165 	struct stat sb;
2166 	struct nstat nsb;
2167 	int error;
2168 
2169 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2170 	if (error)
2171 		return (error);
2172 	cvtnstat(&sb, &nsb);
2173 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2174 	return (error);
2175 }
2176 
2177 /*
2178  * NetBSD lstat.  Get file status; this version does not follow links.
2179  */
#ifndef _SYS_SYSPROTO_H_
/*
 * Fallback argument layout for nlstat(); only compiled when
 * _SYS_SYSPROTO_H_ is not defined.  The name and the type of ub match
 * what nlstat() uses: it takes a struct nlstat_args and copies a
 * struct nstat out to ub.
 */
struct nlstat_args {
	char	*path;
	struct nstat *ub;
};
#endif
2186 int
2187 nlstat(td, uap)
2188 	struct thread *td;
2189 	register struct nlstat_args /* {
2190 		char *path;
2191 		struct nstat *ub;
2192 	} */ *uap;
2193 {
2194 	struct stat sb;
2195 	struct nstat nsb;
2196 	int error;
2197 
2198 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2199 	if (error)
2200 		return (error);
2201 	cvtnstat(&sb, &nsb);
2202 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2203 	return (error);
2204 }
2205 
2206 /*
2207  * Get configurable pathname variables.
2208  */
2209 #ifndef _SYS_SYSPROTO_H_
2210 struct pathconf_args {
2211 	char	*path;
2212 	int	name;
2213 };
2214 #endif
2215 int
2216 pathconf(td, uap)
2217 	struct thread *td;
2218 	register struct pathconf_args /* {
2219 		char *path;
2220 		int name;
2221 	} */ *uap;
2222 {
2223 
2224 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2225 }
2226 
2227 int
2228 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2229 {
2230 	struct nameidata nd;
2231 	int error, vfslocked;
2232 
2233 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2234 	    pathseg, path, td);
2235 	if ((error = namei(&nd)) != 0)
2236 		return (error);
2237 	vfslocked = NDHASGIANT(&nd);
2238 	NDFREE(&nd, NDF_ONLY_PNBUF);
2239 
2240 	/* If asynchronous I/O is available, it works for all files. */
2241 	if (name == _PC_ASYNC_IO)
2242 		td->td_retval[0] = async_io_version;
2243 	else
2244 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2245 	vput(nd.ni_vp);
2246 	VFS_UNLOCK_GIANT(vfslocked);
2247 	return (error);
2248 }
2249 
2250 /*
2251  * Return target name of a symbolic link.
2252  */
2253 #ifndef _SYS_SYSPROTO_H_
2254 struct readlink_args {
2255 	char	*path;
2256 	char	*buf;
2257 	int	count;
2258 };
2259 #endif
2260 int
2261 readlink(td, uap)
2262 	struct thread *td;
2263 	register struct readlink_args /* {
2264 		char *path;
2265 		char *buf;
2266 		int count;
2267 	} */ *uap;
2268 {
2269 
2270 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2271 	    UIO_USERSPACE, uap->count));
2272 }
2273 
2274 int
2275 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2276     enum uio_seg bufseg, int count)
2277 {
2278 	register struct vnode *vp;
2279 	struct iovec aiov;
2280 	struct uio auio;
2281 	int error;
2282 	struct nameidata nd;
2283 	int vfslocked;
2284 
2285 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2286 	    pathseg, path, td);
2287 	if ((error = namei(&nd)) != 0)
2288 		return (error);
2289 	NDFREE(&nd, NDF_ONLY_PNBUF);
2290 	vfslocked = NDHASGIANT(&nd);
2291 	vp = nd.ni_vp;
2292 #ifdef MAC
2293 	error = mac_check_vnode_readlink(td->td_ucred, vp);
2294 	if (error) {
2295 		vput(vp);
2296 		VFS_UNLOCK_GIANT(vfslocked);
2297 		return (error);
2298 	}
2299 #endif
2300 	if (vp->v_type != VLNK)
2301 		error = EINVAL;
2302 	else {
2303 		aiov.iov_base = buf;
2304 		aiov.iov_len = count;
2305 		auio.uio_iov = &aiov;
2306 		auio.uio_iovcnt = 1;
2307 		auio.uio_offset = 0;
2308 		auio.uio_rw = UIO_READ;
2309 		auio.uio_segflg = bufseg;
2310 		auio.uio_td = td;
2311 		auio.uio_resid = count;
2312 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2313 	}
2314 	vput(vp);
2315 	VFS_UNLOCK_GIANT(vfslocked);
2316 	td->td_retval[0] = count - auio.uio_resid;
2317 	return (error);
2318 }
2319 
2320 /*
2321  * Common implementation code for chflags() and fchflags().
2322  */
2323 static int
2324 setfflags(td, vp, flags)
2325 	struct thread *td;
2326 	struct vnode *vp;
2327 	int flags;
2328 {
2329 	int error;
2330 	struct mount *mp;
2331 	struct vattr vattr;
2332 
2333 	/*
2334 	 * Prevent non-root users from setting flags on devices.  When
2335 	 * a device is reused, users can retain ownership of the device
2336 	 * if they are allowed to set flags and programs assume that
2337 	 * chown can't fail when done as root.
2338 	 */
2339 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2340 		error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV,
2341 		    SUSER_ALLOWJAIL);
2342 		if (error)
2343 			return (error);
2344 	}
2345 
2346 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2347 		return (error);
2348 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2349 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2350 	VATTR_NULL(&vattr);
2351 	vattr.va_flags = flags;
2352 #ifdef MAC
2353 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2354 	if (error == 0)
2355 #endif
2356 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2357 	VOP_UNLOCK(vp, 0, td);
2358 	vn_finished_write(mp);
2359 	return (error);
2360 }
2361 
2362 /*
2363  * Change flags of a file given a path name.
2364  */
2365 #ifndef _SYS_SYSPROTO_H_
2366 struct chflags_args {
2367 	char	*path;
2368 	int	flags;
2369 };
2370 #endif
2371 int
2372 chflags(td, uap)
2373 	struct thread *td;
2374 	register struct chflags_args /* {
2375 		char *path;
2376 		int flags;
2377 	} */ *uap;
2378 {
2379 	int error;
2380 	struct nameidata nd;
2381 	int vfslocked;
2382 
2383 	AUDIT_ARG(fflags, uap->flags);
2384 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2385 	    uap->path, td);
2386 	if ((error = namei(&nd)) != 0)
2387 		return (error);
2388 	NDFREE(&nd, NDF_ONLY_PNBUF);
2389 	vfslocked = NDHASGIANT(&nd);
2390 	error = setfflags(td, nd.ni_vp, uap->flags);
2391 	vrele(nd.ni_vp);
2392 	VFS_UNLOCK_GIANT(vfslocked);
2393 	return (error);
2394 }
2395 
2396 /*
2397  * Same as chflags() but doesn't follow symlinks.
2398  */
2399 int
2400 lchflags(td, uap)
2401 	struct thread *td;
2402 	register struct lchflags_args /* {
2403 		char *path;
2404 		int flags;
2405 	} */ *uap;
2406 {
2407 	int error;
2408 	struct nameidata nd;
2409 	int vfslocked;
2410 
2411 	AUDIT_ARG(fflags, uap->flags);
2412 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2413 	    uap->path, td);
2414 	if ((error = namei(&nd)) != 0)
2415 		return (error);
2416 	vfslocked = NDHASGIANT(&nd);
2417 	NDFREE(&nd, NDF_ONLY_PNBUF);
2418 	error = setfflags(td, nd.ni_vp, uap->flags);
2419 	vrele(nd.ni_vp);
2420 	VFS_UNLOCK_GIANT(vfslocked);
2421 	return (error);
2422 }
2423 
2424 /*
2425  * Change flags of a file given a file descriptor.
2426  */
2427 #ifndef _SYS_SYSPROTO_H_
2428 struct fchflags_args {
2429 	int	fd;
2430 	int	flags;
2431 };
2432 #endif
2433 int
2434 fchflags(td, uap)
2435 	struct thread *td;
2436 	register struct fchflags_args /* {
2437 		int fd;
2438 		int flags;
2439 	} */ *uap;
2440 {
2441 	struct file *fp;
2442 	int vfslocked;
2443 	int error;
2444 
2445 	AUDIT_ARG(fd, uap->fd);
2446 	AUDIT_ARG(fflags, uap->flags);
2447 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2448 		return (error);
2449 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2450 #ifdef AUDIT
2451 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2452 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2453 	VOP_UNLOCK(fp->f_vnode, 0, td);
2454 #endif
2455 	error = setfflags(td, fp->f_vnode, uap->flags);
2456 	VFS_UNLOCK_GIANT(vfslocked);
2457 	fdrop(fp, td);
2458 	return (error);
2459 }
2460 
2461 /*
2462  * Common implementation code for chmod(), lchmod() and fchmod().
2463  */
2464 static int
2465 setfmode(td, vp, mode)
2466 	struct thread *td;
2467 	struct vnode *vp;
2468 	int mode;
2469 {
2470 	int error;
2471 	struct mount *mp;
2472 	struct vattr vattr;
2473 
2474 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2475 		return (error);
2476 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2477 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2478 	VATTR_NULL(&vattr);
2479 	vattr.va_mode = mode & ALLPERMS;
2480 #ifdef MAC
2481 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2482 	if (error == 0)
2483 #endif
2484 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2485 	VOP_UNLOCK(vp, 0, td);
2486 	vn_finished_write(mp);
2487 	return (error);
2488 }
2489 
2490 /*
2491  * Change mode of a file given path name.
2492  */
2493 #ifndef _SYS_SYSPROTO_H_
2494 struct chmod_args {
2495 	char	*path;
2496 	int	mode;
2497 };
2498 #endif
2499 int
2500 chmod(td, uap)
2501 	struct thread *td;
2502 	register struct chmod_args /* {
2503 		char *path;
2504 		int mode;
2505 	} */ *uap;
2506 {
2507 
2508 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2509 }
2510 
2511 int
2512 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2513 {
2514 	int error;
2515 	struct nameidata nd;
2516 	int vfslocked;
2517 
2518 	AUDIT_ARG(mode, mode);
2519 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2520 	if ((error = namei(&nd)) != 0)
2521 		return (error);
2522 	vfslocked = NDHASGIANT(&nd);
2523 	NDFREE(&nd, NDF_ONLY_PNBUF);
2524 	error = setfmode(td, nd.ni_vp, mode);
2525 	vrele(nd.ni_vp);
2526 	VFS_UNLOCK_GIANT(vfslocked);
2527 	return (error);
2528 }
2529 
2530 /*
2531  * Change mode of a file given path name (don't follow links.)
2532  */
2533 #ifndef _SYS_SYSPROTO_H_
2534 struct lchmod_args {
2535 	char	*path;
2536 	int	mode;
2537 };
2538 #endif
2539 int
2540 lchmod(td, uap)
2541 	struct thread *td;
2542 	register struct lchmod_args /* {
2543 		char *path;
2544 		int mode;
2545 	} */ *uap;
2546 {
2547 	int error;
2548 	struct nameidata nd;
2549 	int vfslocked;
2550 
2551 	AUDIT_ARG(mode, (mode_t)uap->mode);
2552 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2553 	    uap->path, td);
2554 	if ((error = namei(&nd)) != 0)
2555 		return (error);
2556 	vfslocked = NDHASGIANT(&nd);
2557 	NDFREE(&nd, NDF_ONLY_PNBUF);
2558 	error = setfmode(td, nd.ni_vp, uap->mode);
2559 	vrele(nd.ni_vp);
2560 	VFS_UNLOCK_GIANT(vfslocked);
2561 	return (error);
2562 }
2563 
2564 /*
2565  * Change mode of a file given a file descriptor.
2566  */
2567 #ifndef _SYS_SYSPROTO_H_
2568 struct fchmod_args {
2569 	int	fd;
2570 	int	mode;
2571 };
2572 #endif
2573 int
2574 fchmod(td, uap)
2575 	struct thread *td;
2576 	register struct fchmod_args /* {
2577 		int fd;
2578 		int mode;
2579 	} */ *uap;
2580 {
2581 	struct file *fp;
2582 	int vfslocked;
2583 	int error;
2584 
2585 	AUDIT_ARG(fd, uap->fd);
2586 	AUDIT_ARG(mode, uap->mode);
2587 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2588 		return (error);
2589 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2590 #ifdef AUDIT
2591 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2592 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2593 	VOP_UNLOCK(fp->f_vnode, 0, td);
2594 #endif
2595 	error = setfmode(td, fp->f_vnode, uap->mode);
2596 	VFS_UNLOCK_GIANT(vfslocked);
2597 	fdrop(fp, td);
2598 	return (error);
2599 }
2600 
2601 /*
2602  * Common implementation for chown(), lchown(), and fchown()
2603  */
2604 static int
2605 setfown(td, vp, uid, gid)
2606 	struct thread *td;
2607 	struct vnode *vp;
2608 	uid_t uid;
2609 	gid_t gid;
2610 {
2611 	int error;
2612 	struct mount *mp;
2613 	struct vattr vattr;
2614 
2615 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2616 		return (error);
2617 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2618 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2619 	VATTR_NULL(&vattr);
2620 	vattr.va_uid = uid;
2621 	vattr.va_gid = gid;
2622 #ifdef MAC
2623 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2624 	    vattr.va_gid);
2625 	if (error == 0)
2626 #endif
2627 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2628 	VOP_UNLOCK(vp, 0, td);
2629 	vn_finished_write(mp);
2630 	return (error);
2631 }
2632 
2633 /*
2634  * Set ownership given a path name.
2635  */
2636 #ifndef _SYS_SYSPROTO_H_
2637 struct chown_args {
2638 	char	*path;
2639 	int	uid;
2640 	int	gid;
2641 };
2642 #endif
2643 int
2644 chown(td, uap)
2645 	struct thread *td;
2646 	register struct chown_args /* {
2647 		char *path;
2648 		int uid;
2649 		int gid;
2650 	} */ *uap;
2651 {
2652 
2653 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2654 }
2655 
2656 int
2657 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2658     int gid)
2659 {
2660 	int error;
2661 	struct nameidata nd;
2662 	int vfslocked;
2663 
2664 	AUDIT_ARG(owner, uid, gid);
2665 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2666 	if ((error = namei(&nd)) != 0)
2667 		return (error);
2668 	vfslocked = NDHASGIANT(&nd);
2669 	NDFREE(&nd, NDF_ONLY_PNBUF);
2670 	error = setfown(td, nd.ni_vp, uid, gid);
2671 	vrele(nd.ni_vp);
2672 	VFS_UNLOCK_GIANT(vfslocked);
2673 	return (error);
2674 }
2675 
2676 /*
2677  * Set ownership given a path name, do not cross symlinks.
2678  */
2679 #ifndef _SYS_SYSPROTO_H_
2680 struct lchown_args {
2681 	char	*path;
2682 	int	uid;
2683 	int	gid;
2684 };
2685 #endif
2686 int
2687 lchown(td, uap)
2688 	struct thread *td;
2689 	register struct lchown_args /* {
2690 		char *path;
2691 		int uid;
2692 		int gid;
2693 	} */ *uap;
2694 {
2695 
2696 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2697 }
2698 
2699 int
2700 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2701     int gid)
2702 {
2703 	int error;
2704 	struct nameidata nd;
2705 	int vfslocked;
2706 
2707 	AUDIT_ARG(owner, uid, gid);
2708 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2709 	if ((error = namei(&nd)) != 0)
2710 		return (error);
2711 	vfslocked = NDHASGIANT(&nd);
2712 	NDFREE(&nd, NDF_ONLY_PNBUF);
2713 	error = setfown(td, nd.ni_vp, uid, gid);
2714 	vrele(nd.ni_vp);
2715 	VFS_UNLOCK_GIANT(vfslocked);
2716 	return (error);
2717 }
2718 
2719 /*
2720  * Set ownership given a file descriptor.
2721  */
2722 #ifndef _SYS_SYSPROTO_H_
2723 struct fchown_args {
2724 	int	fd;
2725 	int	uid;
2726 	int	gid;
2727 };
2728 #endif
2729 int
2730 fchown(td, uap)
2731 	struct thread *td;
2732 	register struct fchown_args /* {
2733 		int fd;
2734 		int uid;
2735 		int gid;
2736 	} */ *uap;
2737 {
2738 	struct file *fp;
2739 	int vfslocked;
2740 	int error;
2741 
2742 	AUDIT_ARG(fd, uap->fd);
2743 	AUDIT_ARG(owner, uap->uid, uap->gid);
2744 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2745 		return (error);
2746 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2747 #ifdef AUDIT
2748 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2749 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2750 	VOP_UNLOCK(fp->f_vnode, 0, td);
2751 #endif
2752 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2753 	VFS_UNLOCK_GIANT(vfslocked);
2754 	fdrop(fp, td);
2755 	return (error);
2756 }
2757 
2758 /*
2759  * Common implementation code for utimes(), lutimes(), and futimes().
2760  */
2761 static int
2762 getutimes(usrtvp, tvpseg, tsp)
2763 	const struct timeval *usrtvp;
2764 	enum uio_seg tvpseg;
2765 	struct timespec *tsp;
2766 {
2767 	struct timeval tv[2];
2768 	const struct timeval *tvp;
2769 	int error;
2770 
2771 	if (usrtvp == NULL) {
2772 		microtime(&tv[0]);
2773 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2774 		tsp[1] = tsp[0];
2775 	} else {
2776 		if (tvpseg == UIO_SYSSPACE) {
2777 			tvp = usrtvp;
2778 		} else {
2779 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2780 				return (error);
2781 			tvp = tv;
2782 		}
2783 
2784 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2785 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2786 			return (EINVAL);
2787 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2788 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2789 	}
2790 	return (0);
2791 }
2792 
2793 /*
2794  * Common implementation code for utimes(), lutimes(), and futimes().
2795  */
2796 static int
2797 setutimes(td, vp, ts, numtimes, nullflag)
2798 	struct thread *td;
2799 	struct vnode *vp;
2800 	const struct timespec *ts;
2801 	int numtimes;
2802 	int nullflag;
2803 {
2804 	int error, setbirthtime;
2805 	struct mount *mp;
2806 	struct vattr vattr;
2807 
2808 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2809 		return (error);
2810 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2811 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2812 	setbirthtime = 0;
2813 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2814 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2815 		setbirthtime = 1;
2816 	VATTR_NULL(&vattr);
2817 	vattr.va_atime = ts[0];
2818 	vattr.va_mtime = ts[1];
2819 	if (setbirthtime)
2820 		vattr.va_birthtime = ts[1];
2821 	if (numtimes > 2)
2822 		vattr.va_birthtime = ts[2];
2823 	if (nullflag)
2824 		vattr.va_vaflags |= VA_UTIMES_NULL;
2825 #ifdef MAC
2826 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2827 	    vattr.va_mtime);
2828 #endif
2829 	if (error == 0)
2830 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2831 	VOP_UNLOCK(vp, 0, td);
2832 	vn_finished_write(mp);
2833 	return (error);
2834 }
2835 
2836 /*
2837  * Set the access and modification times of a file.
2838  */
2839 #ifndef _SYS_SYSPROTO_H_
2840 struct utimes_args {
2841 	char	*path;
2842 	struct	timeval *tptr;
2843 };
2844 #endif
2845 int
2846 utimes(td, uap)
2847 	struct thread *td;
2848 	register struct utimes_args /* {
2849 		char *path;
2850 		struct timeval *tptr;
2851 	} */ *uap;
2852 {
2853 
2854 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2855 	    UIO_USERSPACE));
2856 }
2857 
2858 int
2859 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2860     struct timeval *tptr, enum uio_seg tptrseg)
2861 {
2862 	struct timespec ts[2];
2863 	int error;
2864 	struct nameidata nd;
2865 	int vfslocked;
2866 
2867 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2868 		return (error);
2869 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2870 	if ((error = namei(&nd)) != 0)
2871 		return (error);
2872 	vfslocked = NDHASGIANT(&nd);
2873 	NDFREE(&nd, NDF_ONLY_PNBUF);
2874 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2875 	vrele(nd.ni_vp);
2876 	VFS_UNLOCK_GIANT(vfslocked);
2877 	return (error);
2878 }
2879 
2880 /*
2881  * Set the access and modification times of a file.
2882  */
2883 #ifndef _SYS_SYSPROTO_H_
2884 struct lutimes_args {
2885 	char	*path;
2886 	struct	timeval *tptr;
2887 };
2888 #endif
2889 int
2890 lutimes(td, uap)
2891 	struct thread *td;
2892 	register struct lutimes_args /* {
2893 		char *path;
2894 		struct timeval *tptr;
2895 	} */ *uap;
2896 {
2897 
2898 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2899 	    UIO_USERSPACE));
2900 }
2901 
2902 int
2903 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2904     struct timeval *tptr, enum uio_seg tptrseg)
2905 {
2906 	struct timespec ts[2];
2907 	int error;
2908 	struct nameidata nd;
2909 	int vfslocked;
2910 
2911 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2912 		return (error);
2913 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2914 	if ((error = namei(&nd)) != 0)
2915 		return (error);
2916 	vfslocked = NDHASGIANT(&nd);
2917 	NDFREE(&nd, NDF_ONLY_PNBUF);
2918 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2919 	vrele(nd.ni_vp);
2920 	VFS_UNLOCK_GIANT(vfslocked);
2921 	return (error);
2922 }
2923 
2924 /*
2925  * Set the access and modification times of a file.
2926  */
2927 #ifndef _SYS_SYSPROTO_H_
2928 struct futimes_args {
2929 	int	fd;
2930 	struct	timeval *tptr;
2931 };
2932 #endif
2933 int
2934 futimes(td, uap)
2935 	struct thread *td;
2936 	register struct futimes_args /* {
2937 		int  fd;
2938 		struct timeval *tptr;
2939 	} */ *uap;
2940 {
2941 
2942 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2943 }
2944 
2945 int
2946 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2947     enum uio_seg tptrseg)
2948 {
2949 	struct timespec ts[2];
2950 	struct file *fp;
2951 	int vfslocked;
2952 	int error;
2953 
2954 	AUDIT_ARG(fd, fd);
2955 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2956 		return (error);
2957 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2958 		return (error);
2959 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2960 #ifdef AUDIT
2961 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2962 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2963 	VOP_UNLOCK(fp->f_vnode, 0, td);
2964 #endif
2965 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2966 	VFS_UNLOCK_GIANT(vfslocked);
2967 	fdrop(fp, td);
2968 	return (error);
2969 }
2970 
2971 /*
2972  * Truncate a file given its path name.
2973  */
2974 #ifndef _SYS_SYSPROTO_H_
2975 struct truncate_args {
2976 	char	*path;
2977 	int	pad;
2978 	off_t	length;
2979 };
2980 #endif
2981 int
2982 truncate(td, uap)
2983 	struct thread *td;
2984 	register struct truncate_args /* {
2985 		char *path;
2986 		int pad;
2987 		off_t length;
2988 	} */ *uap;
2989 {
2990 
2991 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2992 }
2993 
2994 int
2995 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2996 {
2997 	struct mount *mp;
2998 	struct vnode *vp;
2999 	struct vattr vattr;
3000 	int error;
3001 	struct nameidata nd;
3002 	int vfslocked;
3003 
3004 	if (length < 0)
3005 		return(EINVAL);
3006 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3007 	if ((error = namei(&nd)) != 0)
3008 		return (error);
3009 	vfslocked = NDHASGIANT(&nd);
3010 	vp = nd.ni_vp;
3011 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3012 		vrele(vp);
3013 		VFS_UNLOCK_GIANT(vfslocked);
3014 		return (error);
3015 	}
3016 	NDFREE(&nd, NDF_ONLY_PNBUF);
3017 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3018 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3019 	if (vp->v_type == VDIR)
3020 		error = EISDIR;
3021 #ifdef MAC
3022 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
3023 	}
3024 #endif
3025 	else if ((error = vn_writechk(vp)) == 0 &&
3026 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3027 		VATTR_NULL(&vattr);
3028 		vattr.va_size = length;
3029 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3030 	}
3031 	vput(vp);
3032 	vn_finished_write(mp);
3033 	VFS_UNLOCK_GIANT(vfslocked);
3034 	return (error);
3035 }
3036 
3037 /*
3038  * Truncate a file given a file descriptor.
3039  */
3040 #ifndef _SYS_SYSPROTO_H_
3041 struct ftruncate_args {
3042 	int	fd;
3043 	int	pad;
3044 	off_t	length;
3045 };
3046 #endif
3047 int
3048 ftruncate(td, uap)
3049 	struct thread *td;
3050 	register struct ftruncate_args /* {
3051 		int fd;
3052 		int pad;
3053 		off_t length;
3054 	} */ *uap;
3055 {
3056 	struct mount *mp;
3057 	struct vattr vattr;
3058 	struct vnode *vp;
3059 	struct file *fp;
3060 	int vfslocked;
3061 	int error;
3062 
3063 	AUDIT_ARG(fd, uap->fd);
3064 	if (uap->length < 0)
3065 		return(EINVAL);
3066 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3067 		return (error);
3068 	if ((fp->f_flag & FWRITE) == 0) {
3069 		fdrop(fp, td);
3070 		return (EINVAL);
3071 	}
3072 	vp = fp->f_vnode;
3073 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3074 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3075 		goto drop;
3076 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3077 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3078 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3079 	if (vp->v_type == VDIR)
3080 		error = EISDIR;
3081 #ifdef MAC
3082 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3083 	    vp))) {
3084 	}
3085 #endif
3086 	else if ((error = vn_writechk(vp)) == 0) {
3087 		VATTR_NULL(&vattr);
3088 		vattr.va_size = uap->length;
3089 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3090 	}
3091 	VOP_UNLOCK(vp, 0, td);
3092 	vn_finished_write(mp);
3093 drop:
3094 	VFS_UNLOCK_GIANT(vfslocked);
3095 	fdrop(fp, td);
3096 	return (error);
3097 }
3098 
3099 #if defined(COMPAT_43)
3100 /*
3101  * Truncate a file given its path name.
3102  */
3103 #ifndef _SYS_SYSPROTO_H_
3104 struct otruncate_args {
3105 	char	*path;
3106 	long	length;
3107 };
3108 #endif
3109 int
3110 otruncate(td, uap)
3111 	struct thread *td;
3112 	register struct otruncate_args /* {
3113 		char *path;
3114 		long length;
3115 	} */ *uap;
3116 {
3117 	struct truncate_args /* {
3118 		char *path;
3119 		int pad;
3120 		off_t length;
3121 	} */ nuap;
3122 
3123 	nuap.path = uap->path;
3124 	nuap.length = uap->length;
3125 	return (truncate(td, &nuap));
3126 }
3127 
3128 /*
3129  * Truncate a file given a file descriptor.
3130  */
3131 #ifndef _SYS_SYSPROTO_H_
3132 struct oftruncate_args {
3133 	int	fd;
3134 	long	length;
3135 };
3136 #endif
3137 int
3138 oftruncate(td, uap)
3139 	struct thread *td;
3140 	register struct oftruncate_args /* {
3141 		int fd;
3142 		long length;
3143 	} */ *uap;
3144 {
3145 	struct ftruncate_args /* {
3146 		int fd;
3147 		int pad;
3148 		off_t length;
3149 	} */ nuap;
3150 
3151 	nuap.fd = uap->fd;
3152 	nuap.length = uap->length;
3153 	return (ftruncate(td, &nuap));
3154 }
3155 #endif /* COMPAT_43 */
3156 
3157 /*
3158  * Sync an open file.
3159  */
3160 #ifndef _SYS_SYSPROTO_H_
3161 struct fsync_args {
3162 	int	fd;
3163 };
3164 #endif
3165 int
3166 fsync(td, uap)
3167 	struct thread *td;
3168 	struct fsync_args /* {
3169 		int fd;
3170 	} */ *uap;
3171 {
3172 	struct vnode *vp;
3173 	struct mount *mp;
3174 	struct file *fp;
3175 	int vfslocked;
3176 	int error;
3177 
3178 	AUDIT_ARG(fd, uap->fd);
3179 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3180 		return (error);
3181 	vp = fp->f_vnode;
3182 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3183 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3184 		goto drop;
3185 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3186 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3187 	if (vp->v_object != NULL) {
3188 		VM_OBJECT_LOCK(vp->v_object);
3189 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3190 		VM_OBJECT_UNLOCK(vp->v_object);
3191 	}
3192 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3193 
3194 	VOP_UNLOCK(vp, 0, td);
3195 	vn_finished_write(mp);
3196 drop:
3197 	VFS_UNLOCK_GIANT(vfslocked);
3198 	fdrop(fp, td);
3199 	return (error);
3200 }
3201 
3202 /*
3203  * Rename files.  Source and destination must either both be directories, or
3204  * both not be directories.  If target is a directory, it must be empty.
3205  */
3206 #ifndef _SYS_SYSPROTO_H_
3207 struct rename_args {
3208 	char	*from;
3209 	char	*to;
3210 };
3211 #endif
3212 int
3213 rename(td, uap)
3214 	struct thread *td;
3215 	register struct rename_args /* {
3216 		char *from;
3217 		char *to;
3218 	} */ *uap;
3219 {
3220 
3221 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3222 }
3223 
3224 int
3225 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3226 {
3227 	struct mount *mp = NULL;
3228 	struct vnode *tvp, *fvp, *tdvp;
3229 	struct nameidata fromnd, tond;
3230 	int tvfslocked;
3231 	int fvfslocked;
3232 	int error;
3233 
3234 	bwillwrite();
3235 #ifdef MAC
3236 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3237 	    AUDITVNODE1, pathseg, from, td);
3238 #else
3239 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3240 	    AUDITVNODE1, pathseg, from, td);
3241 #endif
3242 	if ((error = namei(&fromnd)) != 0)
3243 		return (error);
3244 	fvfslocked = NDHASGIANT(&fromnd);
3245 	tvfslocked = 0;
3246 #ifdef MAC
3247 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3248 	    fromnd.ni_vp, &fromnd.ni_cnd);
3249 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3250 	if (fromnd.ni_dvp != fromnd.ni_vp)
3251 		VOP_UNLOCK(fromnd.ni_vp, 0, td);
3252 #endif
3253 	fvp = fromnd.ni_vp;
3254 	if (error == 0)
3255 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3256 	if (error != 0) {
3257 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3258 		vrele(fromnd.ni_dvp);
3259 		vrele(fvp);
3260 		goto out1;
3261 	}
3262 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3263 	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3264 	if (fromnd.ni_vp->v_type == VDIR)
3265 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3266 	if ((error = namei(&tond)) != 0) {
3267 		/* Translate error code for rename("dir1", "dir2/."). */
3268 		if (error == EISDIR && fvp->v_type == VDIR)
3269 			error = EINVAL;
3270 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3271 		vrele(fromnd.ni_dvp);
3272 		vrele(fvp);
3273 		vn_finished_write(mp);
3274 		goto out1;
3275 	}
3276 	tvfslocked = NDHASGIANT(&tond);
3277 	tdvp = tond.ni_dvp;
3278 	tvp = tond.ni_vp;
3279 	if (tvp != NULL) {
3280 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3281 			error = ENOTDIR;
3282 			goto out;
3283 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3284 			error = EISDIR;
3285 			goto out;
3286 		}
3287 	}
3288 	if (fvp == tdvp)
3289 		error = EINVAL;
3290 	/*
3291 	 * If the source is the same as the destination (that is, if they
3292 	 * are links to the same vnode), then there is nothing to do.
3293 	 */
3294 	if (fvp == tvp)
3295 		error = -1;
3296 #ifdef MAC
3297 	else
3298 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3299 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3300 #endif
3301 out:
3302 	if (!error) {
3303 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3304 		if (fromnd.ni_dvp != tdvp) {
3305 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3306 		}
3307 		if (tvp) {
3308 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3309 		}
3310 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3311 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3312 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3313 		NDFREE(&tond, NDF_ONLY_PNBUF);
3314 	} else {
3315 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3316 		NDFREE(&tond, NDF_ONLY_PNBUF);
3317 		if (tvp)
3318 			vput(tvp);
3319 		if (tdvp == tvp)
3320 			vrele(tdvp);
3321 		else
3322 			vput(tdvp);
3323 		vrele(fromnd.ni_dvp);
3324 		vrele(fvp);
3325 	}
3326 	vrele(tond.ni_startdir);
3327 	vn_finished_write(mp);
3328 out1:
3329 	if (fromnd.ni_startdir)
3330 		vrele(fromnd.ni_startdir);
3331 	VFS_UNLOCK_GIANT(fvfslocked);
3332 	VFS_UNLOCK_GIANT(tvfslocked);
3333 	if (error == -1)
3334 		return (0);
3335 	return (error);
3336 }
3337 
3338 /*
3339  * Make a directory file.
3340  */
3341 #ifndef _SYS_SYSPROTO_H_
3342 struct mkdir_args {
3343 	char	*path;
3344 	int	mode;
3345 };
3346 #endif
3347 int
3348 mkdir(td, uap)
3349 	struct thread *td;
3350 	register struct mkdir_args /* {
3351 		char *path;
3352 		int mode;
3353 	} */ *uap;
3354 {
3355 
3356 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3357 }
3358 
3359 int
3360 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3361 {
3362 	struct mount *mp;
3363 	struct vnode *vp;
3364 	struct vattr vattr;
3365 	int error;
3366 	struct nameidata nd;
3367 	int vfslocked;
3368 
3369 	AUDIT_ARG(mode, mode);
3370 restart:
3371 	bwillwrite();
3372 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3373 	    segflg, path, td);
3374 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3375 	if ((error = namei(&nd)) != 0)
3376 		return (error);
3377 	vfslocked = NDHASGIANT(&nd);
3378 	vp = nd.ni_vp;
3379 	if (vp != NULL) {
3380 		NDFREE(&nd, NDF_ONLY_PNBUF);
3381 		/*
3382 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3383 		 * the strange behaviour of leaving the vnode unlocked
3384 		 * if the target is the same vnode as the parent.
3385 		 */
3386 		if (vp == nd.ni_dvp)
3387 			vrele(nd.ni_dvp);
3388 		else
3389 			vput(nd.ni_dvp);
3390 		vrele(vp);
3391 		VFS_UNLOCK_GIANT(vfslocked);
3392 		return (EEXIST);
3393 	}
3394 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3395 		NDFREE(&nd, NDF_ONLY_PNBUF);
3396 		vput(nd.ni_dvp);
3397 		VFS_UNLOCK_GIANT(vfslocked);
3398 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3399 			return (error);
3400 		goto restart;
3401 	}
3402 	VATTR_NULL(&vattr);
3403 	vattr.va_type = VDIR;
3404 	FILEDESC_SLOCK(td->td_proc->p_fd);
3405 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3406 	FILEDESC_SUNLOCK(td->td_proc->p_fd);
3407 #ifdef MAC
3408 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3409 	    &vattr);
3410 	if (error)
3411 		goto out;
3412 #endif
3413 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3414 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3415 #ifdef MAC
3416 out:
3417 #endif
3418 	NDFREE(&nd, NDF_ONLY_PNBUF);
3419 	vput(nd.ni_dvp);
3420 	if (!error)
3421 		vput(nd.ni_vp);
3422 	vn_finished_write(mp);
3423 	VFS_UNLOCK_GIANT(vfslocked);
3424 	return (error);
3425 }
3426 
3427 /*
3428  * Remove a directory file.
3429  */
3430 #ifndef _SYS_SYSPROTO_H_
3431 struct rmdir_args {
3432 	char	*path;
3433 };
3434 #endif
3435 int
3436 rmdir(td, uap)
3437 	struct thread *td;
3438 	struct rmdir_args /* {
3439 		char *path;
3440 	} */ *uap;
3441 {
3442 
3443 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3444 }
3445 
3446 int
3447 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3448 {
3449 	struct mount *mp;
3450 	struct vnode *vp;
3451 	int error;
3452 	struct nameidata nd;
3453 	int vfslocked;
3454 
3455 restart:
3456 	bwillwrite();
3457 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3458 	    pathseg, path, td);
3459 	if ((error = namei(&nd)) != 0)
3460 		return (error);
3461 	vfslocked = NDHASGIANT(&nd);
3462 	vp = nd.ni_vp;
3463 	if (vp->v_type != VDIR) {
3464 		error = ENOTDIR;
3465 		goto out;
3466 	}
3467 	/*
3468 	 * No rmdir "." please.
3469 	 */
3470 	if (nd.ni_dvp == vp) {
3471 		error = EINVAL;
3472 		goto out;
3473 	}
3474 	/*
3475 	 * The root of a mounted filesystem cannot be deleted.
3476 	 */
3477 	if (vp->v_vflag & VV_ROOT) {
3478 		error = EBUSY;
3479 		goto out;
3480 	}
3481 #ifdef MAC
3482 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3483 	    &nd.ni_cnd);
3484 	if (error)
3485 		goto out;
3486 #endif
3487 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3488 		NDFREE(&nd, NDF_ONLY_PNBUF);
3489 		vput(vp);
3490 		if (nd.ni_dvp == vp)
3491 			vrele(nd.ni_dvp);
3492 		else
3493 			vput(nd.ni_dvp);
3494 		VFS_UNLOCK_GIANT(vfslocked);
3495 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3496 			return (error);
3497 		goto restart;
3498 	}
3499 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3500 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3501 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3502 	vn_finished_write(mp);
3503 out:
3504 	NDFREE(&nd, NDF_ONLY_PNBUF);
3505 	vput(vp);
3506 	if (nd.ni_dvp == vp)
3507 		vrele(nd.ni_dvp);
3508 	else
3509 		vput(nd.ni_dvp);
3510 	VFS_UNLOCK_GIANT(vfslocked);
3511 	return (error);
3512 }
3513 
3514 #ifdef COMPAT_43
3515 /*
3516  * Read a block of directory entries in a filesystem independent format.
3517  */
3518 #ifndef _SYS_SYSPROTO_H_
3519 struct ogetdirentries_args {
3520 	int	fd;
3521 	char	*buf;
3522 	u_int	count;
3523 	long	*basep;
3524 };
3525 #endif
3526 int
3527 ogetdirentries(td, uap)
3528 	struct thread *td;
3529 	register struct ogetdirentries_args /* {
3530 		int fd;
3531 		char *buf;
3532 		u_int count;
3533 		long *basep;
3534 	} */ *uap;
3535 {
3536 	struct vnode *vp;
3537 	struct file *fp;
3538 	struct uio auio, kuio;
3539 	struct iovec aiov, kiov;
3540 	struct dirent *dp, *edp;
3541 	caddr_t dirbuf;
3542 	int error, eofflag, readcnt, vfslocked;
3543 	long loff;
3544 
3545 	/* XXX arbitrary sanity limit on `count'. */
3546 	if (uap->count > 64 * 1024)
3547 		return (EINVAL);
3548 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3549 		return (error);
3550 	if ((fp->f_flag & FREAD) == 0) {
3551 		fdrop(fp, td);
3552 		return (EBADF);
3553 	}
3554 	vp = fp->f_vnode;
3555 unionread:
3556 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3557 	if (vp->v_type != VDIR) {
3558 		VFS_UNLOCK_GIANT(vfslocked);
3559 		fdrop(fp, td);
3560 		return (EINVAL);
3561 	}
3562 	aiov.iov_base = uap->buf;
3563 	aiov.iov_len = uap->count;
3564 	auio.uio_iov = &aiov;
3565 	auio.uio_iovcnt = 1;
3566 	auio.uio_rw = UIO_READ;
3567 	auio.uio_segflg = UIO_USERSPACE;
3568 	auio.uio_td = td;
3569 	auio.uio_resid = uap->count;
3570 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3571 	loff = auio.uio_offset = fp->f_offset;
3572 #ifdef MAC
3573 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3574 	if (error) {
3575 		VOP_UNLOCK(vp, 0, td);
3576 		VFS_UNLOCK_GIANT(vfslocked);
3577 		fdrop(fp, td);
3578 		return (error);
3579 	}
3580 #endif
3581 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3582 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3583 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3584 			    NULL, NULL);
3585 			fp->f_offset = auio.uio_offset;
3586 		} else
3587 #	endif
3588 	{
3589 		kuio = auio;
3590 		kuio.uio_iov = &kiov;
3591 		kuio.uio_segflg = UIO_SYSSPACE;
3592 		kiov.iov_len = uap->count;
3593 		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3594 		kiov.iov_base = dirbuf;
3595 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3596 			    NULL, NULL);
3597 		fp->f_offset = kuio.uio_offset;
3598 		if (error == 0) {
3599 			readcnt = uap->count - kuio.uio_resid;
3600 			edp = (struct dirent *)&dirbuf[readcnt];
3601 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3602 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3603 					/*
3604 					 * The expected low byte of
3605 					 * dp->d_namlen is our dp->d_type.
3606 					 * The high MBZ byte of dp->d_namlen
3607 					 * is our dp->d_namlen.
3608 					 */
3609 					dp->d_type = dp->d_namlen;
3610 					dp->d_namlen = 0;
3611 #				else
3612 					/*
3613 					 * The dp->d_type is the high byte
3614 					 * of the expected dp->d_namlen,
3615 					 * so must be zero'ed.
3616 					 */
3617 					dp->d_type = 0;
3618 #				endif
3619 				if (dp->d_reclen > 0) {
3620 					dp = (struct dirent *)
3621 					    ((char *)dp + dp->d_reclen);
3622 				} else {
3623 					error = EIO;
3624 					break;
3625 				}
3626 			}
3627 			if (dp >= edp)
3628 				error = uiomove(dirbuf, readcnt, &auio);
3629 		}
3630 		FREE(dirbuf, M_TEMP);
3631 	}
3632 	if (error) {
3633 		VOP_UNLOCK(vp, 0, td);
3634 		VFS_UNLOCK_GIANT(vfslocked);
3635 		fdrop(fp, td);
3636 		return (error);
3637 	}
3638 	if (uap->count == auio.uio_resid &&
3639 	    (vp->v_vflag & VV_ROOT) &&
3640 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
3641 		struct vnode *tvp = vp;
3642 		vp = vp->v_mount->mnt_vnodecovered;
3643 		VREF(vp);
3644 		fp->f_vnode = vp;
3645 		fp->f_data = vp;
3646 		fp->f_offset = 0;
3647 		vput(tvp);
3648 		VFS_UNLOCK_GIANT(vfslocked);
3649 		goto unionread;
3650 	}
3651 	VOP_UNLOCK(vp, 0, td);
3652 	VFS_UNLOCK_GIANT(vfslocked);
3653 	error = copyout(&loff, uap->basep, sizeof(long));
3654 	fdrop(fp, td);
3655 	td->td_retval[0] = uap->count - auio.uio_resid;
3656 	return (error);
3657 }
3658 #endif /* COMPAT_43 */
3659 
3660 /*
3661  * Read a block of directory entries in a filesystem independent format.
3662  */
3663 #ifndef _SYS_SYSPROTO_H_
3664 struct getdirentries_args {
3665 	int	fd;
3666 	char	*buf;
3667 	u_int	count;
3668 	long	*basep;
3669 };
3670 #endif
3671 int
3672 getdirentries(td, uap)
3673 	struct thread *td;
3674 	register struct getdirentries_args /* {
3675 		int fd;
3676 		char *buf;
3677 		u_int count;
3678 		long *basep;
3679 	} */ *uap;
3680 {
3681 	struct vnode *vp;
3682 	struct file *fp;
3683 	struct uio auio;
3684 	struct iovec aiov;
3685 	int vfslocked;
3686 	long loff;
3687 	int error, eofflag;
3688 
3689 	AUDIT_ARG(fd, uap->fd);
3690 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3691 		return (error);
3692 	if ((fp->f_flag & FREAD) == 0) {
3693 		fdrop(fp, td);
3694 		return (EBADF);
3695 	}
3696 	vp = fp->f_vnode;
3697 unionread:
3698 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3699 	if (vp->v_type != VDIR) {
3700 		VFS_UNLOCK_GIANT(vfslocked);
3701 		error = EINVAL;
3702 		goto fail;
3703 	}
3704 	aiov.iov_base = uap->buf;
3705 	aiov.iov_len = uap->count;
3706 	auio.uio_iov = &aiov;
3707 	auio.uio_iovcnt = 1;
3708 	auio.uio_rw = UIO_READ;
3709 	auio.uio_segflg = UIO_USERSPACE;
3710 	auio.uio_td = td;
3711 	auio.uio_resid = uap->count;
3712 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3713 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3714 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3715 	loff = auio.uio_offset = fp->f_offset;
3716 #ifdef MAC
3717 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3718 	if (error == 0)
3719 #endif
3720 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3721 		    NULL);
3722 	fp->f_offset = auio.uio_offset;
3723 	if (error) {
3724 		VOP_UNLOCK(vp, 0, td);
3725 		VFS_UNLOCK_GIANT(vfslocked);
3726 		goto fail;
3727 	}
3728 	if (uap->count == auio.uio_resid &&
3729 	    (vp->v_vflag & VV_ROOT) &&
3730 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
3731 		struct vnode *tvp = vp;
3732 		vp = vp->v_mount->mnt_vnodecovered;
3733 		VREF(vp);
3734 		fp->f_vnode = vp;
3735 		fp->f_data = vp;
3736 		fp->f_offset = 0;
3737 		vput(tvp);
3738 		VFS_UNLOCK_GIANT(vfslocked);
3739 		goto unionread;
3740 	}
3741 	VOP_UNLOCK(vp, 0, td);
3742 	VFS_UNLOCK_GIANT(vfslocked);
3743 	if (uap->basep != NULL) {
3744 		error = copyout(&loff, uap->basep, sizeof(long));
3745 	}
3746 	td->td_retval[0] = uap->count - auio.uio_resid;
3747 fail:
3748 	fdrop(fp, td);
3749 	return (error);
3750 }
3751 
3752 #ifndef _SYS_SYSPROTO_H_
3753 struct getdents_args {
3754 	int fd;
3755 	char *buf;
3756 	size_t count;
3757 };
3758 #endif
3759 int
3760 getdents(td, uap)
3761 	struct thread *td;
3762 	register struct getdents_args /* {
3763 		int fd;
3764 		char *buf;
3765 		u_int count;
3766 	} */ *uap;
3767 {
3768 	struct getdirentries_args ap;
3769 	ap.fd = uap->fd;
3770 	ap.buf = uap->buf;
3771 	ap.count = uap->count;
3772 	ap.basep = NULL;
3773 	return (getdirentries(td, &ap));
3774 }
3775 
3776 /*
3777  * Set the mode mask for creation of filesystem nodes.
3778  */
3779 #ifndef _SYS_SYSPROTO_H_
3780 struct umask_args {
3781 	int	newmask;
3782 };
3783 #endif
3784 int
3785 umask(td, uap)
3786 	struct thread *td;
3787 	struct umask_args /* {
3788 		int newmask;
3789 	} */ *uap;
3790 {
3791 	register struct filedesc *fdp;
3792 
3793 	FILEDESC_XLOCK(td->td_proc->p_fd);
3794 	fdp = td->td_proc->p_fd;
3795 	td->td_retval[0] = fdp->fd_cmask;
3796 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3797 	FILEDESC_XUNLOCK(td->td_proc->p_fd);
3798 	return (0);
3799 }
3800 
3801 /*
3802  * Void all references to file by ripping underlying filesystem away from
3803  * vnode.
3804  */
3805 #ifndef _SYS_SYSPROTO_H_
3806 struct revoke_args {
3807 	char	*path;
3808 };
3809 #endif
3810 int
3811 revoke(td, uap)
3812 	struct thread *td;
3813 	register struct revoke_args /* {
3814 		char *path;
3815 	} */ *uap;
3816 {
3817 	struct vnode *vp;
3818 	struct vattr vattr;
3819 	int error;
3820 	struct nameidata nd;
3821 	int vfslocked;
3822 
3823 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3824 	    UIO_USERSPACE, uap->path, td);
3825 	if ((error = namei(&nd)) != 0)
3826 		return (error);
3827 	vfslocked = NDHASGIANT(&nd);
3828 	vp = nd.ni_vp;
3829 	NDFREE(&nd, NDF_ONLY_PNBUF);
3830 	if (vp->v_type != VCHR) {
3831 		error = EINVAL;
3832 		goto out;
3833 	}
3834 #ifdef MAC
3835 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3836 	if (error)
3837 		goto out;
3838 #endif
3839 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3840 	if (error)
3841 		goto out;
3842 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3843 		error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN,
3844 		    SUSER_ALLOWJAIL);
3845 		if (error)
3846 			goto out;
3847 	}
3848 	if (vcount(vp) > 1)
3849 		VOP_REVOKE(vp, REVOKEALL);
3850 out:
3851 	vput(vp);
3852 	VFS_UNLOCK_GIANT(vfslocked);
3853 	return (error);
3854 }
3855 
3856 /*
3857  * Convert a user file descriptor to a kernel file entry.
3858  * A reference on the file entry is held upon returning.
3859  */
3860 int
3861 getvnode(fdp, fd, fpp)
3862 	struct filedesc *fdp;
3863 	int fd;
3864 	struct file **fpp;
3865 {
3866 	int error;
3867 	struct file *fp;
3868 
3869 	fp = NULL;
3870 	if (fdp == NULL)
3871 		error = EBADF;
3872 	else {
3873 		FILEDESC_SLOCK(fdp);
3874 		if ((u_int)fd >= fdp->fd_nfiles ||
3875 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3876 			error = EBADF;
3877 		else if (fp->f_vnode == NULL) {
3878 			fp = NULL;
3879 			error = EINVAL;
3880 		} else {
3881 			fhold(fp);
3882 			error = 0;
3883 		}
3884 		FILEDESC_SUNLOCK(fdp);
3885 	}
3886 	*fpp = fp;
3887 	return (error);
3888 }
3889 
3890 /*
3891  * Get an (NFS) file handle.
3892  */
3893 #ifndef _SYS_SYSPROTO_H_
3894 struct lgetfh_args {
3895 	char	*fname;
3896 	fhandle_t *fhp;
3897 };
3898 #endif
3899 int
3900 lgetfh(td, uap)
3901 	struct thread *td;
3902 	register struct lgetfh_args *uap;
3903 {
3904 	struct nameidata nd;
3905 	fhandle_t fh;
3906 	register struct vnode *vp;
3907 	int vfslocked;
3908 	int error;
3909 
3910 	error = priv_check(td, PRIV_VFS_GETFH);
3911 	if (error)
3912 		return (error);
3913 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3914 	    UIO_USERSPACE, uap->fname, td);
3915 	error = namei(&nd);
3916 	if (error)
3917 		return (error);
3918 	vfslocked = NDHASGIANT(&nd);
3919 	NDFREE(&nd, NDF_ONLY_PNBUF);
3920 	vp = nd.ni_vp;
3921 	bzero(&fh, sizeof(fh));
3922 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3923 	error = VOP_VPTOFH(vp, &fh.fh_fid);
3924 	vput(vp);
3925 	VFS_UNLOCK_GIANT(vfslocked);
3926 	if (error)
3927 		return (error);
3928 	error = copyout(&fh, uap->fhp, sizeof (fh));
3929 	return (error);
3930 }
3931 
3932 #ifndef _SYS_SYSPROTO_H_
3933 struct getfh_args {
3934 	char	*fname;
3935 	fhandle_t *fhp;
3936 };
3937 #endif
3938 int
3939 getfh(td, uap)
3940 	struct thread *td;
3941 	register struct getfh_args *uap;
3942 {
3943 	struct nameidata nd;
3944 	fhandle_t fh;
3945 	register struct vnode *vp;
3946 	int vfslocked;
3947 	int error;
3948 
3949 	error = priv_check(td, PRIV_VFS_GETFH);
3950 	if (error)
3951 		return (error);
3952 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3953 	    UIO_USERSPACE, uap->fname, td);
3954 	error = namei(&nd);
3955 	if (error)
3956 		return (error);
3957 	vfslocked = NDHASGIANT(&nd);
3958 	NDFREE(&nd, NDF_ONLY_PNBUF);
3959 	vp = nd.ni_vp;
3960 	bzero(&fh, sizeof(fh));
3961 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3962 	error = VOP_VPTOFH(vp, &fh.fh_fid);
3963 	vput(vp);
3964 	VFS_UNLOCK_GIANT(vfslocked);
3965 	if (error)
3966 		return (error);
3967 	error = copyout(&fh, uap->fhp, sizeof (fh));
3968 	return (error);
3969 }
3970 
3971 /*
3972  * syscall for the rpc.lockd to use to translate a NFS file handle into an
3973  * open descriptor.
3974  *
3975  * warning: do not remove the priv_check() call or this becomes one giant
3976  * security hole.
3977  */
3978 #ifndef _SYS_SYSPROTO_H_
3979 struct fhopen_args {
3980 	const struct fhandle *u_fhp;
3981 	int flags;
3982 };
3983 #endif
3984 int
3985 fhopen(td, uap)
3986 	struct thread *td;
3987 	struct fhopen_args /* {
3988 		const struct fhandle *u_fhp;
3989 		int flags;
3990 	} */ *uap;
3991 {
3992 	struct proc *p = td->td_proc;
3993 	struct mount *mp;
3994 	struct vnode *vp;
3995 	struct fhandle fhp;
3996 	struct vattr vat;
3997 	struct vattr *vap = &vat;
3998 	struct flock lf;
3999 	struct file *fp;
4000 	register struct filedesc *fdp = p->p_fd;
4001 	int fmode, mode, error, type;
4002 	struct file *nfp;
4003 	int vfslocked;
4004 	int indx;
4005 
4006 	error = priv_check(td, PRIV_VFS_FHOPEN);
4007 	if (error)
4008 		return (error);
4009 	fmode = FFLAGS(uap->flags);
4010 	/* why not allow a non-read/write open for our lockd? */
4011 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4012 		return (EINVAL);
4013 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4014 	if (error)
4015 		return(error);
4016 	/* find the mount point */
4017 	mp = vfs_getvfs(&fhp.fh_fsid);
4018 	if (mp == NULL)
4019 		return (ESTALE);
4020 	vfslocked = VFS_LOCK_GIANT(mp);
4021 	/* now give me my vnode, it gets returned to me locked */
4022 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4023 	if (error)
4024 		goto out;
4025 	/*
4026 	 * from now on we have to make sure not
4027 	 * to forget about the vnode
4028 	 * any error that causes an abort must vput(vp)
4029 	 * just set error = err and 'goto bad;'.
4030 	 */
4031 
4032 	/*
4033 	 * from vn_open
4034 	 */
4035 	if (vp->v_type == VLNK) {
4036 		error = EMLINK;
4037 		goto bad;
4038 	}
4039 	if (vp->v_type == VSOCK) {
4040 		error = EOPNOTSUPP;
4041 		goto bad;
4042 	}
4043 	mode = 0;
4044 	if (fmode & (FWRITE | O_TRUNC)) {
4045 		if (vp->v_type == VDIR) {
4046 			error = EISDIR;
4047 			goto bad;
4048 		}
4049 		error = vn_writechk(vp);
4050 		if (error)
4051 			goto bad;
4052 		mode |= VWRITE;
4053 	}
4054 	if (fmode & FREAD)
4055 		mode |= VREAD;
4056 	if (fmode & O_APPEND)
4057 		mode |= VAPPEND;
4058 #ifdef MAC
4059 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4060 	if (error)
4061 		goto bad;
4062 #endif
4063 	if (mode) {
4064 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4065 		if (error)
4066 			goto bad;
4067 	}
4068 	if (fmode & O_TRUNC) {
4069 		VOP_UNLOCK(vp, 0, td);				/* XXX */
4070 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4071 			vrele(vp);
4072 			goto out;
4073 		}
4074 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4075 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4076 #ifdef MAC
4077 		/*
4078 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4079 		 * should be right.
4080 		 */
4081 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4082 		if (error == 0) {
4083 #endif
4084 			VATTR_NULL(vap);
4085 			vap->va_size = 0;
4086 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4087 #ifdef MAC
4088 		}
4089 #endif
4090 		vn_finished_write(mp);
4091 		if (error)
4092 			goto bad;
4093 	}
4094 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
4095 	if (error)
4096 		goto bad;
4097 
4098 	if (fmode & FWRITE)
4099 		vp->v_writecount++;
4100 
4101 	/*
4102 	 * end of vn_open code
4103 	 */
4104 
4105 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4106 		if (fmode & FWRITE)
4107 			vp->v_writecount--;
4108 		goto bad;
4109 	}
4110 	/* An extra reference on `nfp' has been held for us by falloc(). */
4111 	fp = nfp;
4112 
4113 	FILE_LOCK(nfp);
4114 	nfp->f_vnode = vp;
4115 	nfp->f_data = vp;
4116 	nfp->f_flag = fmode & FMASK;
4117 	nfp->f_type = DTYPE_VNODE;
4118 	nfp->f_ops = &vnops;
4119 	FILE_UNLOCK(nfp);
4120 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4121 		lf.l_whence = SEEK_SET;
4122 		lf.l_start = 0;
4123 		lf.l_len = 0;
4124 		if (fmode & O_EXLOCK)
4125 			lf.l_type = F_WRLCK;
4126 		else
4127 			lf.l_type = F_RDLCK;
4128 		type = F_FLOCK;
4129 		if ((fmode & FNONBLOCK) == 0)
4130 			type |= F_WAIT;
4131 		VOP_UNLOCK(vp, 0, td);
4132 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4133 			    type)) != 0) {
4134 			/*
4135 			 * The lock request failed.  Normally close the
4136 			 * descriptor but handle the case where someone might
4137 			 * have dup()d or close()d it when we weren't looking.
4138 			 */
4139 			fdclose(fdp, fp, indx, td);
4140 
4141 			/*
4142 			 * release our private reference
4143 			 */
4144 			fdrop(fp, td);
4145 			goto out;
4146 		}
4147 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4148 		fp->f_flag |= FHASLOCK;
4149 	}
4150 
4151 	VOP_UNLOCK(vp, 0, td);
4152 	fdrop(fp, td);
4153 	vfs_rel(mp);
4154 	VFS_UNLOCK_GIANT(vfslocked);
4155 	td->td_retval[0] = indx;
4156 	return (0);
4157 
4158 bad:
4159 	vput(vp);
4160 out:
4161 	vfs_rel(mp);
4162 	VFS_UNLOCK_GIANT(vfslocked);
4163 	return (error);
4164 }
4165 
4166 /*
4167  * Stat an (NFS) file handle.
4168  */
4169 #ifndef _SYS_SYSPROTO_H_
4170 struct fhstat_args {
4171 	struct fhandle *u_fhp;
4172 	struct stat *sb;
4173 };
4174 #endif
4175 int
4176 fhstat(td, uap)
4177 	struct thread *td;
4178 	register struct fhstat_args /* {
4179 		struct fhandle *u_fhp;
4180 		struct stat *sb;
4181 	} */ *uap;
4182 {
4183 	struct stat sb;
4184 	fhandle_t fh;
4185 	struct mount *mp;
4186 	struct vnode *vp;
4187 	int vfslocked;
4188 	int error;
4189 
4190 	error = priv_check(td, PRIV_VFS_FHSTAT);
4191 	if (error)
4192 		return (error);
4193 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4194 	if (error)
4195 		return (error);
4196 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4197 		return (ESTALE);
4198 	vfslocked = VFS_LOCK_GIANT(mp);
4199 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4200 		vfs_rel(mp);
4201 		VFS_UNLOCK_GIANT(vfslocked);
4202 		return (error);
4203 	}
4204 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4205 	vput(vp);
4206 	vfs_rel(mp);
4207 	VFS_UNLOCK_GIANT(vfslocked);
4208 	if (error)
4209 		return (error);
4210 	error = copyout(&sb, uap->sb, sizeof(sb));
4211 	return (error);
4212 }
4213 
4214 /*
4215  * Implement fstatfs() for (NFS) file handles.
4216  */
4217 #ifndef _SYS_SYSPROTO_H_
4218 struct fhstatfs_args {
4219 	struct fhandle *u_fhp;
4220 	struct statfs *buf;
4221 };
4222 #endif
4223 int
4224 fhstatfs(td, uap)
4225 	struct thread *td;
4226 	struct fhstatfs_args /* {
4227 		struct fhandle *u_fhp;
4228 		struct statfs *buf;
4229 	} */ *uap;
4230 {
4231 	struct statfs sf;
4232 	fhandle_t fh;
4233 	int error;
4234 
4235 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4236 	if (error)
4237 		return (error);
4238 	error = kern_fhstatfs(td, fh, &sf);
4239 	if (error)
4240 		return (error);
4241 	return (copyout(&sf, uap->buf, sizeof(sf)));
4242 }
4243 
4244 int
4245 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4246 {
4247 	struct statfs *sp;
4248 	struct mount *mp;
4249 	struct vnode *vp;
4250 	int vfslocked;
4251 	int error;
4252 
4253 	error = priv_check(td, PRIV_VFS_FHSTATFS);
4254 	if (error)
4255 		return (error);
4256 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4257 		return (ESTALE);
4258 	vfslocked = VFS_LOCK_GIANT(mp);
4259 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4260 	if (error) {
4261 		VFS_UNLOCK_GIANT(vfslocked);
4262 		vfs_rel(mp);
4263 		return (error);
4264 	}
4265 	vput(vp);
4266 	error = prison_canseemount(td->td_ucred, mp);
4267 	if (error)
4268 		goto out;
4269 #ifdef MAC
4270 	error = mac_check_mount_stat(td->td_ucred, mp);
4271 	if (error)
4272 		goto out;
4273 #endif
4274 	/*
4275 	 * Set these in case the underlying filesystem fails to do so.
4276 	 */
4277 	sp = &mp->mnt_stat;
4278 	sp->f_version = STATFS_VERSION;
4279 	sp->f_namemax = NAME_MAX;
4280 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4281 	error = VFS_STATFS(mp, sp, td);
4282 	if (error == 0)
4283 		*buf = *sp;
4284 out:
4285 	vfs_rel(mp);
4286 	VFS_UNLOCK_GIANT(vfslocked);
4287 	return (error);
4288 }
4289