xref: /freebsd/sys/kern/vfs_syscalls.c (revision 2b743a9e9ddc6736208dc8ca1ce06ce64ad20a19)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/sysent.h>
48 #include <sys/malloc.h>
49 #include <sys/mount.h>
50 #include <sys/mutex.h>
51 #include <sys/sysproto.h>
52 #include <sys/namei.h>
53 #include <sys/filedesc.h>
54 #include <sys/kernel.h>
55 #include <sys/fcntl.h>
56 #include <sys/file.h>
57 #include <sys/limits.h>
58 #include <sys/linker.h>
59 #include <sys/stat.h>
60 #include <sys/sx.h>
61 #include <sys/unistd.h>
62 #include <sys/vnode.h>
63 #include <sys/priv.h>
64 #include <sys/proc.h>
65 #include <sys/dirent.h>
66 #include <sys/jail.h>
67 #include <sys/syscallsubr.h>
68 #include <sys/sysctl.h>
69 
70 #include <machine/stdarg.h>
71 
72 #include <security/audit/audit.h>
73 #include <security/mac/mac_framework.h>
74 
75 #include <vm/vm.h>
76 #include <vm/vm_object.h>
77 #include <vm/vm_page.h>
78 #include <vm/uma.h>
79 
80 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
81 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
82 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
83 static int setfmode(struct thread *td, struct vnode *, int);
84 static int setfflags(struct thread *td, struct vnode *, int);
85 static int setutimes(struct thread *td, struct vnode *,
86     const struct timespec *, int, int);
87 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
88     struct thread *td);
89 
90 /*
91  * The module initialization routine for POSIX asynchronous I/O will
92  * set this to the version of AIO that it implements.  (Zero means
93  * that it is not implemented.)  This value is used here by pathconf()
94  * and in kern_descrip.c by fpathconf().
95  */
96 int async_io_version;
97 
98 /*
99  * Sync each mounted filesystem.
100  */
101 #ifndef _SYS_SYSPROTO_H_
102 struct sync_args {
103 	int     dummy;
104 };
105 #endif
106 
107 #ifdef DEBUG
108 static int syncprt = 0;
109 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
110 #endif
111 
112 /* ARGSUSED */
113 int
114 sync(td, uap)
115 	struct thread *td;
116 	struct sync_args *uap;
117 {
118 	struct mount *mp, *nmp;
119 	int vfslocked;
120 
121 	mtx_lock(&mountlist_mtx);
122 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
123 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
124 			nmp = TAILQ_NEXT(mp, mnt_list);
125 			continue;
126 		}
127 		vfslocked = VFS_LOCK_GIANT(mp);
128 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
129 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
130 			MNT_ILOCK(mp);
131 			mp->mnt_noasync++;
132 			mp->mnt_kern_flag &= ~MNTK_ASYNC;
133 			MNT_IUNLOCK(mp);
134 			vfs_msync(mp, MNT_NOWAIT);
135 			VFS_SYNC(mp, MNT_NOWAIT, td);
136 			MNT_ILOCK(mp);
137 			mp->mnt_noasync--;
138 			if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
139 			    mp->mnt_noasync == 0)
140 				mp->mnt_kern_flag |= MNTK_ASYNC;
141 			MNT_IUNLOCK(mp);
142 			vn_finished_write(mp);
143 		}
144 		VFS_UNLOCK_GIANT(vfslocked);
145 		mtx_lock(&mountlist_mtx);
146 		nmp = TAILQ_NEXT(mp, mnt_list);
147 		vfs_unbusy(mp, td);
148 	}
149 	mtx_unlock(&mountlist_mtx);
150 	return (0);
151 }
152 
153 /* XXX PRISON: could be per prison flag */
154 static int prison_quotas;
155 #if 0
156 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
157 #endif
158 
159 /*
160  * Change filesystem quotas.
161  *
162  * MP SAFE
163  */
164 #ifndef _SYS_SYSPROTO_H_
165 struct quotactl_args {
166 	char *path;
167 	int cmd;
168 	int uid;
169 	caddr_t arg;
170 };
171 #endif
172 int
173 quotactl(td, uap)
174 	struct thread *td;
175 	register struct quotactl_args /* {
176 		char *path;
177 		int cmd;
178 		int uid;
179 		caddr_t arg;
180 	} */ *uap;
181 {
182 	struct mount *mp, *vmp;
183 	int vfslocked;
184 	int error;
185 	struct nameidata nd;
186 
187 	AUDIT_ARG(cmd, uap->cmd);
188 	AUDIT_ARG(uid, uap->uid);
189 	if (jailed(td->td_ucred) && !prison_quotas)
190 		return (EPERM);
191 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1,
192 	   UIO_USERSPACE, uap->path, td);
193 	if ((error = namei(&nd)) != 0)
194 		return (error);
195 	vfslocked = NDHASGIANT(&nd);
196 	NDFREE(&nd, NDF_ONLY_PNBUF);
197 	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
198 	mp = nd.ni_vp->v_mount;
199 	vrele(nd.ni_vp);
200 	if (error)
201 		goto out;
202 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
203 	vn_finished_write(vmp);
204 out:
205 	VFS_UNLOCK_GIANT(vfslocked);
206 	return (error);
207 }
208 
209 /*
210  * Get filesystem statistics.
211  */
212 #ifndef _SYS_SYSPROTO_H_
213 struct statfs_args {
214 	char *path;
215 	struct statfs *buf;
216 };
217 #endif
218 int
219 statfs(td, uap)
220 	struct thread *td;
221 	register struct statfs_args /* {
222 		char *path;
223 		struct statfs *buf;
224 	} */ *uap;
225 {
226 	struct statfs sf;
227 	int error;
228 
229 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
230 	if (error == 0)
231 		error = copyout(&sf, uap->buf, sizeof(sf));
232 	return (error);
233 }
234 
235 int
236 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
237     struct statfs *buf)
238 {
239 	struct mount *mp;
240 	struct statfs *sp, sb;
241 	int vfslocked;
242 	int error;
243 	struct nameidata nd;
244 
245 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
246 	    pathseg, path, td);
247 	error = namei(&nd);
248 	if (error)
249 		return (error);
250 	vfslocked = NDHASGIANT(&nd);
251 	mp = nd.ni_vp->v_mount;
252 	vfs_ref(mp);
253 	NDFREE(&nd, NDF_ONLY_PNBUF);
254 	vput(nd.ni_vp);
255 #ifdef MAC
256 	error = mac_check_mount_stat(td->td_ucred, mp);
257 	if (error)
258 		goto out;
259 #endif
260 	/*
261 	 * Set these in case the underlying filesystem fails to do so.
262 	 */
263 	sp = &mp->mnt_stat;
264 	sp->f_version = STATFS_VERSION;
265 	sp->f_namemax = NAME_MAX;
266 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
267 	error = VFS_STATFS(mp, sp, td);
268 	if (error)
269 		goto out;
270 	if (priv_check(td, PRIV_VFS_GENERATION)) {
271 		bcopy(sp, &sb, sizeof(sb));
272 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
273 		prison_enforce_statfs(td->td_ucred, mp, &sb);
274 		sp = &sb;
275 	}
276 	*buf = *sp;
277 out:
278 	vfs_rel(mp);
279 	VFS_UNLOCK_GIANT(vfslocked);
280 	if (mtx_owned(&Giant))
281 		printf("statfs(%d): %s: %d\n", vfslocked, path, error);
282 	return (error);
283 }
284 
285 /*
286  * Get filesystem statistics.
287  */
288 #ifndef _SYS_SYSPROTO_H_
289 struct fstatfs_args {
290 	int fd;
291 	struct statfs *buf;
292 };
293 #endif
294 int
295 fstatfs(td, uap)
296 	struct thread *td;
297 	register struct fstatfs_args /* {
298 		int fd;
299 		struct statfs *buf;
300 	} */ *uap;
301 {
302 	struct statfs sf;
303 	int error;
304 
305 	error = kern_fstatfs(td, uap->fd, &sf);
306 	if (error == 0)
307 		error = copyout(&sf, uap->buf, sizeof(sf));
308 	return (error);
309 }
310 
311 int
312 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
313 {
314 	struct file *fp;
315 	struct mount *mp;
316 	struct statfs *sp, sb;
317 	int vfslocked;
318 	struct vnode *vp;
319 	int error;
320 
321 	AUDIT_ARG(fd, fd);
322 	error = getvnode(td->td_proc->p_fd, fd, &fp);
323 	if (error)
324 		return (error);
325 	vp = fp->f_vnode;
326 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
327 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
328 #ifdef AUDIT
329 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
330 #endif
331 	mp = vp->v_mount;
332 	if (mp)
333 		vfs_ref(mp);
334 	VOP_UNLOCK(vp, 0, td);
335 	fdrop(fp, td);
336 	if (vp->v_iflag & VI_DOOMED) {
337 		error = EBADF;
338 		goto out;
339 	}
340 #ifdef MAC
341 	error = mac_check_mount_stat(td->td_ucred, mp);
342 	if (error)
343 		goto out;
344 #endif
345 	/*
346 	 * Set these in case the underlying filesystem fails to do so.
347 	 */
348 	sp = &mp->mnt_stat;
349 	sp->f_version = STATFS_VERSION;
350 	sp->f_namemax = NAME_MAX;
351 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
352 	error = VFS_STATFS(mp, sp, td);
353 	if (error)
354 		goto out;
355 	if (priv_check(td, PRIV_VFS_GENERATION)) {
356 		bcopy(sp, &sb, sizeof(sb));
357 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
358 		prison_enforce_statfs(td->td_ucred, mp, &sb);
359 		sp = &sb;
360 	}
361 	*buf = *sp;
362 out:
363 	if (mp)
364 		vfs_rel(mp);
365 	VFS_UNLOCK_GIANT(vfslocked);
366 	return (error);
367 }
368 
369 /*
370  * Get statistics on all filesystems.
371  */
372 #ifndef _SYS_SYSPROTO_H_
373 struct getfsstat_args {
374 	struct statfs *buf;
375 	long bufsize;
376 	int flags;
377 };
378 #endif
379 int
380 getfsstat(td, uap)
381 	struct thread *td;
382 	register struct getfsstat_args /* {
383 		struct statfs *buf;
384 		long bufsize;
385 		int flags;
386 	} */ *uap;
387 {
388 
389 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
390 	    uap->flags));
391 }
392 
393 /*
394  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
395  * 	The caller is responsible for freeing memory which will be allocated
396  *	in '*buf'.
397  */
398 int
399 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
400     enum uio_seg bufseg, int flags)
401 {
402 	struct mount *mp, *nmp;
403 	struct statfs *sfsp, *sp, sb;
404 	size_t count, maxcount;
405 	int vfslocked;
406 	int error;
407 
408 	maxcount = bufsize / sizeof(struct statfs);
409 	if (bufsize == 0)
410 		sfsp = NULL;
411 	else if (bufseg == UIO_USERSPACE)
412 		sfsp = *buf;
413 	else /* if (bufseg == UIO_SYSSPACE) */ {
414 		count = 0;
415 		mtx_lock(&mountlist_mtx);
416 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
417 			count++;
418 		}
419 		mtx_unlock(&mountlist_mtx);
420 		if (maxcount > count)
421 			maxcount = count;
422 		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
423 		    M_WAITOK);
424 	}
425 	count = 0;
426 	mtx_lock(&mountlist_mtx);
427 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
428 		if (prison_canseemount(td->td_ucred, mp) != 0) {
429 			nmp = TAILQ_NEXT(mp, mnt_list);
430 			continue;
431 		}
432 #ifdef MAC
433 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
434 			nmp = TAILQ_NEXT(mp, mnt_list);
435 			continue;
436 		}
437 #endif
438 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
439 			nmp = TAILQ_NEXT(mp, mnt_list);
440 			continue;
441 		}
442 		vfslocked = VFS_LOCK_GIANT(mp);
443 		if (sfsp && count < maxcount) {
444 			sp = &mp->mnt_stat;
445 			/*
446 			 * Set these in case the underlying filesystem
447 			 * fails to do so.
448 			 */
449 			sp->f_version = STATFS_VERSION;
450 			sp->f_namemax = NAME_MAX;
451 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
452 			/*
453 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
454 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
455 			 * overrides MNT_WAIT.
456 			 */
457 			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
458 			    (flags & MNT_WAIT)) &&
459 			    (error = VFS_STATFS(mp, sp, td))) {
460 				VFS_UNLOCK_GIANT(vfslocked);
461 				mtx_lock(&mountlist_mtx);
462 				nmp = TAILQ_NEXT(mp, mnt_list);
463 				vfs_unbusy(mp, td);
464 				continue;
465 			}
466 			if (priv_check(td, PRIV_VFS_GENERATION)) {
467 				bcopy(sp, &sb, sizeof(sb));
468 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
469 				prison_enforce_statfs(td->td_ucred, mp, &sb);
470 				sp = &sb;
471 			}
472 			if (bufseg == UIO_SYSSPACE)
473 				bcopy(sp, sfsp, sizeof(*sp));
474 			else /* if (bufseg == UIO_USERSPACE) */ {
475 				error = copyout(sp, sfsp, sizeof(*sp));
476 				if (error) {
477 					vfs_unbusy(mp, td);
478 					VFS_UNLOCK_GIANT(vfslocked);
479 					return (error);
480 				}
481 			}
482 			sfsp++;
483 		}
484 		VFS_UNLOCK_GIANT(vfslocked);
485 		count++;
486 		mtx_lock(&mountlist_mtx);
487 		nmp = TAILQ_NEXT(mp, mnt_list);
488 		vfs_unbusy(mp, td);
489 	}
490 	mtx_unlock(&mountlist_mtx);
491 	if (sfsp && count > maxcount)
492 		td->td_retval[0] = maxcount;
493 	else
494 		td->td_retval[0] = count;
495 	return (0);
496 }
497 
498 #ifdef COMPAT_FREEBSD4
499 /*
500  * Get old format filesystem statistics.
501  */
502 static void cvtstatfs(struct statfs *, struct ostatfs *);
503 
504 #ifndef _SYS_SYSPROTO_H_
505 struct freebsd4_statfs_args {
506 	char *path;
507 	struct ostatfs *buf;
508 };
509 #endif
510 int
511 freebsd4_statfs(td, uap)
512 	struct thread *td;
513 	struct freebsd4_statfs_args /* {
514 		char *path;
515 		struct ostatfs *buf;
516 	} */ *uap;
517 {
518 	struct ostatfs osb;
519 	struct statfs sf;
520 	int error;
521 
522 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
523 	if (error)
524 		return (error);
525 	cvtstatfs(&sf, &osb);
526 	return (copyout(&osb, uap->buf, sizeof(osb)));
527 }
528 
529 /*
530  * Get filesystem statistics.
531  */
532 #ifndef _SYS_SYSPROTO_H_
533 struct freebsd4_fstatfs_args {
534 	int fd;
535 	struct ostatfs *buf;
536 };
537 #endif
538 int
539 freebsd4_fstatfs(td, uap)
540 	struct thread *td;
541 	struct freebsd4_fstatfs_args /* {
542 		int fd;
543 		struct ostatfs *buf;
544 	} */ *uap;
545 {
546 	struct ostatfs osb;
547 	struct statfs sf;
548 	int error;
549 
550 	error = kern_fstatfs(td, uap->fd, &sf);
551 	if (error)
552 		return (error);
553 	cvtstatfs(&sf, &osb);
554 	return (copyout(&osb, uap->buf, sizeof(osb)));
555 }
556 
557 /*
558  * Get statistics on all filesystems.
559  */
560 #ifndef _SYS_SYSPROTO_H_
561 struct freebsd4_getfsstat_args {
562 	struct ostatfs *buf;
563 	long bufsize;
564 	int flags;
565 };
566 #endif
567 int
568 freebsd4_getfsstat(td, uap)
569 	struct thread *td;
570 	register struct freebsd4_getfsstat_args /* {
571 		struct ostatfs *buf;
572 		long bufsize;
573 		int flags;
574 	} */ *uap;
575 {
576 	struct statfs *buf, *sp;
577 	struct ostatfs osb;
578 	size_t count, size;
579 	int error;
580 
581 	count = uap->bufsize / sizeof(struct ostatfs);
582 	size = count * sizeof(struct statfs);
583 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
584 	if (size > 0) {
585 		count = td->td_retval[0];
586 		sp = buf;
587 		while (count > 0 && error == 0) {
588 			cvtstatfs(sp, &osb);
589 			error = copyout(&osb, uap->buf, sizeof(osb));
590 			sp++;
591 			uap->buf++;
592 			count--;
593 		}
594 		free(buf, M_TEMP);
595 	}
596 	return (error);
597 }
598 
599 /*
600  * Implement fstatfs() for (NFS) file handles.
601  */
602 #ifndef _SYS_SYSPROTO_H_
603 struct freebsd4_fhstatfs_args {
604 	struct fhandle *u_fhp;
605 	struct ostatfs *buf;
606 };
607 #endif
608 int
609 freebsd4_fhstatfs(td, uap)
610 	struct thread *td;
611 	struct freebsd4_fhstatfs_args /* {
612 		struct fhandle *u_fhp;
613 		struct ostatfs *buf;
614 	} */ *uap;
615 {
616 	struct ostatfs osb;
617 	struct statfs sf;
618 	fhandle_t fh;
619 	int error;
620 
621 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
622 	if (error)
623 		return (error);
624 	error = kern_fhstatfs(td, fh, &sf);
625 	if (error)
626 		return (error);
627 	cvtstatfs(&sf, &osb);
628 	return (copyout(&osb, uap->buf, sizeof(osb)));
629 }
630 
631 /*
632  * Convert a new format statfs structure to an old format statfs structure.
633  */
634 static void
635 cvtstatfs(nsp, osp)
636 	struct statfs *nsp;
637 	struct ostatfs *osp;
638 {
639 
640 	bzero(osp, sizeof(*osp));
641 	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
642 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
643 	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
644 	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
645 	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
646 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
647 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
648 	osp->f_owner = nsp->f_owner;
649 	osp->f_type = nsp->f_type;
650 	osp->f_flags = nsp->f_flags;
651 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
652 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
653 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
654 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
655 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
656 	    MIN(MFSNAMELEN, OMFSNAMELEN));
657 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
658 	    MIN(MNAMELEN, OMNAMELEN));
659 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
660 	    MIN(MNAMELEN, OMNAMELEN));
661 	osp->f_fsid = nsp->f_fsid;
662 }
663 #endif /* COMPAT_FREEBSD4 */
664 
665 /*
666  * Change current working directory to a given file descriptor.
667  */
668 #ifndef _SYS_SYSPROTO_H_
669 struct fchdir_args {
670 	int	fd;
671 };
672 #endif
673 int
674 fchdir(td, uap)
675 	struct thread *td;
676 	struct fchdir_args /* {
677 		int fd;
678 	} */ *uap;
679 {
680 	register struct filedesc *fdp = td->td_proc->p_fd;
681 	struct vnode *vp, *tdp, *vpold;
682 	struct mount *mp;
683 	struct file *fp;
684 	int vfslocked;
685 	int error;
686 
687 	AUDIT_ARG(fd, uap->fd);
688 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
689 		return (error);
690 	vp = fp->f_vnode;
691 	VREF(vp);
692 	fdrop(fp, td);
693 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
694 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
695 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
696 	error = change_dir(vp, td);
697 	while (!error && (mp = vp->v_mountedhere) != NULL) {
698 		int tvfslocked;
699 		if (vfs_busy(mp, 0, 0, td))
700 			continue;
701 		tvfslocked = VFS_LOCK_GIANT(mp);
702 		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
703 		vfs_unbusy(mp, td);
704 		if (error) {
705 			VFS_UNLOCK_GIANT(tvfslocked);
706 			break;
707 		}
708 		vput(vp);
709 		VFS_UNLOCK_GIANT(vfslocked);
710 		vp = tdp;
711 		vfslocked = tvfslocked;
712 	}
713 	if (error) {
714 		vput(vp);
715 		VFS_UNLOCK_GIANT(vfslocked);
716 		return (error);
717 	}
718 	VOP_UNLOCK(vp, 0, td);
719 	VFS_UNLOCK_GIANT(vfslocked);
720 	FILEDESC_LOCK_FAST(fdp);
721 	vpold = fdp->fd_cdir;
722 	fdp->fd_cdir = vp;
723 	FILEDESC_UNLOCK_FAST(fdp);
724 	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
725 	vrele(vpold);
726 	VFS_UNLOCK_GIANT(vfslocked);
727 	return (0);
728 }
729 
730 /*
731  * Change current working directory (``.'').
732  */
733 #ifndef _SYS_SYSPROTO_H_
734 struct chdir_args {
735 	char	*path;
736 };
737 #endif
738 int
739 chdir(td, uap)
740 	struct thread *td;
741 	struct chdir_args /* {
742 		char *path;
743 	} */ *uap;
744 {
745 
746 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
747 }
748 
749 int
750 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
751 {
752 	register struct filedesc *fdp = td->td_proc->p_fd;
753 	int error;
754 	struct nameidata nd;
755 	struct vnode *vp;
756 	int vfslocked;
757 
758 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
759 	    pathseg, path, td);
760 	if ((error = namei(&nd)) != 0)
761 		return (error);
762 	vfslocked = NDHASGIANT(&nd);
763 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
764 		vput(nd.ni_vp);
765 		VFS_UNLOCK_GIANT(vfslocked);
766 		NDFREE(&nd, NDF_ONLY_PNBUF);
767 		return (error);
768 	}
769 	VOP_UNLOCK(nd.ni_vp, 0, td);
770 	VFS_UNLOCK_GIANT(vfslocked);
771 	NDFREE(&nd, NDF_ONLY_PNBUF);
772 	FILEDESC_LOCK_FAST(fdp);
773 	vp = fdp->fd_cdir;
774 	fdp->fd_cdir = nd.ni_vp;
775 	FILEDESC_UNLOCK_FAST(fdp);
776 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
777 	vrele(vp);
778 	VFS_UNLOCK_GIANT(vfslocked);
779 	return (0);
780 }
781 
782 /*
783  * Helper function for raised chroot(2) security function:  Refuse if
784  * any filedescriptors are open directories.
785  */
786 static int
787 chroot_refuse_vdir_fds(fdp)
788 	struct filedesc *fdp;
789 {
790 	struct vnode *vp;
791 	struct file *fp;
792 	int fd;
793 
794 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
795 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
796 		fp = fget_locked(fdp, fd);
797 		if (fp == NULL)
798 			continue;
799 		if (fp->f_type == DTYPE_VNODE) {
800 			vp = fp->f_vnode;
801 			if (vp->v_type == VDIR)
802 				return (EPERM);
803 		}
804 	}
805 	return (0);
806 }
807 
808 /*
809  * This sysctl determines if we will allow a process to chroot(2) if it
810  * has a directory open:
811  *	0: disallowed for all processes.
812  *	1: allowed for processes that were not already chroot(2)'ed.
813  *	2: allowed for all processes.
814  */
815 
816 static int chroot_allow_open_directories = 1;
817 
818 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
819      &chroot_allow_open_directories, 0, "");
820 
821 /*
822  * Change notion of root (``/'') directory.
823  */
824 #ifndef _SYS_SYSPROTO_H_
825 struct chroot_args {
826 	char	*path;
827 };
828 #endif
829 int
830 chroot(td, uap)
831 	struct thread *td;
832 	struct chroot_args /* {
833 		char *path;
834 	} */ *uap;
835 {
836 	int error;
837 	struct nameidata nd;
838 	int vfslocked;
839 
840 	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT,
841 	    SUSER_ALLOWJAIL);
842 	if (error)
843 		return (error);
844 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
845 	    UIO_USERSPACE, uap->path, td);
846 	error = namei(&nd);
847 	if (error)
848 		goto error;
849 	vfslocked = NDHASGIANT(&nd);
850 	if ((error = change_dir(nd.ni_vp, td)) != 0)
851 		goto e_vunlock;
852 #ifdef MAC
853 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
854 		goto e_vunlock;
855 #endif
856 	VOP_UNLOCK(nd.ni_vp, 0, td);
857 	error = change_root(nd.ni_vp, td);
858 	vrele(nd.ni_vp);
859 	VFS_UNLOCK_GIANT(vfslocked);
860 	NDFREE(&nd, NDF_ONLY_PNBUF);
861 	return (error);
862 e_vunlock:
863 	vput(nd.ni_vp);
864 	VFS_UNLOCK_GIANT(vfslocked);
865 error:
866 	NDFREE(&nd, NDF_ONLY_PNBUF);
867 	return (error);
868 }
869 
870 /*
871  * Common routine for chroot and chdir.  Callers must provide a locked vnode
872  * instance.
873  */
874 int
875 change_dir(vp, td)
876 	struct vnode *vp;
877 	struct thread *td;
878 {
879 	int error;
880 
881 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
882 	if (vp->v_type != VDIR)
883 		return (ENOTDIR);
884 #ifdef MAC
885 	error = mac_check_vnode_chdir(td->td_ucred, vp);
886 	if (error)
887 		return (error);
888 #endif
889 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
890 	return (error);
891 }
892 
893 /*
894  * Common routine for kern_chroot() and jail_attach().  The caller is
895  * responsible for invoking priv_check() and mac_check_chroot() to authorize
896  * this operation.
897  */
898 int
899 change_root(vp, td)
900 	struct vnode *vp;
901 	struct thread *td;
902 {
903 	struct filedesc *fdp;
904 	struct vnode *oldvp;
905 	int vfslocked;
906 	int error;
907 
908 	VFS_ASSERT_GIANT(vp->v_mount);
909 	fdp = td->td_proc->p_fd;
910 	FILEDESC_LOCK(fdp);
911 	if (chroot_allow_open_directories == 0 ||
912 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
913 		error = chroot_refuse_vdir_fds(fdp);
914 		if (error) {
915 			FILEDESC_UNLOCK(fdp);
916 			return (error);
917 		}
918 	}
919 	oldvp = fdp->fd_rdir;
920 	fdp->fd_rdir = vp;
921 	VREF(fdp->fd_rdir);
922 	if (!fdp->fd_jdir) {
923 		fdp->fd_jdir = vp;
924 		VREF(fdp->fd_jdir);
925 	}
926 	FILEDESC_UNLOCK(fdp);
927 	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
928 	vrele(oldvp);
929 	VFS_UNLOCK_GIANT(vfslocked);
930 	return (0);
931 }
932 
933 /*
934  * Check permissions, allocate an open file structure,
935  * and call the device open routine if any.
936  *
937  * MP SAFE
938  */
939 #ifndef _SYS_SYSPROTO_H_
940 struct open_args {
941 	char	*path;
942 	int	flags;
943 	int	mode;
944 };
945 #endif
946 int
947 open(td, uap)
948 	struct thread *td;
949 	register struct open_args /* {
950 		char *path;
951 		int flags;
952 		int mode;
953 	} */ *uap;
954 {
955 
956 	return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
957 }
958 
959 int
960 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
961     int mode)
962 {
963 	struct proc *p = td->td_proc;
964 	struct filedesc *fdp = p->p_fd;
965 	struct file *fp;
966 	struct vnode *vp;
967 	struct vattr vat;
968 	struct mount *mp;
969 	int cmode;
970 	struct file *nfp;
971 	int type, indx, error;
972 	struct flock lf;
973 	struct nameidata nd;
974 	int vfslocked;
975 
976 	AUDIT_ARG(fflags, flags);
977 	AUDIT_ARG(mode, mode);
978 	if ((flags & O_ACCMODE) == O_ACCMODE)
979 		return (EINVAL);
980 	flags = FFLAGS(flags);
981 	error = falloc(td, &nfp, &indx);
982 	if (error)
983 		return (error);
984 	/* An extra reference on `nfp' has been held for us by falloc(). */
985 	fp = nfp;
986 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
987 	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
988 	td->td_dupfd = -1;		/* XXX check for fdopen */
989 	error = vn_open(&nd, &flags, cmode, indx);
990 	if (error) {
991 		/*
992 		 * If the vn_open replaced the method vector, something
993 		 * wonderous happened deep below and we just pass it up
994 		 * pretending we know what we do.
995 		 */
996 		if (error == ENXIO && fp->f_ops != &badfileops) {
997 			fdrop(fp, td);
998 			td->td_retval[0] = indx;
999 			return (0);
1000 		}
1001 
1002 		/*
1003 		 * release our own reference
1004 		 */
1005 		fdrop(fp, td);
1006 
1007 		/*
1008 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1009 		 * responsible for dropping the old contents of ofiles[indx]
1010 		 * if it succeeds.
1011 		 */
1012 		if ((error == ENODEV || error == ENXIO) &&
1013 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1014 		    (error =
1015 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1016 			td->td_retval[0] = indx;
1017 			return (0);
1018 		}
1019 		/*
1020 		 * Clean up the descriptor, but only if another thread hadn't
1021 		 * replaced or closed it.
1022 		 */
1023 		fdclose(fdp, fp, indx, td);
1024 
1025 		if (error == ERESTART)
1026 			error = EINTR;
1027 		return (error);
1028 	}
1029 	td->td_dupfd = 0;
1030 	vfslocked = NDHASGIANT(&nd);
1031 	NDFREE(&nd, NDF_ONLY_PNBUF);
1032 	vp = nd.ni_vp;
1033 
1034 	/*
1035 	 * There should be 2 references on the file, one from the descriptor
1036 	 * table, and one for us.
1037 	 *
1038 	 * Handle the case where someone closed the file (via its file
1039 	 * descriptor) while we were blocked.  The end result should look
1040 	 * like opening the file succeeded but it was immediately closed.
1041 	 * We call vn_close() manually because we haven't yet hooked up
1042 	 * the various 'struct file' fields.
1043 	 */
1044 	FILEDESC_LOCK(fdp);
1045 	FILE_LOCK(fp);
1046 	if (fp->f_count == 1) {
1047 		mp = vp->v_mount;
1048 		KASSERT(fdp->fd_ofiles[indx] != fp,
1049 		    ("Open file descriptor lost all refs"));
1050 		FILE_UNLOCK(fp);
1051 		FILEDESC_UNLOCK(fdp);
1052 		VOP_UNLOCK(vp, 0, td);
1053 		vn_close(vp, flags & FMASK, fp->f_cred, td);
1054 		VFS_UNLOCK_GIANT(vfslocked);
1055 		fdrop(fp, td);
1056 		td->td_retval[0] = indx;
1057 		return (0);
1058 	}
1059 	fp->f_vnode = vp;
1060 	if (fp->f_data == NULL)
1061 		fp->f_data = vp;
1062 	fp->f_flag = flags & FMASK;
1063 	if (fp->f_ops == &badfileops)
1064 		fp->f_ops = &vnops;
1065 	fp->f_seqcount = 1;
1066 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1067 	FILE_UNLOCK(fp);
1068 	FILEDESC_UNLOCK(fdp);
1069 
1070 	VOP_UNLOCK(vp, 0, td);
1071 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1072 		lf.l_whence = SEEK_SET;
1073 		lf.l_start = 0;
1074 		lf.l_len = 0;
1075 		if (flags & O_EXLOCK)
1076 			lf.l_type = F_WRLCK;
1077 		else
1078 			lf.l_type = F_RDLCK;
1079 		type = F_FLOCK;
1080 		if ((flags & FNONBLOCK) == 0)
1081 			type |= F_WAIT;
1082 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1083 			    type)) != 0)
1084 			goto bad;
1085 		fp->f_flag |= FHASLOCK;
1086 	}
1087 	if (flags & O_TRUNC) {
1088 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1089 			goto bad;
1090 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1091 		VATTR_NULL(&vat);
1092 		vat.va_size = 0;
1093 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1094 #ifdef MAC
1095 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1096 		if (error == 0)
1097 #endif
1098 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1099 		VOP_UNLOCK(vp, 0, td);
1100 		vn_finished_write(mp);
1101 		if (error)
1102 			goto bad;
1103 	}
1104 	VFS_UNLOCK_GIANT(vfslocked);
1105 	/*
1106 	 * Release our private reference, leaving the one associated with
1107 	 * the descriptor table intact.
1108 	 */
1109 	fdrop(fp, td);
1110 	td->td_retval[0] = indx;
1111 	return (0);
1112 bad:
1113 	VFS_UNLOCK_GIANT(vfslocked);
1114 	fdclose(fdp, fp, indx, td);
1115 	fdrop(fp, td);
1116 	return (error);
1117 }
1118 
#ifdef COMPAT_43
/*
 * Old creat(2): create a file.  Implemented as kern_open() with
 * O_WRONLY | O_CREAT | O_TRUNC and the caller's mode.
 *
 * MP SAFE
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(td, uap)
	struct thread *td;
	register struct ocreat_args /* {
		char *path;
		int mode;
	} */ *uap;
{
	int error;

	error = kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode);
	return (error);
}
#endif /* COMPAT_43 */
1144 
1145 /*
1146  * Create a special file.
1147  */
1148 #ifndef _SYS_SYSPROTO_H_
1149 struct mknod_args {
1150 	char	*path;
1151 	int	mode;
1152 	int	dev;
1153 };
1154 #endif
1155 int
1156 mknod(td, uap)
1157 	struct thread *td;
1158 	register struct mknod_args /* {
1159 		char *path;
1160 		int mode;
1161 		int dev;
1162 	} */ *uap;
1163 {
1164 
1165 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1166 }
1167 
1168 int
1169 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1170     int dev)
1171 {
1172 	struct vnode *vp;
1173 	struct mount *mp;
1174 	struct vattr vattr;
1175 	int error;
1176 	int whiteout = 0;
1177 	struct nameidata nd;
1178 	int vfslocked;
1179 
1180 	AUDIT_ARG(mode, mode);
1181 	AUDIT_ARG(dev, dev);
1182 	switch (mode & S_IFMT) {
1183 	case S_IFCHR:
1184 	case S_IFBLK:
1185 		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
1186 		break;
1187 	case S_IFMT:
1188 		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
1189 		break;
1190 	case S_IFWHT:
1191 		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
1192 		break;
1193 	default:
1194 		error = EINVAL;
1195 		break;
1196 	}
1197 	if (error)
1198 		return (error);
1199 restart:
1200 	bwillwrite();
1201 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1202 	    pathseg, path, td);
1203 	if ((error = namei(&nd)) != 0)
1204 		return (error);
1205 	vfslocked = NDHASGIANT(&nd);
1206 	vp = nd.ni_vp;
1207 	if (vp != NULL) {
1208 		NDFREE(&nd, NDF_ONLY_PNBUF);
1209 		if (vp == nd.ni_dvp)
1210 			vrele(nd.ni_dvp);
1211 		else
1212 			vput(nd.ni_dvp);
1213 		vrele(vp);
1214 		VFS_UNLOCK_GIANT(vfslocked);
1215 		return (EEXIST);
1216 	} else {
1217 		VATTR_NULL(&vattr);
1218 		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1219 		vattr.va_mode = (mode & ALLPERMS) &
1220 		    ~td->td_proc->p_fd->fd_cmask;
1221 		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1222 		vattr.va_rdev = dev;
1223 		whiteout = 0;
1224 
1225 		switch (mode & S_IFMT) {
1226 		case S_IFMT:	/* used by badsect to flag bad sectors */
1227 			vattr.va_type = VBAD;
1228 			break;
1229 		case S_IFCHR:
1230 			vattr.va_type = VCHR;
1231 			break;
1232 		case S_IFBLK:
1233 			vattr.va_type = VBLK;
1234 			break;
1235 		case S_IFWHT:
1236 			whiteout = 1;
1237 			break;
1238 		default:
1239 			panic("kern_mknod: invalid mode");
1240 		}
1241 	}
1242 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1243 		NDFREE(&nd, NDF_ONLY_PNBUF);
1244 		vput(nd.ni_dvp);
1245 		VFS_UNLOCK_GIANT(vfslocked);
1246 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1247 			return (error);
1248 		goto restart;
1249 	}
1250 #ifdef MAC
1251 	if (error == 0 && !whiteout)
1252 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1253 		    &nd.ni_cnd, &vattr);
1254 #endif
1255 	if (!error) {
1256 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1257 		if (whiteout)
1258 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1259 		else {
1260 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1261 						&nd.ni_cnd, &vattr);
1262 			if (error == 0)
1263 				vput(nd.ni_vp);
1264 		}
1265 	}
1266 	NDFREE(&nd, NDF_ONLY_PNBUF);
1267 	vput(nd.ni_dvp);
1268 	vn_finished_write(mp);
1269 	VFS_UNLOCK_GIANT(vfslocked);
1270 	return (error);
1271 }
1272 
1273 /*
1274  * Create a named pipe.
1275  */
1276 #ifndef _SYS_SYSPROTO_H_
1277 struct mkfifo_args {
1278 	char	*path;
1279 	int	mode;
1280 };
1281 #endif
1282 int
1283 mkfifo(td, uap)
1284 	struct thread *td;
1285 	register struct mkfifo_args /* {
1286 		char *path;
1287 		int mode;
1288 	} */ *uap;
1289 {
1290 
1291 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1292 }
1293 
1294 int
1295 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1296 {
1297 	struct mount *mp;
1298 	struct vattr vattr;
1299 	int error;
1300 	struct nameidata nd;
1301 	int vfslocked;
1302 
1303 	AUDIT_ARG(mode, mode);
1304 restart:
1305 	bwillwrite();
1306 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1307 	    pathseg, path, td);
1308 	if ((error = namei(&nd)) != 0)
1309 		return (error);
1310 	vfslocked = NDHASGIANT(&nd);
1311 	if (nd.ni_vp != NULL) {
1312 		NDFREE(&nd, NDF_ONLY_PNBUF);
1313 		if (nd.ni_vp == nd.ni_dvp)
1314 			vrele(nd.ni_dvp);
1315 		else
1316 			vput(nd.ni_dvp);
1317 		vrele(nd.ni_vp);
1318 		VFS_UNLOCK_GIANT(vfslocked);
1319 		return (EEXIST);
1320 	}
1321 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1322 		NDFREE(&nd, NDF_ONLY_PNBUF);
1323 		vput(nd.ni_dvp);
1324 		VFS_UNLOCK_GIANT(vfslocked);
1325 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1326 			return (error);
1327 		goto restart;
1328 	}
1329 	VATTR_NULL(&vattr);
1330 	vattr.va_type = VFIFO;
1331 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1332 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1333 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1334 #ifdef MAC
1335 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1336 	    &vattr);
1337 	if (error)
1338 		goto out;
1339 #endif
1340 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1341 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1342 	if (error == 0)
1343 		vput(nd.ni_vp);
1344 #ifdef MAC
1345 out:
1346 #endif
1347 	vput(nd.ni_dvp);
1348 	vn_finished_write(mp);
1349 	VFS_UNLOCK_GIANT(vfslocked);
1350 	NDFREE(&nd, NDF_ONLY_PNBUF);
1351 	return (error);
1352 }
1353 
1354 /*
1355  * Make a hard file link.
1356  */
1357 #ifndef _SYS_SYSPROTO_H_
1358 struct link_args {
1359 	char	*path;
1360 	char	*link;
1361 };
1362 #endif
1363 int
1364 link(td, uap)
1365 	struct thread *td;
1366 	register struct link_args /* {
1367 		char *path;
1368 		char *link;
1369 	} */ *uap;
1370 {
1371 	int error;
1372 
1373 	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1374 	return (error);
1375 }
1376 
1377 static int hardlink_check_uid = 0;
1378 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1379     &hardlink_check_uid, 0,
1380     "Unprivileged processes cannot create hard links to files owned by other "
1381     "users");
1382 static int hardlink_check_gid = 0;
1383 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1384     &hardlink_check_gid, 0,
1385     "Unprivileged processes cannot create hard links to files owned by other "
1386     "groups");
1387 
1388 static int
1389 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1390 {
1391 	struct vattr va;
1392 	int error;
1393 
1394 	if (!hardlink_check_uid && !hardlink_check_gid)
1395 		return (0);
1396 
1397 	error = VOP_GETATTR(vp, &va, cred, td);
1398 	if (error != 0)
1399 		return (error);
1400 
1401 	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
1402 		error = priv_check_cred(cred, PRIV_VFS_LINK,
1403 		    SUSER_ALLOWJAIL);
1404 		if (error)
1405 			return (error);
1406 	}
1407 
1408 	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
1409 		error = priv_check_cred(cred, PRIV_VFS_LINK,
1410 		    SUSER_ALLOWJAIL);
1411 		if (error)
1412 			return (error);
1413 	}
1414 
1415 	return (0);
1416 }
1417 
1418 int
1419 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1420 {
1421 	struct vnode *vp;
1422 	struct mount *mp;
1423 	struct nameidata nd;
1424 	int vfslocked;
1425 	int lvfslocked;
1426 	int error;
1427 
1428 	bwillwrite();
1429 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
1430 	if ((error = namei(&nd)) != 0)
1431 		return (error);
1432 	vfslocked = NDHASGIANT(&nd);
1433 	NDFREE(&nd, NDF_ONLY_PNBUF);
1434 	vp = nd.ni_vp;
1435 	if (vp->v_type == VDIR) {
1436 		vrele(vp);
1437 		VFS_UNLOCK_GIANT(vfslocked);
1438 		return (EPERM);		/* POSIX */
1439 	}
1440 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1441 		vrele(vp);
1442 		VFS_UNLOCK_GIANT(vfslocked);
1443 		return (error);
1444 	}
1445 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1446 	    segflg, link, td);
1447 	if ((error = namei(&nd)) == 0) {
1448 		lvfslocked = NDHASGIANT(&nd);
1449 		if (nd.ni_vp != NULL) {
1450 			if (nd.ni_dvp == nd.ni_vp)
1451 				vrele(nd.ni_dvp);
1452 			else
1453 				vput(nd.ni_dvp);
1454 			vrele(nd.ni_vp);
1455 			error = EEXIST;
1456 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1457 		    == 0) {
1458 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1459 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1460 			error = can_hardlink(vp, td, td->td_ucred);
1461 			if (error == 0)
1462 #ifdef MAC
1463 				error = mac_check_vnode_link(td->td_ucred,
1464 				    nd.ni_dvp, vp, &nd.ni_cnd);
1465 			if (error == 0)
1466 #endif
1467 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1468 			VOP_UNLOCK(vp, 0, td);
1469 			vput(nd.ni_dvp);
1470 		}
1471 		NDFREE(&nd, NDF_ONLY_PNBUF);
1472 		VFS_UNLOCK_GIANT(lvfslocked);
1473 	}
1474 	vrele(vp);
1475 	vn_finished_write(mp);
1476 	VFS_UNLOCK_GIANT(vfslocked);
1477 	return (error);
1478 }
1479 
1480 /*
1481  * Make a symbolic link.
1482  */
1483 #ifndef _SYS_SYSPROTO_H_
1484 struct symlink_args {
1485 	char	*path;
1486 	char	*link;
1487 };
1488 #endif
1489 int
1490 symlink(td, uap)
1491 	struct thread *td;
1492 	register struct symlink_args /* {
1493 		char *path;
1494 		char *link;
1495 	} */ *uap;
1496 {
1497 
1498 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1499 }
1500 
1501 int
1502 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1503 {
1504 	struct mount *mp;
1505 	struct vattr vattr;
1506 	char *syspath;
1507 	int error;
1508 	struct nameidata nd;
1509 	int vfslocked;
1510 
1511 	if (segflg == UIO_SYSSPACE) {
1512 		syspath = path;
1513 	} else {
1514 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1515 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1516 			goto out;
1517 	}
1518 	AUDIT_ARG(text, syspath);
1519 restart:
1520 	bwillwrite();
1521 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1522 	    segflg, link, td);
1523 	if ((error = namei(&nd)) != 0)
1524 		goto out;
1525 	vfslocked = NDHASGIANT(&nd);
1526 	if (nd.ni_vp) {
1527 		NDFREE(&nd, NDF_ONLY_PNBUF);
1528 		if (nd.ni_vp == nd.ni_dvp)
1529 			vrele(nd.ni_dvp);
1530 		else
1531 			vput(nd.ni_dvp);
1532 		vrele(nd.ni_vp);
1533 		VFS_UNLOCK_GIANT(vfslocked);
1534 		error = EEXIST;
1535 		goto out;
1536 	}
1537 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1538 		NDFREE(&nd, NDF_ONLY_PNBUF);
1539 		vput(nd.ni_dvp);
1540 		VFS_UNLOCK_GIANT(vfslocked);
1541 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1542 			goto out;
1543 		goto restart;
1544 	}
1545 	VATTR_NULL(&vattr);
1546 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1547 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1548 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1549 #ifdef MAC
1550 	vattr.va_type = VLNK;
1551 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1552 	    &vattr);
1553 	if (error)
1554 		goto out2;
1555 #endif
1556 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1557 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1558 	if (error == 0)
1559 		vput(nd.ni_vp);
1560 #ifdef MAC
1561 out2:
1562 #endif
1563 	NDFREE(&nd, NDF_ONLY_PNBUF);
1564 	vput(nd.ni_dvp);
1565 	vn_finished_write(mp);
1566 	VFS_UNLOCK_GIANT(vfslocked);
1567 out:
1568 	if (segflg != UIO_SYSSPACE)
1569 		uma_zfree(namei_zone, syspath);
1570 	return (error);
1571 }
1572 
1573 /*
1574  * Delete a whiteout from the filesystem.
1575  */
1576 int
1577 undelete(td, uap)
1578 	struct thread *td;
1579 	register struct undelete_args /* {
1580 		char *path;
1581 	} */ *uap;
1582 {
1583 	int error;
1584 	struct mount *mp;
1585 	struct nameidata nd;
1586 	int vfslocked;
1587 
1588 restart:
1589 	bwillwrite();
1590 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1591 	    UIO_USERSPACE, uap->path, td);
1592 	error = namei(&nd);
1593 	if (error)
1594 		return (error);
1595 	vfslocked = NDHASGIANT(&nd);
1596 
1597 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1598 		NDFREE(&nd, NDF_ONLY_PNBUF);
1599 		if (nd.ni_vp == nd.ni_dvp)
1600 			vrele(nd.ni_dvp);
1601 		else
1602 			vput(nd.ni_dvp);
1603 		if (nd.ni_vp)
1604 			vrele(nd.ni_vp);
1605 		VFS_UNLOCK_GIANT(vfslocked);
1606 		return (EEXIST);
1607 	}
1608 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1609 		NDFREE(&nd, NDF_ONLY_PNBUF);
1610 		vput(nd.ni_dvp);
1611 		VFS_UNLOCK_GIANT(vfslocked);
1612 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1613 			return (error);
1614 		goto restart;
1615 	}
1616 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1617 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1618 	NDFREE(&nd, NDF_ONLY_PNBUF);
1619 	vput(nd.ni_dvp);
1620 	vn_finished_write(mp);
1621 	VFS_UNLOCK_GIANT(vfslocked);
1622 	return (error);
1623 }
1624 
1625 /*
1626  * Delete a name from the filesystem.
1627  */
1628 #ifndef _SYS_SYSPROTO_H_
1629 struct unlink_args {
1630 	char	*path;
1631 };
1632 #endif
1633 int
1634 unlink(td, uap)
1635 	struct thread *td;
1636 	struct unlink_args /* {
1637 		char *path;
1638 	} */ *uap;
1639 {
1640 	int error;
1641 
1642 	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1643 	return (error);
1644 }
1645 
1646 int
1647 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1648 {
1649 	struct mount *mp;
1650 	struct vnode *vp;
1651 	int error;
1652 	struct nameidata nd;
1653 	int vfslocked;
1654 
1655 restart:
1656 	bwillwrite();
1657 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1658 	    pathseg, path, td);
1659 	if ((error = namei(&nd)) != 0)
1660 		return (error == EINVAL ? EPERM : error);
1661 	vfslocked = NDHASGIANT(&nd);
1662 	vp = nd.ni_vp;
1663 	if (vp->v_type == VDIR)
1664 		error = EPERM;		/* POSIX */
1665 	else {
1666 		/*
1667 		 * The root of a mounted filesystem cannot be deleted.
1668 		 *
1669 		 * XXX: can this only be a VDIR case?
1670 		 */
1671 		if (vp->v_vflag & VV_ROOT)
1672 			error = EBUSY;
1673 	}
1674 	if (error == 0) {
1675 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1676 			NDFREE(&nd, NDF_ONLY_PNBUF);
1677 			vput(nd.ni_dvp);
1678 			if (vp == nd.ni_dvp)
1679 				vrele(vp);
1680 			else
1681 				vput(vp);
1682 			VFS_UNLOCK_GIANT(vfslocked);
1683 			if ((error = vn_start_write(NULL, &mp,
1684 			    V_XSLEEP | PCATCH)) != 0)
1685 				return (error);
1686 			goto restart;
1687 		}
1688 #ifdef MAC
1689 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1690 		    &nd.ni_cnd);
1691 		if (error)
1692 			goto out;
1693 #endif
1694 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1695 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1696 #ifdef MAC
1697 out:
1698 #endif
1699 		vn_finished_write(mp);
1700 	}
1701 	NDFREE(&nd, NDF_ONLY_PNBUF);
1702 	vput(nd.ni_dvp);
1703 	if (vp == nd.ni_dvp)
1704 		vrele(vp);
1705 	else
1706 		vput(vp);
1707 	VFS_UNLOCK_GIANT(vfslocked);
1708 	return (error);
1709 }
1710 
1711 /*
1712  * Reposition read/write file offset.
1713  */
1714 #ifndef _SYS_SYSPROTO_H_
1715 struct lseek_args {
1716 	int	fd;
1717 	int	pad;
1718 	off_t	offset;
1719 	int	whence;
1720 };
1721 #endif
1722 int
1723 lseek(td, uap)
1724 	struct thread *td;
1725 	register struct lseek_args /* {
1726 		int fd;
1727 		int pad;
1728 		off_t offset;
1729 		int whence;
1730 	} */ *uap;
1731 {
1732 	struct ucred *cred = td->td_ucred;
1733 	struct file *fp;
1734 	struct vnode *vp;
1735 	struct vattr vattr;
1736 	off_t offset;
1737 	int error, noneg;
1738 	int vfslocked;
1739 
1740 	if ((error = fget(td, uap->fd, &fp)) != 0)
1741 		return (error);
1742 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1743 		fdrop(fp, td);
1744 		return (ESPIPE);
1745 	}
1746 	vp = fp->f_vnode;
1747 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1748 	noneg = (vp->v_type != VCHR);
1749 	offset = uap->offset;
1750 	switch (uap->whence) {
1751 	case L_INCR:
1752 		if (noneg &&
1753 		    (fp->f_offset < 0 ||
1754 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1755 			error = EOVERFLOW;
1756 			break;
1757 		}
1758 		offset += fp->f_offset;
1759 		break;
1760 	case L_XTND:
1761 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1762 		error = VOP_GETATTR(vp, &vattr, cred, td);
1763 		VOP_UNLOCK(vp, 0, td);
1764 		if (error)
1765 			break;
1766 		if (noneg &&
1767 		    (vattr.va_size > OFF_MAX ||
1768 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1769 			error = EOVERFLOW;
1770 			break;
1771 		}
1772 		offset += vattr.va_size;
1773 		break;
1774 	case L_SET:
1775 		break;
1776 	default:
1777 		error = EINVAL;
1778 	}
1779 	if (error == 0 && noneg && offset < 0)
1780 		error = EINVAL;
1781 	if (error != 0)
1782 		goto drop;
1783 	fp->f_offset = offset;
1784 	*(off_t *)(td->td_retval) = fp->f_offset;
1785 drop:
1786 	fdrop(fp, td);
1787 	VFS_UNLOCK_GIANT(vfslocked);
1788 	return (error);
1789 }
1790 
1791 #if defined(COMPAT_43)
1792 /*
1793  * Reposition read/write file offset.
1794  */
1795 #ifndef _SYS_SYSPROTO_H_
1796 struct olseek_args {
1797 	int	fd;
1798 	long	offset;
1799 	int	whence;
1800 };
1801 #endif
1802 int
1803 olseek(td, uap)
1804 	struct thread *td;
1805 	register struct olseek_args /* {
1806 		int fd;
1807 		long offset;
1808 		int whence;
1809 	} */ *uap;
1810 {
1811 	struct lseek_args /* {
1812 		int fd;
1813 		int pad;
1814 		off_t offset;
1815 		int whence;
1816 	} */ nuap;
1817 	int error;
1818 
1819 	nuap.fd = uap->fd;
1820 	nuap.offset = uap->offset;
1821 	nuap.whence = uap->whence;
1822 	error = lseek(td, &nuap);
1823 	return (error);
1824 }
1825 #endif /* COMPAT_43 */
1826 
1827 /*
1828  * Check access permissions using passed credentials.
1829  */
1830 static int
1831 vn_access(vp, user_flags, cred, td)
1832 	struct vnode	*vp;
1833 	int		user_flags;
1834 	struct ucred	*cred;
1835 	struct thread	*td;
1836 {
1837 	int error, flags;
1838 
1839 	/* Flags == 0 means only check for existence. */
1840 	error = 0;
1841 	if (user_flags) {
1842 		flags = 0;
1843 		if (user_flags & R_OK)
1844 			flags |= VREAD;
1845 		if (user_flags & W_OK)
1846 			flags |= VWRITE;
1847 		if (user_flags & X_OK)
1848 			flags |= VEXEC;
1849 #ifdef MAC
1850 		error = mac_check_vnode_access(cred, vp, flags);
1851 		if (error)
1852 			return (error);
1853 #endif
1854 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1855 			error = VOP_ACCESS(vp, flags, cred, td);
1856 	}
1857 	return (error);
1858 }
1859 
1860 /*
1861  * Check access permissions using "real" credentials.
1862  */
1863 #ifndef _SYS_SYSPROTO_H_
1864 struct access_args {
1865 	char	*path;
1866 	int	flags;
1867 };
1868 #endif
1869 int
1870 access(td, uap)
1871 	struct thread *td;
1872 	register struct access_args /* {
1873 		char *path;
1874 		int flags;
1875 	} */ *uap;
1876 {
1877 
1878 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1879 }
1880 
1881 int
1882 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1883 {
1884 	struct ucred *cred, *tmpcred;
1885 	register struct vnode *vp;
1886 	struct nameidata nd;
1887 	int vfslocked;
1888 	int error;
1889 
1890 	/*
1891 	 * Create and modify a temporary credential instead of one that
1892 	 * is potentially shared.  This could also mess up socket
1893 	 * buffer accounting which can run in an interrupt context.
1894 	 */
1895 	cred = td->td_ucred;
1896 	tmpcred = crdup(cred);
1897 	tmpcred->cr_uid = cred->cr_ruid;
1898 	tmpcred->cr_groups[0] = cred->cr_rgid;
1899 	td->td_ucred = tmpcred;
1900 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1901 	    pathseg, path, td);
1902 	if ((error = namei(&nd)) != 0)
1903 		goto out1;
1904 	vfslocked = NDHASGIANT(&nd);
1905 	vp = nd.ni_vp;
1906 
1907 	error = vn_access(vp, flags, tmpcred, td);
1908 	NDFREE(&nd, NDF_ONLY_PNBUF);
1909 	vput(vp);
1910 	VFS_UNLOCK_GIANT(vfslocked);
1911 out1:
1912 	td->td_ucred = cred;
1913 	crfree(tmpcred);
1914 	return (error);
1915 }
1916 
1917 /*
1918  * Check access permissions using "effective" credentials.
1919  */
1920 #ifndef _SYS_SYSPROTO_H_
1921 struct eaccess_args {
1922 	char	*path;
1923 	int	flags;
1924 };
1925 #endif
1926 int
1927 eaccess(td, uap)
1928 	struct thread *td;
1929 	register struct eaccess_args /* {
1930 		char *path;
1931 		int flags;
1932 	} */ *uap;
1933 {
1934 
1935 	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1936 }
1937 
1938 int
1939 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1940 {
1941 	struct nameidata nd;
1942 	struct vnode *vp;
1943 	int vfslocked;
1944 	int error;
1945 
1946 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1947 	    pathseg, path, td);
1948 	if ((error = namei(&nd)) != 0)
1949 		return (error);
1950 	vp = nd.ni_vp;
1951 	vfslocked = NDHASGIANT(&nd);
1952 	error = vn_access(vp, flags, td->td_ucred, td);
1953 	NDFREE(&nd, NDF_ONLY_PNBUF);
1954 	vput(vp);
1955 	VFS_UNLOCK_GIANT(vfslocked);
1956 	return (error);
1957 }
1958 
1959 #if defined(COMPAT_43)
1960 /*
1961  * Get file status; this version follows links.
1962  */
1963 #ifndef _SYS_SYSPROTO_H_
1964 struct ostat_args {
1965 	char	*path;
1966 	struct ostat *ub;
1967 };
1968 #endif
1969 int
1970 ostat(td, uap)
1971 	struct thread *td;
1972 	register struct ostat_args /* {
1973 		char *path;
1974 		struct ostat *ub;
1975 	} */ *uap;
1976 {
1977 	struct stat sb;
1978 	struct ostat osb;
1979 	int error;
1980 
1981 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1982 	if (error)
1983 		return (error);
1984 	cvtstat(&sb, &osb);
1985 	error = copyout(&osb, uap->ub, sizeof (osb));
1986 	return (error);
1987 }
1988 
1989 /*
1990  * Get file status; this version does not follow links.
1991  */
1992 #ifndef _SYS_SYSPROTO_H_
1993 struct olstat_args {
1994 	char	*path;
1995 	struct ostat *ub;
1996 };
1997 #endif
1998 int
1999 olstat(td, uap)
2000 	struct thread *td;
2001 	register struct olstat_args /* {
2002 		char *path;
2003 		struct ostat *ub;
2004 	} */ *uap;
2005 {
2006 	struct stat sb;
2007 	struct ostat osb;
2008 	int error;
2009 
2010 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2011 	if (error)
2012 		return (error);
2013 	cvtstat(&sb, &osb);
2014 	error = copyout(&osb, uap->ub, sizeof (osb));
2015 	return (error);
2016 }
2017 
2018 /*
2019  * Convert from an old to a new stat structure.
2020  */
2021 void
2022 cvtstat(st, ost)
2023 	struct stat *st;
2024 	struct ostat *ost;
2025 {
2026 
2027 	ost->st_dev = st->st_dev;
2028 	ost->st_ino = st->st_ino;
2029 	ost->st_mode = st->st_mode;
2030 	ost->st_nlink = st->st_nlink;
2031 	ost->st_uid = st->st_uid;
2032 	ost->st_gid = st->st_gid;
2033 	ost->st_rdev = st->st_rdev;
2034 	if (st->st_size < (quad_t)1 << 32)
2035 		ost->st_size = st->st_size;
2036 	else
2037 		ost->st_size = -2;
2038 	ost->st_atime = st->st_atime;
2039 	ost->st_mtime = st->st_mtime;
2040 	ost->st_ctime = st->st_ctime;
2041 	ost->st_blksize = st->st_blksize;
2042 	ost->st_blocks = st->st_blocks;
2043 	ost->st_flags = st->st_flags;
2044 	ost->st_gen = st->st_gen;
2045 }
2046 #endif /* COMPAT_43 */
2047 
2048 /*
2049  * Get file status; this version follows links.
2050  */
2051 #ifndef _SYS_SYSPROTO_H_
2052 struct stat_args {
2053 	char	*path;
2054 	struct stat *ub;
2055 };
2056 #endif
2057 int
2058 stat(td, uap)
2059 	struct thread *td;
2060 	register struct stat_args /* {
2061 		char *path;
2062 		struct stat *ub;
2063 	} */ *uap;
2064 {
2065 	struct stat sb;
2066 	int error;
2067 
2068 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2069 	if (error == 0)
2070 		error = copyout(&sb, uap->ub, sizeof (sb));
2071 	return (error);
2072 }
2073 
2074 int
2075 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2076 {
2077 	struct nameidata nd;
2078 	struct stat sb;
2079 	int error, vfslocked;
2080 
2081 	NDINIT(&nd, LOOKUP,
2082 	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2083 	    pathseg, path, td);
2084 	if ((error = namei(&nd)) != 0)
2085 		return (error);
2086 	vfslocked = NDHASGIANT(&nd);
2087 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2088 	NDFREE(&nd, NDF_ONLY_PNBUF);
2089 	vput(nd.ni_vp);
2090 	VFS_UNLOCK_GIANT(vfslocked);
2091 	if (mtx_owned(&Giant))
2092 		printf("stat(%d): %s\n", vfslocked, path);
2093 	if (error)
2094 		return (error);
2095 	*sbp = sb;
2096 	return (0);
2097 }
2098 
2099 /*
2100  * Get file status; this version does not follow links.
2101  */
2102 #ifndef _SYS_SYSPROTO_H_
2103 struct lstat_args {
2104 	char	*path;
2105 	struct stat *ub;
2106 };
2107 #endif
2108 int
2109 lstat(td, uap)
2110 	struct thread *td;
2111 	register struct lstat_args /* {
2112 		char *path;
2113 		struct stat *ub;
2114 	} */ *uap;
2115 {
2116 	struct stat sb;
2117 	int error;
2118 
2119 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2120 	if (error == 0)
2121 		error = copyout(&sb, uap->ub, sizeof (sb));
2122 	return (error);
2123 }
2124 
2125 int
2126 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2127 {
2128 	struct vnode *vp;
2129 	struct stat sb;
2130 	struct nameidata nd;
2131 	int error, vfslocked;
2132 
2133 	NDINIT(&nd, LOOKUP,
2134 	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2135 	    pathseg, path, td);
2136 	if ((error = namei(&nd)) != 0)
2137 		return (error);
2138 	vfslocked = NDHASGIANT(&nd);
2139 	vp = nd.ni_vp;
2140 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2141 	NDFREE(&nd, NDF_ONLY_PNBUF);
2142 	vput(vp);
2143 	VFS_UNLOCK_GIANT(vfslocked);
2144 	if (error)
2145 		return (error);
2146 	*sbp = sb;
2147 	return (0);
2148 }
2149 
2150 /*
2151  * Implementation of the NetBSD [l]stat() functions.
2152  */
2153 void
2154 cvtnstat(sb, nsb)
2155 	struct stat *sb;
2156 	struct nstat *nsb;
2157 {
2158 	bzero(nsb, sizeof *nsb);
2159 	nsb->st_dev = sb->st_dev;
2160 	nsb->st_ino = sb->st_ino;
2161 	nsb->st_mode = sb->st_mode;
2162 	nsb->st_nlink = sb->st_nlink;
2163 	nsb->st_uid = sb->st_uid;
2164 	nsb->st_gid = sb->st_gid;
2165 	nsb->st_rdev = sb->st_rdev;
2166 	nsb->st_atimespec = sb->st_atimespec;
2167 	nsb->st_mtimespec = sb->st_mtimespec;
2168 	nsb->st_ctimespec = sb->st_ctimespec;
2169 	nsb->st_size = sb->st_size;
2170 	nsb->st_blocks = sb->st_blocks;
2171 	nsb->st_blksize = sb->st_blksize;
2172 	nsb->st_flags = sb->st_flags;
2173 	nsb->st_gen = sb->st_gen;
2174 	nsb->st_birthtimespec = sb->st_birthtimespec;
2175 }
2176 
2177 #ifndef _SYS_SYSPROTO_H_
2178 struct nstat_args {
2179 	char	*path;
2180 	struct nstat *ub;
2181 };
2182 #endif
2183 int
2184 nstat(td, uap)
2185 	struct thread *td;
2186 	register struct nstat_args /* {
2187 		char *path;
2188 		struct nstat *ub;
2189 	} */ *uap;
2190 {
2191 	struct stat sb;
2192 	struct nstat nsb;
2193 	int error;
2194 
2195 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2196 	if (error)
2197 		return (error);
2198 	cvtnstat(&sb, &nsb);
2199 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2200 	return (error);
2201 }
2202 
2203 /*
2204  * NetBSD lstat.  Get file status; this version does not follow links.
2205  */
2206 #ifndef _SYS_SYSPROTO_H_
2207 struct lstat_args {
2208 	char	*path;
2209 	struct stat *ub;
2210 };
2211 #endif
2212 int
2213 nlstat(td, uap)
2214 	struct thread *td;
2215 	register struct nlstat_args /* {
2216 		char *path;
2217 		struct nstat *ub;
2218 	} */ *uap;
2219 {
2220 	struct stat sb;
2221 	struct nstat nsb;
2222 	int error;
2223 
2224 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2225 	if (error)
2226 		return (error);
2227 	cvtnstat(&sb, &nsb);
2228 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2229 	return (error);
2230 }
2231 
2232 /*
2233  * Get configurable pathname variables.
2234  */
2235 #ifndef _SYS_SYSPROTO_H_
2236 struct pathconf_args {
2237 	char	*path;
2238 	int	name;
2239 };
2240 #endif
2241 int
2242 pathconf(td, uap)
2243 	struct thread *td;
2244 	register struct pathconf_args /* {
2245 		char *path;
2246 		int name;
2247 	} */ *uap;
2248 {
2249 
2250 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2251 }
2252 
2253 int
2254 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2255 {
2256 	struct nameidata nd;
2257 	int error, vfslocked;
2258 
2259 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2260 	    pathseg, path, td);
2261 	if ((error = namei(&nd)) != 0)
2262 		return (error);
2263 	vfslocked = NDHASGIANT(&nd);
2264 	NDFREE(&nd, NDF_ONLY_PNBUF);
2265 
2266 	/* If asynchronous I/O is available, it works for all files. */
2267 	if (name == _PC_ASYNC_IO)
2268 		td->td_retval[0] = async_io_version;
2269 	else
2270 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2271 	vput(nd.ni_vp);
2272 	VFS_UNLOCK_GIANT(vfslocked);
2273 	return (error);
2274 }
2275 
2276 /*
2277  * Return target name of a symbolic link.
2278  */
2279 #ifndef _SYS_SYSPROTO_H_
2280 struct readlink_args {
2281 	char	*path;
2282 	char	*buf;
2283 	int	count;
2284 };
2285 #endif
2286 int
2287 readlink(td, uap)
2288 	struct thread *td;
2289 	register struct readlink_args /* {
2290 		char *path;
2291 		char *buf;
2292 		int count;
2293 	} */ *uap;
2294 {
2295 
2296 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2297 	    UIO_USERSPACE, uap->count));
2298 }
2299 
2300 int
2301 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2302     enum uio_seg bufseg, int count)
2303 {
2304 	register struct vnode *vp;
2305 	struct iovec aiov;
2306 	struct uio auio;
2307 	int error;
2308 	struct nameidata nd;
2309 	int vfslocked;
2310 
2311 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2312 	    pathseg, path, td);
2313 	if ((error = namei(&nd)) != 0)
2314 		return (error);
2315 	NDFREE(&nd, NDF_ONLY_PNBUF);
2316 	vfslocked = NDHASGIANT(&nd);
2317 	vp = nd.ni_vp;
2318 #ifdef MAC
2319 	error = mac_check_vnode_readlink(td->td_ucred, vp);
2320 	if (error) {
2321 		vput(vp);
2322 		VFS_UNLOCK_GIANT(vfslocked);
2323 		return (error);
2324 	}
2325 #endif
2326 	if (vp->v_type != VLNK)
2327 		error = EINVAL;
2328 	else {
2329 		aiov.iov_base = buf;
2330 		aiov.iov_len = count;
2331 		auio.uio_iov = &aiov;
2332 		auio.uio_iovcnt = 1;
2333 		auio.uio_offset = 0;
2334 		auio.uio_rw = UIO_READ;
2335 		auio.uio_segflg = bufseg;
2336 		auio.uio_td = td;
2337 		auio.uio_resid = count;
2338 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2339 	}
2340 	vput(vp);
2341 	VFS_UNLOCK_GIANT(vfslocked);
2342 	td->td_retval[0] = count - auio.uio_resid;
2343 	return (error);
2344 }
2345 
2346 /*
2347  * Common implementation code for chflags() and fchflags().
2348  */
2349 static int
2350 setfflags(td, vp, flags)
2351 	struct thread *td;
2352 	struct vnode *vp;
2353 	int flags;
2354 {
2355 	int error;
2356 	struct mount *mp;
2357 	struct vattr vattr;
2358 
2359 	/*
2360 	 * Prevent non-root users from setting flags on devices.  When
2361 	 * a device is reused, users can retain ownership of the device
2362 	 * if they are allowed to set flags and programs assume that
2363 	 * chown can't fail when done as root.
2364 	 */
2365 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2366 		error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV,
2367 		    SUSER_ALLOWJAIL);
2368 		if (error)
2369 			return (error);
2370 	}
2371 
2372 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2373 		return (error);
2374 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2375 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2376 	VATTR_NULL(&vattr);
2377 	vattr.va_flags = flags;
2378 #ifdef MAC
2379 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2380 	if (error == 0)
2381 #endif
2382 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2383 	VOP_UNLOCK(vp, 0, td);
2384 	vn_finished_write(mp);
2385 	return (error);
2386 }
2387 
2388 /*
2389  * Change flags of a file given a path name.
2390  */
2391 #ifndef _SYS_SYSPROTO_H_
2392 struct chflags_args {
2393 	char	*path;
2394 	int	flags;
2395 };
2396 #endif
2397 int
2398 chflags(td, uap)
2399 	struct thread *td;
2400 	register struct chflags_args /* {
2401 		char *path;
2402 		int flags;
2403 	} */ *uap;
2404 {
2405 	int error;
2406 	struct nameidata nd;
2407 	int vfslocked;
2408 
2409 	AUDIT_ARG(fflags, uap->flags);
2410 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2411 	    uap->path, td);
2412 	if ((error = namei(&nd)) != 0)
2413 		return (error);
2414 	NDFREE(&nd, NDF_ONLY_PNBUF);
2415 	vfslocked = NDHASGIANT(&nd);
2416 	error = setfflags(td, nd.ni_vp, uap->flags);
2417 	vrele(nd.ni_vp);
2418 	VFS_UNLOCK_GIANT(vfslocked);
2419 	return (error);
2420 }
2421 
2422 /*
2423  * Same as chflags() but doesn't follow symlinks.
2424  */
2425 int
2426 lchflags(td, uap)
2427 	struct thread *td;
2428 	register struct lchflags_args /* {
2429 		char *path;
2430 		int flags;
2431 	} */ *uap;
2432 {
2433 	int error;
2434 	struct nameidata nd;
2435 	int vfslocked;
2436 
2437 	AUDIT_ARG(fflags, uap->flags);
2438 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2439 	    uap->path, td);
2440 	if ((error = namei(&nd)) != 0)
2441 		return (error);
2442 	vfslocked = NDHASGIANT(&nd);
2443 	NDFREE(&nd, NDF_ONLY_PNBUF);
2444 	error = setfflags(td, nd.ni_vp, uap->flags);
2445 	vrele(nd.ni_vp);
2446 	VFS_UNLOCK_GIANT(vfslocked);
2447 	return (error);
2448 }
2449 
2450 /*
2451  * Change flags of a file given a file descriptor.
2452  */
2453 #ifndef _SYS_SYSPROTO_H_
2454 struct fchflags_args {
2455 	int	fd;
2456 	int	flags;
2457 };
2458 #endif
2459 int
2460 fchflags(td, uap)
2461 	struct thread *td;
2462 	register struct fchflags_args /* {
2463 		int fd;
2464 		int flags;
2465 	} */ *uap;
2466 {
2467 	struct file *fp;
2468 	int vfslocked;
2469 	int error;
2470 
2471 	AUDIT_ARG(fd, uap->fd);
2472 	AUDIT_ARG(fflags, uap->flags);
2473 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2474 		return (error);
2475 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2476 #ifdef AUDIT
2477 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2478 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2479 	VOP_UNLOCK(fp->f_vnode, 0, td);
2480 #endif
2481 	error = setfflags(td, fp->f_vnode, uap->flags);
2482 	VFS_UNLOCK_GIANT(vfslocked);
2483 	fdrop(fp, td);
2484 	return (error);
2485 }
2486 
2487 /*
2488  * Common implementation code for chmod(), lchmod() and fchmod().
2489  */
2490 static int
2491 setfmode(td, vp, mode)
2492 	struct thread *td;
2493 	struct vnode *vp;
2494 	int mode;
2495 {
2496 	int error;
2497 	struct mount *mp;
2498 	struct vattr vattr;
2499 
2500 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2501 		return (error);
2502 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2503 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2504 	VATTR_NULL(&vattr);
2505 	vattr.va_mode = mode & ALLPERMS;
2506 #ifdef MAC
2507 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2508 	if (error == 0)
2509 #endif
2510 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2511 	VOP_UNLOCK(vp, 0, td);
2512 	vn_finished_write(mp);
2513 	return (error);
2514 }
2515 
2516 /*
2517  * Change mode of a file given path name.
2518  */
2519 #ifndef _SYS_SYSPROTO_H_
2520 struct chmod_args {
2521 	char	*path;
2522 	int	mode;
2523 };
2524 #endif
2525 int
2526 chmod(td, uap)
2527 	struct thread *td;
2528 	register struct chmod_args /* {
2529 		char *path;
2530 		int mode;
2531 	} */ *uap;
2532 {
2533 
2534 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2535 }
2536 
2537 int
2538 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2539 {
2540 	int error;
2541 	struct nameidata nd;
2542 	int vfslocked;
2543 
2544 	AUDIT_ARG(mode, mode);
2545 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2546 	if ((error = namei(&nd)) != 0)
2547 		return (error);
2548 	vfslocked = NDHASGIANT(&nd);
2549 	NDFREE(&nd, NDF_ONLY_PNBUF);
2550 	error = setfmode(td, nd.ni_vp, mode);
2551 	vrele(nd.ni_vp);
2552 	VFS_UNLOCK_GIANT(vfslocked);
2553 	return (error);
2554 }
2555 
2556 /*
2557  * Change mode of a file given path name (don't follow links.)
2558  */
2559 #ifndef _SYS_SYSPROTO_H_
2560 struct lchmod_args {
2561 	char	*path;
2562 	int	mode;
2563 };
2564 #endif
2565 int
2566 lchmod(td, uap)
2567 	struct thread *td;
2568 	register struct lchmod_args /* {
2569 		char *path;
2570 		int mode;
2571 	} */ *uap;
2572 {
2573 	int error;
2574 	struct nameidata nd;
2575 	int vfslocked;
2576 
2577 	AUDIT_ARG(mode, (mode_t)uap->mode);
2578 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2579 	    uap->path, td);
2580 	if ((error = namei(&nd)) != 0)
2581 		return (error);
2582 	vfslocked = NDHASGIANT(&nd);
2583 	NDFREE(&nd, NDF_ONLY_PNBUF);
2584 	error = setfmode(td, nd.ni_vp, uap->mode);
2585 	vrele(nd.ni_vp);
2586 	VFS_UNLOCK_GIANT(vfslocked);
2587 	return (error);
2588 }
2589 
2590 /*
2591  * Change mode of a file given a file descriptor.
2592  */
2593 #ifndef _SYS_SYSPROTO_H_
2594 struct fchmod_args {
2595 	int	fd;
2596 	int	mode;
2597 };
2598 #endif
2599 int
2600 fchmod(td, uap)
2601 	struct thread *td;
2602 	register struct fchmod_args /* {
2603 		int fd;
2604 		int mode;
2605 	} */ *uap;
2606 {
2607 	struct file *fp;
2608 	int vfslocked;
2609 	int error;
2610 
2611 	AUDIT_ARG(fd, uap->fd);
2612 	AUDIT_ARG(mode, uap->mode);
2613 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2614 		return (error);
2615 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2616 #ifdef AUDIT
2617 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2618 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2619 	VOP_UNLOCK(fp->f_vnode, 0, td);
2620 #endif
2621 	error = setfmode(td, fp->f_vnode, uap->mode);
2622 	VFS_UNLOCK_GIANT(vfslocked);
2623 	fdrop(fp, td);
2624 	return (error);
2625 }
2626 
2627 /*
2628  * Common implementation for chown(), lchown(), and fchown()
2629  */
2630 static int
2631 setfown(td, vp, uid, gid)
2632 	struct thread *td;
2633 	struct vnode *vp;
2634 	uid_t uid;
2635 	gid_t gid;
2636 {
2637 	int error;
2638 	struct mount *mp;
2639 	struct vattr vattr;
2640 
2641 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2642 		return (error);
2643 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2644 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2645 	VATTR_NULL(&vattr);
2646 	vattr.va_uid = uid;
2647 	vattr.va_gid = gid;
2648 #ifdef MAC
2649 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2650 	    vattr.va_gid);
2651 	if (error == 0)
2652 #endif
2653 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2654 	VOP_UNLOCK(vp, 0, td);
2655 	vn_finished_write(mp);
2656 	return (error);
2657 }
2658 
2659 /*
2660  * Set ownership given a path name.
2661  */
2662 #ifndef _SYS_SYSPROTO_H_
2663 struct chown_args {
2664 	char	*path;
2665 	int	uid;
2666 	int	gid;
2667 };
2668 #endif
2669 int
2670 chown(td, uap)
2671 	struct thread *td;
2672 	register struct chown_args /* {
2673 		char *path;
2674 		int uid;
2675 		int gid;
2676 	} */ *uap;
2677 {
2678 
2679 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2680 }
2681 
2682 int
2683 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2684     int gid)
2685 {
2686 	int error;
2687 	struct nameidata nd;
2688 	int vfslocked;
2689 
2690 	AUDIT_ARG(owner, uid, gid);
2691 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2692 	if ((error = namei(&nd)) != 0)
2693 		return (error);
2694 	vfslocked = NDHASGIANT(&nd);
2695 	NDFREE(&nd, NDF_ONLY_PNBUF);
2696 	error = setfown(td, nd.ni_vp, uid, gid);
2697 	vrele(nd.ni_vp);
2698 	VFS_UNLOCK_GIANT(vfslocked);
2699 	return (error);
2700 }
2701 
2702 /*
2703  * Set ownership given a path name, do not cross symlinks.
2704  */
2705 #ifndef _SYS_SYSPROTO_H_
2706 struct lchown_args {
2707 	char	*path;
2708 	int	uid;
2709 	int	gid;
2710 };
2711 #endif
2712 int
2713 lchown(td, uap)
2714 	struct thread *td;
2715 	register struct lchown_args /* {
2716 		char *path;
2717 		int uid;
2718 		int gid;
2719 	} */ *uap;
2720 {
2721 
2722 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2723 }
2724 
2725 int
2726 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2727     int gid)
2728 {
2729 	int error;
2730 	struct nameidata nd;
2731 	int vfslocked;
2732 
2733 	AUDIT_ARG(owner, uid, gid);
2734 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2735 	if ((error = namei(&nd)) != 0)
2736 		return (error);
2737 	vfslocked = NDHASGIANT(&nd);
2738 	NDFREE(&nd, NDF_ONLY_PNBUF);
2739 	error = setfown(td, nd.ni_vp, uid, gid);
2740 	vrele(nd.ni_vp);
2741 	VFS_UNLOCK_GIANT(vfslocked);
2742 	return (error);
2743 }
2744 
2745 /*
2746  * Set ownership given a file descriptor.
2747  */
2748 #ifndef _SYS_SYSPROTO_H_
2749 struct fchown_args {
2750 	int	fd;
2751 	int	uid;
2752 	int	gid;
2753 };
2754 #endif
2755 int
2756 fchown(td, uap)
2757 	struct thread *td;
2758 	register struct fchown_args /* {
2759 		int fd;
2760 		int uid;
2761 		int gid;
2762 	} */ *uap;
2763 {
2764 	struct file *fp;
2765 	int vfslocked;
2766 	int error;
2767 
2768 	AUDIT_ARG(fd, uap->fd);
2769 	AUDIT_ARG(owner, uap->uid, uap->gid);
2770 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2771 		return (error);
2772 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2773 #ifdef AUDIT
2774 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2775 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2776 	VOP_UNLOCK(fp->f_vnode, 0, td);
2777 #endif
2778 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2779 	VFS_UNLOCK_GIANT(vfslocked);
2780 	fdrop(fp, td);
2781 	return (error);
2782 }
2783 
2784 /*
2785  * Common implementation code for utimes(), lutimes(), and futimes().
2786  */
2787 static int
2788 getutimes(usrtvp, tvpseg, tsp)
2789 	const struct timeval *usrtvp;
2790 	enum uio_seg tvpseg;
2791 	struct timespec *tsp;
2792 {
2793 	struct timeval tv[2];
2794 	const struct timeval *tvp;
2795 	int error;
2796 
2797 	if (usrtvp == NULL) {
2798 		microtime(&tv[0]);
2799 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2800 		tsp[1] = tsp[0];
2801 	} else {
2802 		if (tvpseg == UIO_SYSSPACE) {
2803 			tvp = usrtvp;
2804 		} else {
2805 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2806 				return (error);
2807 			tvp = tv;
2808 		}
2809 
2810 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2811 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2812 			return (EINVAL);
2813 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2814 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2815 	}
2816 	return (0);
2817 }
2818 
2819 /*
2820  * Common implementation code for utimes(), lutimes(), and futimes().
2821  */
2822 static int
2823 setutimes(td, vp, ts, numtimes, nullflag)
2824 	struct thread *td;
2825 	struct vnode *vp;
2826 	const struct timespec *ts;
2827 	int numtimes;
2828 	int nullflag;
2829 {
2830 	int error, setbirthtime;
2831 	struct mount *mp;
2832 	struct vattr vattr;
2833 
2834 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2835 		return (error);
2836 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2837 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2838 	setbirthtime = 0;
2839 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2840 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2841 		setbirthtime = 1;
2842 	VATTR_NULL(&vattr);
2843 	vattr.va_atime = ts[0];
2844 	vattr.va_mtime = ts[1];
2845 	if (setbirthtime)
2846 		vattr.va_birthtime = ts[1];
2847 	if (numtimes > 2)
2848 		vattr.va_birthtime = ts[2];
2849 	if (nullflag)
2850 		vattr.va_vaflags |= VA_UTIMES_NULL;
2851 #ifdef MAC
2852 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2853 	    vattr.va_mtime);
2854 #endif
2855 	if (error == 0)
2856 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2857 	VOP_UNLOCK(vp, 0, td);
2858 	vn_finished_write(mp);
2859 	return (error);
2860 }
2861 
2862 /*
2863  * Set the access and modification times of a file.
2864  */
2865 #ifndef _SYS_SYSPROTO_H_
2866 struct utimes_args {
2867 	char	*path;
2868 	struct	timeval *tptr;
2869 };
2870 #endif
2871 int
2872 utimes(td, uap)
2873 	struct thread *td;
2874 	register struct utimes_args /* {
2875 		char *path;
2876 		struct timeval *tptr;
2877 	} */ *uap;
2878 {
2879 
2880 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2881 	    UIO_USERSPACE));
2882 }
2883 
2884 int
2885 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2886     struct timeval *tptr, enum uio_seg tptrseg)
2887 {
2888 	struct timespec ts[2];
2889 	int error;
2890 	struct nameidata nd;
2891 	int vfslocked;
2892 
2893 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2894 		return (error);
2895 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2896 	if ((error = namei(&nd)) != 0)
2897 		return (error);
2898 	vfslocked = NDHASGIANT(&nd);
2899 	NDFREE(&nd, NDF_ONLY_PNBUF);
2900 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2901 	vrele(nd.ni_vp);
2902 	VFS_UNLOCK_GIANT(vfslocked);
2903 	return (error);
2904 }
2905 
2906 /*
2907  * Set the access and modification times of a file.
2908  */
2909 #ifndef _SYS_SYSPROTO_H_
2910 struct lutimes_args {
2911 	char	*path;
2912 	struct	timeval *tptr;
2913 };
2914 #endif
2915 int
2916 lutimes(td, uap)
2917 	struct thread *td;
2918 	register struct lutimes_args /* {
2919 		char *path;
2920 		struct timeval *tptr;
2921 	} */ *uap;
2922 {
2923 
2924 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2925 	    UIO_USERSPACE));
2926 }
2927 
2928 int
2929 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2930     struct timeval *tptr, enum uio_seg tptrseg)
2931 {
2932 	struct timespec ts[2];
2933 	int error;
2934 	struct nameidata nd;
2935 	int vfslocked;
2936 
2937 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2938 		return (error);
2939 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2940 	if ((error = namei(&nd)) != 0)
2941 		return (error);
2942 	vfslocked = NDHASGIANT(&nd);
2943 	NDFREE(&nd, NDF_ONLY_PNBUF);
2944 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2945 	vrele(nd.ni_vp);
2946 	VFS_UNLOCK_GIANT(vfslocked);
2947 	return (error);
2948 }
2949 
2950 /*
2951  * Set the access and modification times of a file.
2952  */
2953 #ifndef _SYS_SYSPROTO_H_
2954 struct futimes_args {
2955 	int	fd;
2956 	struct	timeval *tptr;
2957 };
2958 #endif
2959 int
2960 futimes(td, uap)
2961 	struct thread *td;
2962 	register struct futimes_args /* {
2963 		int  fd;
2964 		struct timeval *tptr;
2965 	} */ *uap;
2966 {
2967 
2968 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2969 }
2970 
2971 int
2972 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2973     enum uio_seg tptrseg)
2974 {
2975 	struct timespec ts[2];
2976 	struct file *fp;
2977 	int vfslocked;
2978 	int error;
2979 
2980 	AUDIT_ARG(fd, fd);
2981 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2982 		return (error);
2983 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2984 		return (error);
2985 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2986 #ifdef AUDIT
2987 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2988 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2989 	VOP_UNLOCK(fp->f_vnode, 0, td);
2990 #endif
2991 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2992 	VFS_UNLOCK_GIANT(vfslocked);
2993 	fdrop(fp, td);
2994 	return (error);
2995 }
2996 
2997 /*
2998  * Truncate a file given its path name.
2999  */
3000 #ifndef _SYS_SYSPROTO_H_
3001 struct truncate_args {
3002 	char	*path;
3003 	int	pad;
3004 	off_t	length;
3005 };
3006 #endif
3007 int
3008 truncate(td, uap)
3009 	struct thread *td;
3010 	register struct truncate_args /* {
3011 		char *path;
3012 		int pad;
3013 		off_t length;
3014 	} */ *uap;
3015 {
3016 
3017 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3018 }
3019 
3020 int
3021 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3022 {
3023 	struct mount *mp;
3024 	struct vnode *vp;
3025 	struct vattr vattr;
3026 	int error;
3027 	struct nameidata nd;
3028 	int vfslocked;
3029 
3030 	if (length < 0)
3031 		return(EINVAL);
3032 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3033 	if ((error = namei(&nd)) != 0)
3034 		return (error);
3035 	vfslocked = NDHASGIANT(&nd);
3036 	vp = nd.ni_vp;
3037 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3038 		vrele(vp);
3039 		VFS_UNLOCK_GIANT(vfslocked);
3040 		return (error);
3041 	}
3042 	NDFREE(&nd, NDF_ONLY_PNBUF);
3043 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3044 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3045 	if (vp->v_type == VDIR)
3046 		error = EISDIR;
3047 #ifdef MAC
3048 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
3049 	}
3050 #endif
3051 	else if ((error = vn_writechk(vp)) == 0 &&
3052 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3053 		VATTR_NULL(&vattr);
3054 		vattr.va_size = length;
3055 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3056 	}
3057 	vput(vp);
3058 	vn_finished_write(mp);
3059 	VFS_UNLOCK_GIANT(vfslocked);
3060 	return (error);
3061 }
3062 
3063 /*
3064  * Truncate a file given a file descriptor.
3065  */
3066 #ifndef _SYS_SYSPROTO_H_
3067 struct ftruncate_args {
3068 	int	fd;
3069 	int	pad;
3070 	off_t	length;
3071 };
3072 #endif
3073 int
3074 ftruncate(td, uap)
3075 	struct thread *td;
3076 	register struct ftruncate_args /* {
3077 		int fd;
3078 		int pad;
3079 		off_t length;
3080 	} */ *uap;
3081 {
3082 	struct mount *mp;
3083 	struct vattr vattr;
3084 	struct vnode *vp;
3085 	struct file *fp;
3086 	int vfslocked;
3087 	int error;
3088 
3089 	AUDIT_ARG(fd, uap->fd);
3090 	if (uap->length < 0)
3091 		return(EINVAL);
3092 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3093 		return (error);
3094 	if ((fp->f_flag & FWRITE) == 0) {
3095 		fdrop(fp, td);
3096 		return (EINVAL);
3097 	}
3098 	vp = fp->f_vnode;
3099 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3100 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3101 		goto drop;
3102 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3103 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3104 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3105 	if (vp->v_type == VDIR)
3106 		error = EISDIR;
3107 #ifdef MAC
3108 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3109 	    vp))) {
3110 	}
3111 #endif
3112 	else if ((error = vn_writechk(vp)) == 0) {
3113 		VATTR_NULL(&vattr);
3114 		vattr.va_size = uap->length;
3115 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3116 	}
3117 	VOP_UNLOCK(vp, 0, td);
3118 	vn_finished_write(mp);
3119 drop:
3120 	VFS_UNLOCK_GIANT(vfslocked);
3121 	fdrop(fp, td);
3122 	return (error);
3123 }
3124 
3125 #if defined(COMPAT_43)
3126 /*
3127  * Truncate a file given its path name.
3128  */
3129 #ifndef _SYS_SYSPROTO_H_
3130 struct otruncate_args {
3131 	char	*path;
3132 	long	length;
3133 };
3134 #endif
3135 int
3136 otruncate(td, uap)
3137 	struct thread *td;
3138 	register struct otruncate_args /* {
3139 		char *path;
3140 		long length;
3141 	} */ *uap;
3142 {
3143 	struct truncate_args /* {
3144 		char *path;
3145 		int pad;
3146 		off_t length;
3147 	} */ nuap;
3148 
3149 	nuap.path = uap->path;
3150 	nuap.length = uap->length;
3151 	return (truncate(td, &nuap));
3152 }
3153 
3154 /*
3155  * Truncate a file given a file descriptor.
3156  */
3157 #ifndef _SYS_SYSPROTO_H_
3158 struct oftruncate_args {
3159 	int	fd;
3160 	long	length;
3161 };
3162 #endif
3163 int
3164 oftruncate(td, uap)
3165 	struct thread *td;
3166 	register struct oftruncate_args /* {
3167 		int fd;
3168 		long length;
3169 	} */ *uap;
3170 {
3171 	struct ftruncate_args /* {
3172 		int fd;
3173 		int pad;
3174 		off_t length;
3175 	} */ nuap;
3176 
3177 	nuap.fd = uap->fd;
3178 	nuap.length = uap->length;
3179 	return (ftruncate(td, &nuap));
3180 }
3181 #endif /* COMPAT_43 */
3182 
3183 /*
3184  * Sync an open file.
3185  */
3186 #ifndef _SYS_SYSPROTO_H_
3187 struct fsync_args {
3188 	int	fd;
3189 };
3190 #endif
3191 int
3192 fsync(td, uap)
3193 	struct thread *td;
3194 	struct fsync_args /* {
3195 		int fd;
3196 	} */ *uap;
3197 {
3198 	struct vnode *vp;
3199 	struct mount *mp;
3200 	struct file *fp;
3201 	int vfslocked;
3202 	int error;
3203 
3204 	AUDIT_ARG(fd, uap->fd);
3205 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3206 		return (error);
3207 	vp = fp->f_vnode;
3208 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3209 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3210 		goto drop;
3211 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3212 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3213 	if (vp->v_object != NULL) {
3214 		VM_OBJECT_LOCK(vp->v_object);
3215 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3216 		VM_OBJECT_UNLOCK(vp->v_object);
3217 	}
3218 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3219 
3220 	VOP_UNLOCK(vp, 0, td);
3221 	vn_finished_write(mp);
3222 drop:
3223 	VFS_UNLOCK_GIANT(vfslocked);
3224 	fdrop(fp, td);
3225 	return (error);
3226 }
3227 
3228 /*
3229  * Rename files.  Source and destination must either both be directories,
3230  * or both not be directories.  If target is a directory, it must be empty.
3231  */
3232 #ifndef _SYS_SYSPROTO_H_
3233 struct rename_args {
3234 	char	*from;
3235 	char	*to;
3236 };
3237 #endif
3238 int
3239 rename(td, uap)
3240 	struct thread *td;
3241 	register struct rename_args /* {
3242 		char *from;
3243 		char *to;
3244 	} */ *uap;
3245 {
3246 
3247 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3248 }
3249 
3250 int
3251 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3252 {
3253 	struct mount *mp = NULL;
3254 	struct vnode *tvp, *fvp, *tdvp;
3255 	struct nameidata fromnd, tond;
3256 	int tvfslocked;
3257 	int fvfslocked;
3258 	int error;
3259 
3260 	bwillwrite();
3261 #ifdef MAC
3262 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3263 	    AUDITVNODE1, pathseg, from, td);
3264 #else
3265 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3266 	    AUDITVNODE1, pathseg, from, td);
3267 #endif
3268 	if ((error = namei(&fromnd)) != 0)
3269 		return (error);
3270 	fvfslocked = NDHASGIANT(&fromnd);
3271 	tvfslocked = 0;
3272 #ifdef MAC
3273 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3274 	    fromnd.ni_vp, &fromnd.ni_cnd);
3275 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3276 	if (fromnd.ni_dvp != fromnd.ni_vp)
3277 		VOP_UNLOCK(fromnd.ni_vp, 0, td);
3278 #endif
3279 	fvp = fromnd.ni_vp;
3280 	if (error == 0)
3281 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3282 	if (error != 0) {
3283 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3284 		vrele(fromnd.ni_dvp);
3285 		vrele(fvp);
3286 		goto out1;
3287 	}
3288 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3289 	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3290 	if (fromnd.ni_vp->v_type == VDIR)
3291 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3292 	if ((error = namei(&tond)) != 0) {
3293 		/* Translate error code for rename("dir1", "dir2/."). */
3294 		if (error == EISDIR && fvp->v_type == VDIR)
3295 			error = EINVAL;
3296 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3297 		vrele(fromnd.ni_dvp);
3298 		vrele(fvp);
3299 		vn_finished_write(mp);
3300 		goto out1;
3301 	}
3302 	tvfslocked = NDHASGIANT(&tond);
3303 	tdvp = tond.ni_dvp;
3304 	tvp = tond.ni_vp;
3305 	if (tvp != NULL) {
3306 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3307 			error = ENOTDIR;
3308 			goto out;
3309 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3310 			error = EISDIR;
3311 			goto out;
3312 		}
3313 	}
3314 	if (fvp == tdvp)
3315 		error = EINVAL;
3316 	/*
3317 	 * If the source is the same as the destination (that is, if they
3318 	 * are links to the same vnode), then there is nothing to do.
3319 	 */
3320 	if (fvp == tvp)
3321 		error = -1;
3322 #ifdef MAC
3323 	else
3324 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3325 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3326 #endif
3327 out:
3328 	if (!error) {
3329 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3330 		if (fromnd.ni_dvp != tdvp) {
3331 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3332 		}
3333 		if (tvp) {
3334 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3335 		}
3336 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3337 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3338 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3339 		NDFREE(&tond, NDF_ONLY_PNBUF);
3340 	} else {
3341 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3342 		NDFREE(&tond, NDF_ONLY_PNBUF);
3343 		if (tvp)
3344 			vput(tvp);
3345 		if (tdvp == tvp)
3346 			vrele(tdvp);
3347 		else
3348 			vput(tdvp);
3349 		vrele(fromnd.ni_dvp);
3350 		vrele(fvp);
3351 	}
3352 	vrele(tond.ni_startdir);
3353 	vn_finished_write(mp);
3354 out1:
3355 	if (fromnd.ni_startdir)
3356 		vrele(fromnd.ni_startdir);
3357 	VFS_UNLOCK_GIANT(fvfslocked);
3358 	VFS_UNLOCK_GIANT(tvfslocked);
3359 	if (error == -1)
3360 		return (0);
3361 	return (error);
3362 }
3363 
3364 /*
3365  * Make a directory file.
3366  */
3367 #ifndef _SYS_SYSPROTO_H_
3368 struct mkdir_args {
3369 	char	*path;
3370 	int	mode;
3371 };
3372 #endif
3373 int
3374 mkdir(td, uap)
3375 	struct thread *td;
3376 	register struct mkdir_args /* {
3377 		char *path;
3378 		int mode;
3379 	} */ *uap;
3380 {
3381 
3382 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3383 }
3384 
3385 int
3386 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3387 {
3388 	struct mount *mp;
3389 	struct vnode *vp;
3390 	struct vattr vattr;
3391 	int error;
3392 	struct nameidata nd;
3393 	int vfslocked;
3394 
3395 	AUDIT_ARG(mode, mode);
3396 restart:
3397 	bwillwrite();
3398 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3399 	    segflg, path, td);
3400 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3401 	if ((error = namei(&nd)) != 0)
3402 		return (error);
3403 	vfslocked = NDHASGIANT(&nd);
3404 	vp = nd.ni_vp;
3405 	if (vp != NULL) {
3406 		NDFREE(&nd, NDF_ONLY_PNBUF);
3407 		/*
3408 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3409 		 * the strange behaviour of leaving the vnode unlocked
3410 		 * if the target is the same vnode as the parent.
3411 		 */
3412 		if (vp == nd.ni_dvp)
3413 			vrele(nd.ni_dvp);
3414 		else
3415 			vput(nd.ni_dvp);
3416 		vrele(vp);
3417 		VFS_UNLOCK_GIANT(vfslocked);
3418 		return (EEXIST);
3419 	}
3420 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3421 		NDFREE(&nd, NDF_ONLY_PNBUF);
3422 		vput(nd.ni_dvp);
3423 		VFS_UNLOCK_GIANT(vfslocked);
3424 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3425 			return (error);
3426 		goto restart;
3427 	}
3428 	VATTR_NULL(&vattr);
3429 	vattr.va_type = VDIR;
3430 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3431 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3432 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3433 #ifdef MAC
3434 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3435 	    &vattr);
3436 	if (error)
3437 		goto out;
3438 #endif
3439 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3440 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3441 #ifdef MAC
3442 out:
3443 #endif
3444 	NDFREE(&nd, NDF_ONLY_PNBUF);
3445 	vput(nd.ni_dvp);
3446 	if (!error)
3447 		vput(nd.ni_vp);
3448 	vn_finished_write(mp);
3449 	VFS_UNLOCK_GIANT(vfslocked);
3450 	return (error);
3451 }
3452 
3453 /*
3454  * Remove a directory file.
3455  */
3456 #ifndef _SYS_SYSPROTO_H_
3457 struct rmdir_args {
3458 	char	*path;
3459 };
3460 #endif
3461 int
3462 rmdir(td, uap)
3463 	struct thread *td;
3464 	struct rmdir_args /* {
3465 		char *path;
3466 	} */ *uap;
3467 {
3468 
3469 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3470 }
3471 
3472 int
3473 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3474 {
3475 	struct mount *mp;
3476 	struct vnode *vp;
3477 	int error;
3478 	struct nameidata nd;
3479 	int vfslocked;
3480 
3481 restart:
3482 	bwillwrite();
3483 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3484 	    pathseg, path, td);
3485 	if ((error = namei(&nd)) != 0)
3486 		return (error);
3487 	vfslocked = NDHASGIANT(&nd);
3488 	vp = nd.ni_vp;
3489 	if (vp->v_type != VDIR) {
3490 		error = ENOTDIR;
3491 		goto out;
3492 	}
3493 	/*
3494 	 * No rmdir "." please.
3495 	 */
3496 	if (nd.ni_dvp == vp) {
3497 		error = EINVAL;
3498 		goto out;
3499 	}
3500 	/*
3501 	 * The root of a mounted filesystem cannot be deleted.
3502 	 */
3503 	if (vp->v_vflag & VV_ROOT) {
3504 		error = EBUSY;
3505 		goto out;
3506 	}
3507 #ifdef MAC
3508 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3509 	    &nd.ni_cnd);
3510 	if (error)
3511 		goto out;
3512 #endif
3513 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3514 		NDFREE(&nd, NDF_ONLY_PNBUF);
3515 		vput(vp);
3516 		if (nd.ni_dvp == vp)
3517 			vrele(nd.ni_dvp);
3518 		else
3519 			vput(nd.ni_dvp);
3520 		VFS_UNLOCK_GIANT(vfslocked);
3521 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3522 			return (error);
3523 		goto restart;
3524 	}
3525 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3526 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3527 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3528 	vn_finished_write(mp);
3529 out:
3530 	NDFREE(&nd, NDF_ONLY_PNBUF);
3531 	vput(vp);
3532 	if (nd.ni_dvp == vp)
3533 		vrele(nd.ni_dvp);
3534 	else
3535 		vput(nd.ni_dvp);
3536 	VFS_UNLOCK_GIANT(vfslocked);
3537 	return (error);
3538 }
3539 
3540 #ifdef COMPAT_43
3541 /*
3542  * Read a block of directory entries in a filesystem independent format.
3543  */
3544 #ifndef _SYS_SYSPROTO_H_
3545 struct ogetdirentries_args {
3546 	int	fd;
3547 	char	*buf;
3548 	u_int	count;
3549 	long	*basep;
3550 };
3551 #endif
3552 int
3553 ogetdirentries(td, uap)
3554 	struct thread *td;
3555 	register struct ogetdirentries_args /* {
3556 		int fd;
3557 		char *buf;
3558 		u_int count;
3559 		long *basep;
3560 	} */ *uap;
3561 {
3562 	struct vnode *vp;
3563 	struct file *fp;
3564 	struct uio auio, kuio;
3565 	struct iovec aiov, kiov;
3566 	struct dirent *dp, *edp;
3567 	caddr_t dirbuf;
3568 	int error, eofflag, readcnt, vfslocked;
3569 	long loff;
3570 
3571 	/* XXX arbitrary sanity limit on `count'. */
3572 	if (uap->count > 64 * 1024)
3573 		return (EINVAL);
3574 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3575 		return (error);
3576 	if ((fp->f_flag & FREAD) == 0) {
3577 		fdrop(fp, td);
3578 		return (EBADF);
3579 	}
3580 	vp = fp->f_vnode;
3581 unionread:
3582 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3583 	if (vp->v_type != VDIR) {
3584 		VFS_UNLOCK_GIANT(vfslocked);
3585 		fdrop(fp, td);
3586 		return (EINVAL);
3587 	}
3588 	aiov.iov_base = uap->buf;
3589 	aiov.iov_len = uap->count;
3590 	auio.uio_iov = &aiov;
3591 	auio.uio_iovcnt = 1;
3592 	auio.uio_rw = UIO_READ;
3593 	auio.uio_segflg = UIO_USERSPACE;
3594 	auio.uio_td = td;
3595 	auio.uio_resid = uap->count;
3596 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3597 	loff = auio.uio_offset = fp->f_offset;
3598 #ifdef MAC
3599 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3600 	if (error) {
3601 		VOP_UNLOCK(vp, 0, td);
3602 		VFS_UNLOCK_GIANT(vfslocked);
3603 		fdrop(fp, td);
3604 		return (error);
3605 	}
3606 #endif
3607 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3608 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3609 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3610 			    NULL, NULL);
3611 			fp->f_offset = auio.uio_offset;
3612 		} else
3613 #	endif
3614 	{
3615 		kuio = auio;
3616 		kuio.uio_iov = &kiov;
3617 		kuio.uio_segflg = UIO_SYSSPACE;
3618 		kiov.iov_len = uap->count;
3619 		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3620 		kiov.iov_base = dirbuf;
3621 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3622 			    NULL, NULL);
3623 		fp->f_offset = kuio.uio_offset;
3624 		if (error == 0) {
3625 			readcnt = uap->count - kuio.uio_resid;
3626 			edp = (struct dirent *)&dirbuf[readcnt];
3627 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3628 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3629 					/*
3630 					 * The expected low byte of
3631 					 * dp->d_namlen is our dp->d_type.
3632 					 * The high MBZ byte of dp->d_namlen
3633 					 * is our dp->d_namlen.
3634 					 */
3635 					dp->d_type = dp->d_namlen;
3636 					dp->d_namlen = 0;
3637 #				else
3638 					/*
3639 					 * The dp->d_type is the high byte
3640 					 * of the expected dp->d_namlen,
3641 					 * so must be zero'ed.
3642 					 */
3643 					dp->d_type = 0;
3644 #				endif
3645 				if (dp->d_reclen > 0) {
3646 					dp = (struct dirent *)
3647 					    ((char *)dp + dp->d_reclen);
3648 				} else {
3649 					error = EIO;
3650 					break;
3651 				}
3652 			}
3653 			if (dp >= edp)
3654 				error = uiomove(dirbuf, readcnt, &auio);
3655 		}
3656 		FREE(dirbuf, M_TEMP);
3657 	}
3658 	if (error) {
3659 		VOP_UNLOCK(vp, 0, td);
3660 		VFS_UNLOCK_GIANT(vfslocked);
3661 		fdrop(fp, td);
3662 		return (error);
3663 	}
3664 	if (uap->count == auio.uio_resid &&
3665 	    (vp->v_vflag & VV_ROOT) &&
3666 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
3667 		struct vnode *tvp = vp;
3668 		vp = vp->v_mount->mnt_vnodecovered;
3669 		VREF(vp);
3670 		fp->f_vnode = vp;
3671 		fp->f_data = vp;
3672 		fp->f_offset = 0;
3673 		vput(tvp);
3674 		VFS_UNLOCK_GIANT(vfslocked);
3675 		goto unionread;
3676 	}
3677 	VOP_UNLOCK(vp, 0, td);
3678 	VFS_UNLOCK_GIANT(vfslocked);
3679 	error = copyout(&loff, uap->basep, sizeof(long));
3680 	fdrop(fp, td);
3681 	td->td_retval[0] = uap->count - auio.uio_resid;
3682 	return (error);
3683 }
3684 #endif /* COMPAT_43 */
3685 
3686 /*
3687  * Read a block of directory entries in a filesystem independent format.
3688  */
3689 #ifndef _SYS_SYSPROTO_H_
3690 struct getdirentries_args {
3691 	int	fd;
3692 	char	*buf;
3693 	u_int	count;
3694 	long	*basep;
3695 };
3696 #endif
3697 int
3698 getdirentries(td, uap)
3699 	struct thread *td;
3700 	register struct getdirentries_args /* {
3701 		int fd;
3702 		char *buf;
3703 		u_int count;
3704 		long *basep;
3705 	} */ *uap;
3706 {
3707 	struct vnode *vp;
3708 	struct file *fp;
3709 	struct uio auio;
3710 	struct iovec aiov;
3711 	int vfslocked;
3712 	long loff;
3713 	int error, eofflag;
3714 
3715 	AUDIT_ARG(fd, uap->fd);
3716 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3717 		return (error);
3718 	if ((fp->f_flag & FREAD) == 0) {
3719 		fdrop(fp, td);
3720 		return (EBADF);
3721 	}
3722 	vp = fp->f_vnode;
3723 unionread:
3724 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3725 	if (vp->v_type != VDIR) {
3726 		VFS_UNLOCK_GIANT(vfslocked);
3727 		error = EINVAL;
3728 		goto fail;
3729 	}
3730 	aiov.iov_base = uap->buf;
3731 	aiov.iov_len = uap->count;
3732 	auio.uio_iov = &aiov;
3733 	auio.uio_iovcnt = 1;
3734 	auio.uio_rw = UIO_READ;
3735 	auio.uio_segflg = UIO_USERSPACE;
3736 	auio.uio_td = td;
3737 	auio.uio_resid = uap->count;
3738 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3739 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3740 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3741 	loff = auio.uio_offset = fp->f_offset;
3742 #ifdef MAC
3743 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3744 	if (error == 0)
3745 #endif
3746 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3747 		    NULL);
3748 	fp->f_offset = auio.uio_offset;
3749 	if (error) {
3750 		VOP_UNLOCK(vp, 0, td);
3751 		VFS_UNLOCK_GIANT(vfslocked);
3752 		goto fail;
3753 	}
3754 	if (uap->count == auio.uio_resid &&
3755 	    (vp->v_vflag & VV_ROOT) &&
3756 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
3757 		struct vnode *tvp = vp;
3758 		vp = vp->v_mount->mnt_vnodecovered;
3759 		VREF(vp);
3760 		fp->f_vnode = vp;
3761 		fp->f_data = vp;
3762 		fp->f_offset = 0;
3763 		vput(tvp);
3764 		VFS_UNLOCK_GIANT(vfslocked);
3765 		goto unionread;
3766 	}
3767 	VOP_UNLOCK(vp, 0, td);
3768 	VFS_UNLOCK_GIANT(vfslocked);
3769 	if (uap->basep != NULL) {
3770 		error = copyout(&loff, uap->basep, sizeof(long));
3771 	}
3772 	td->td_retval[0] = uap->count - auio.uio_resid;
3773 fail:
3774 	fdrop(fp, td);
3775 	return (error);
3776 }
3777 #ifndef _SYS_SYSPROTO_H_
3778 struct getdents_args {
3779 	int fd;
3780 	char *buf;
3781 	size_t count;
3782 };
3783 #endif
3784 int
3785 getdents(td, uap)
3786 	struct thread *td;
3787 	register struct getdents_args /* {
3788 		int fd;
3789 		char *buf;
3790 		u_int count;
3791 	} */ *uap;
3792 {
3793 	struct getdirentries_args ap;
3794 	ap.fd = uap->fd;
3795 	ap.buf = uap->buf;
3796 	ap.count = uap->count;
3797 	ap.basep = NULL;
3798 	return (getdirentries(td, &ap));
3799 }
3800 
3801 /*
3802  * Set the mode mask for creation of filesystem nodes.
3803  *
3804  * MP SAFE
3805  */
3806 #ifndef _SYS_SYSPROTO_H_
3807 struct umask_args {
3808 	int	newmask;
3809 };
3810 #endif
3811 int
3812 umask(td, uap)
3813 	struct thread *td;
3814 	struct umask_args /* {
3815 		int newmask;
3816 	} */ *uap;
3817 {
3818 	register struct filedesc *fdp;
3819 
3820 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3821 	fdp = td->td_proc->p_fd;
3822 	td->td_retval[0] = fdp->fd_cmask;
3823 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3824 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3825 	return (0);
3826 }
3827 
3828 /*
3829  * Void all references to file by ripping underlying filesystem
3830  * away from vnode.
3831  */
3832 #ifndef _SYS_SYSPROTO_H_
3833 struct revoke_args {
3834 	char	*path;
3835 };
3836 #endif
3837 int
3838 revoke(td, uap)
3839 	struct thread *td;
3840 	register struct revoke_args /* {
3841 		char *path;
3842 	} */ *uap;
3843 {
3844 	struct vnode *vp;
3845 	struct vattr vattr;
3846 	int error;
3847 	struct nameidata nd;
3848 	int vfslocked;
3849 
3850 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3851 	    UIO_USERSPACE, uap->path, td);
3852 	if ((error = namei(&nd)) != 0)
3853 		return (error);
3854 	vfslocked = NDHASGIANT(&nd);
3855 	vp = nd.ni_vp;
3856 	NDFREE(&nd, NDF_ONLY_PNBUF);
3857 	if (vp->v_type != VCHR) {
3858 		error = EINVAL;
3859 		goto out;
3860 	}
3861 #ifdef MAC
3862 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3863 	if (error)
3864 		goto out;
3865 #endif
3866 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3867 	if (error)
3868 		goto out;
3869 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3870 		error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN,
3871 		    SUSER_ALLOWJAIL);
3872 		if (error)
3873 			goto out;
3874 	}
3875 	if (vcount(vp) > 1)
3876 		VOP_REVOKE(vp, REVOKEALL);
3877 out:
3878 	vput(vp);
3879 	VFS_UNLOCK_GIANT(vfslocked);
3880 	return (error);
3881 }
3882 
3883 /*
3884  * Convert a user file descriptor to a kernel file entry.
3885  * A reference on the file entry is held upon returning.
3886  */
3887 int
3888 getvnode(fdp, fd, fpp)
3889 	struct filedesc *fdp;
3890 	int fd;
3891 	struct file **fpp;
3892 {
3893 	int error;
3894 	struct file *fp;
3895 
3896 	fp = NULL;
3897 	if (fdp == NULL)
3898 		error = EBADF;
3899 	else {
3900 		FILEDESC_LOCK(fdp);
3901 		if ((u_int)fd >= fdp->fd_nfiles ||
3902 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3903 			error = EBADF;
3904 		else if (fp->f_vnode == NULL) {
3905 			fp = NULL;
3906 			error = EINVAL;
3907 		} else {
3908 			fhold(fp);
3909 			error = 0;
3910 		}
3911 		FILEDESC_UNLOCK(fdp);
3912 	}
3913 	*fpp = fp;
3914 	return (error);
3915 }
3916 
3917 /*
3918  * Get (NFS) file handle
3919  */
3920 #ifndef _SYS_SYSPROTO_H_
3921 struct lgetfh_args {
3922 	char	*fname;
3923 	fhandle_t *fhp;
3924 };
3925 #endif
3926 int
3927 lgetfh(td, uap)
3928 	struct thread *td;
3929 	register struct lgetfh_args *uap;
3930 {
3931 	struct nameidata nd;
3932 	fhandle_t fh;
3933 	register struct vnode *vp;
3934 	int vfslocked;
3935 	int error;
3936 
3937 	error = priv_check(td, PRIV_VFS_GETFH);
3938 	if (error)
3939 		return (error);
3940 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3941 	    UIO_USERSPACE, uap->fname, td);
3942 	error = namei(&nd);
3943 	if (error)
3944 		return (error);
3945 	vfslocked = NDHASGIANT(&nd);
3946 	NDFREE(&nd, NDF_ONLY_PNBUF);
3947 	vp = nd.ni_vp;
3948 	bzero(&fh, sizeof(fh));
3949 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3950 	error = VOP_VPTOFH(vp, &fh.fh_fid);
3951 	vput(vp);
3952 	VFS_UNLOCK_GIANT(vfslocked);
3953 	if (error)
3954 		return (error);
3955 	error = copyout(&fh, uap->fhp, sizeof (fh));
3956 	return (error);
3957 }
3958 
3959 #ifndef _SYS_SYSPROTO_H_
3960 struct getfh_args {
3961 	char	*fname;
3962 	fhandle_t *fhp;
3963 };
3964 #endif
3965 int
3966 getfh(td, uap)
3967 	struct thread *td;
3968 	register struct getfh_args *uap;
3969 {
3970 	struct nameidata nd;
3971 	fhandle_t fh;
3972 	register struct vnode *vp;
3973 	int vfslocked;
3974 	int error;
3975 
3976 	error = priv_check(td, PRIV_VFS_GETFH);
3977 	if (error)
3978 		return (error);
3979 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3980 	    UIO_USERSPACE, uap->fname, td);
3981 	error = namei(&nd);
3982 	if (error)
3983 		return (error);
3984 	vfslocked = NDHASGIANT(&nd);
3985 	NDFREE(&nd, NDF_ONLY_PNBUF);
3986 	vp = nd.ni_vp;
3987 	bzero(&fh, sizeof(fh));
3988 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3989 	error = VOP_VPTOFH(vp, &fh.fh_fid);
3990 	vput(vp);
3991 	VFS_UNLOCK_GIANT(vfslocked);
3992 	if (error)
3993 		return (error);
3994 	error = copyout(&fh, uap->fhp, sizeof (fh));
3995 	return (error);
3996 }
3997 
/*
 * System call for rpc.lockd to translate an NFS file handle into an
 * open descriptor.
 *
 * The handle is copied in from uap->u_fhp, resolved to a vnode through
 * the mount it names, run through the same checks as vn_open (see the
 * section marked below) and installed in a newly allocated descriptor,
 * whose index is returned in td->td_retval[0].
 *
 * Fails with EINVAL if neither FREAD nor FWRITE is requested or O_CREAT
 * is set, and with ESTALE if the handle's filesystem id matches no
 * mount.
 *
 * Warning: do not remove the priv_check() call or this becomes one giant
 * security hole.
 *
 * MP SAFE
 */
#ifndef _SYS_SYSPROTO_H_
struct fhopen_args {
	const struct fhandle *u_fhp;
	int flags;
};
#endif
int
fhopen(td, uap)
	struct thread *td;
	struct fhopen_args /* {
		const struct fhandle *u_fhp;
		int flags;
	} */ *uap;
{
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	int fmode, mode, error, type;
	struct file *nfp;
	int vfslocked;
	int indx;

	error = priv_check(td, PRIV_VFS_FHOPEN);
	if (error)
		return (error);
	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	if (error)
		return(error);
	/*
	 * Find the mount point.  Every exit past this point releases mp
	 * with vfs_rel().
	 */
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL)
		return (ESTALE);
	vfslocked = VFS_LOCK_GIANT(mp);
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
	if (error)
		goto out;
	/*
	 * From now on we have to make sure not to forget about the vnode:
	 * any error that causes an abort while the vnode is locked must
	 * vput(vp), so just set error and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	/* Translate the open flags into the access mode to be checked. */
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (fmode & O_APPEND)
		mode |= VAPPEND;
#ifdef MAC
	error = mac_check_vnode_open(td->td_ucred, vp, mode);
	if (error)
		goto bad;
#endif
	if (mode) {
		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		/*
		 * The vnode is unlocked around vn_start_write(), so a
		 * failure there drops the reference with vrele() and skips
		 * the vput() at 'bad'.
		 */
		VOP_UNLOCK(vp, 0, td);				/* XXX */
		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
			vrele(vp);
			goto out;
		}
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
#ifdef MAC
		/*
		 * We don't yet have fp->f_cred, so use td->td_ucred, which
		 * should be right.
		 */
		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
		if (error == 0) {
#endif
			/* Truncate by setting only the size attribute to 0. */
			VATTR_NULL(vap);
			vap->va_size = 0;
			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
#ifdef MAC
		}
#endif
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
	if (error)
		goto bad;

	if (fmode & FWRITE)
		vp->v_writecount++;

	/*
	 * end of vn_open code
	 */

	if ((error = falloc(td, &nfp, &indx)) != 0) {
		/* Undo the writer count taken just above. */
		if (fmode & FWRITE)
			vp->v_writecount--;
		goto bad;
	}
	/* An extra reference on `nfp' has been held for us by falloc(). */
	fp = nfp;

	/* Attach the vnode to the new file entry. */
	nfp->f_vnode = vp;
	nfp->f_data = vp;
	nfp->f_flag = fmode & FMASK;
	nfp->f_ops = &vnops;
	nfp->f_type = DTYPE_VNODE;
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file lock: exclusive for O_EXLOCK, shared otherwise. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode lock is not held across VOP_ADVLOCK(). */
		VOP_UNLOCK(vp, 0, td);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0) {
			/*
			 * The lock request failed.  Normally close the
			 * descriptor but handle the case where someone might
			 * have dup()d or close()d it when we weren't looking.
			 */
			fdclose(fdp, fp, indx, td);

			/*
			 * Release our private reference.  The vnode is
			 * unlocked here, hence 'out' rather than 'bad'.
			 */
			fdrop(fp, td);
			goto out;
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		fp->f_flag |= FHASLOCK;
	}

	/* Success: unlock the vnode and drop the reference from falloc(). */
	VOP_UNLOCK(vp, 0, td);
	fdrop(fp, td);
	vfs_rel(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	td->td_retval[0] = indx;
	return (0);

	/* Error exits: 'bad' expects a locked vnode, 'out' no vnode at all. */
bad:
	vput(vp);
out:
	vfs_rel(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
4192 
4193 /*
4194  * Stat an (NFS) file handle.
4195  *
4196  * MP SAFE
4197  */
4198 #ifndef _SYS_SYSPROTO_H_
4199 struct fhstat_args {
4200 	struct fhandle *u_fhp;
4201 	struct stat *sb;
4202 };
4203 #endif
4204 int
4205 fhstat(td, uap)
4206 	struct thread *td;
4207 	register struct fhstat_args /* {
4208 		struct fhandle *u_fhp;
4209 		struct stat *sb;
4210 	} */ *uap;
4211 {
4212 	struct stat sb;
4213 	fhandle_t fh;
4214 	struct mount *mp;
4215 	struct vnode *vp;
4216 	int vfslocked;
4217 	int error;
4218 
4219 	error = priv_check(td, PRIV_VFS_FHSTAT);
4220 	if (error)
4221 		return (error);
4222 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4223 	if (error)
4224 		return (error);
4225 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4226 		return (ESTALE);
4227 	vfslocked = VFS_LOCK_GIANT(mp);
4228 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4229 		vfs_rel(mp);
4230 		VFS_UNLOCK_GIANT(vfslocked);
4231 		return (error);
4232 	}
4233 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4234 	vput(vp);
4235 	vfs_rel(mp);
4236 	VFS_UNLOCK_GIANT(vfslocked);
4237 	if (error)
4238 		return (error);
4239 	error = copyout(&sb, uap->sb, sizeof(sb));
4240 	return (error);
4241 }
4242 
4243 /*
4244  * Implement fstatfs() for (NFS) file handles.
4245  *
4246  * MP SAFE
4247  */
4248 #ifndef _SYS_SYSPROTO_H_
4249 struct fhstatfs_args {
4250 	struct fhandle *u_fhp;
4251 	struct statfs *buf;
4252 };
4253 #endif
4254 int
4255 fhstatfs(td, uap)
4256 	struct thread *td;
4257 	struct fhstatfs_args /* {
4258 		struct fhandle *u_fhp;
4259 		struct statfs *buf;
4260 	} */ *uap;
4261 {
4262 	struct statfs sf;
4263 	fhandle_t fh;
4264 	int error;
4265 
4266 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4267 	if (error)
4268 		return (error);
4269 	error = kern_fhstatfs(td, fh, &sf);
4270 	if (error)
4271 		return (error);
4272 	return (copyout(&sf, uap->buf, sizeof(sf)));
4273 }
4274 
4275 int
4276 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4277 {
4278 	struct statfs *sp;
4279 	struct mount *mp;
4280 	struct vnode *vp;
4281 	int vfslocked;
4282 	int error;
4283 
4284 	error = priv_check(td, PRIV_VFS_FHSTATFS);
4285 	if (error)
4286 		return (error);
4287 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4288 		return (ESTALE);
4289 	vfslocked = VFS_LOCK_GIANT(mp);
4290 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4291 	if (error) {
4292 		VFS_UNLOCK_GIANT(vfslocked);
4293 		vfs_rel(mp);
4294 		return (error);
4295 	}
4296 	vput(vp);
4297 	error = prison_canseemount(td->td_ucred, mp);
4298 	if (error)
4299 		goto out;
4300 #ifdef MAC
4301 	error = mac_check_mount_stat(td->td_ucred, mp);
4302 	if (error)
4303 		goto out;
4304 #endif
4305 	/*
4306 	 * Set these in case the underlying filesystem fails to do so.
4307 	 */
4308 	sp = &mp->mnt_stat;
4309 	sp->f_version = STATFS_VERSION;
4310 	sp->f_namemax = NAME_MAX;
4311 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4312 	error = VFS_STATFS(mp, sp, td);
4313 	if (error == 0)
4314 		*buf = *sp;
4315 out:
4316 	vfs_rel(mp);
4317 	VFS_UNLOCK_GIANT(vfslocked);
4318 	return (error);
4319 }
4320