xref: /freebsd/sys/kern/vfs_syscalls.c (revision f3bb407b7c1b3faa88d0580541f01a8e6fb6cc68)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/sysent.h>
48 #include <sys/malloc.h>
49 #include <sys/mount.h>
50 #include <sys/mutex.h>
51 #include <sys/sysproto.h>
52 #include <sys/namei.h>
53 #include <sys/filedesc.h>
54 #include <sys/kernel.h>
55 #include <sys/fcntl.h>
56 #include <sys/file.h>
57 #include <sys/limits.h>
58 #include <sys/linker.h>
59 #include <sys/stat.h>
60 #include <sys/sx.h>
61 #include <sys/unistd.h>
62 #include <sys/vnode.h>
63 #include <sys/priv.h>
64 #include <sys/proc.h>
65 #include <sys/dirent.h>
66 #include <sys/jail.h>
67 #include <sys/syscallsubr.h>
68 #include <sys/sysctl.h>
69 
70 #include <machine/stdarg.h>
71 
72 #include <security/audit/audit.h>
73 #include <security/mac/mac_framework.h>
74 
75 #include <vm/vm.h>
76 #include <vm/vm_object.h>
77 #include <vm/vm_page.h>
78 #include <vm/uma.h>
79 
/*
 * Forward declarations for helpers that are private to this file.
 */
static int chroot_refuse_vdir_fds(struct filedesc *fdp);
static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
static int setfmode(struct thread *td, struct vnode *, int);
static int setfflags(struct thread *td, struct vnode *, int);
static int setutimes(struct thread *td, struct vnode *,
    const struct timespec *, int, int);
static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
    struct thread *td);

/*
 * The module initialization routine for POSIX asynchronous I/O will
 * set this to the version of AIO that it implements.  (Zero means
 * that it is not implemented.)  This value is used here by pathconf()
 * and in kern_descrip.c by fpathconf().
 *
 * As a file-scope object without an initializer it starts out as zero,
 * i.e. "not implemented", until some other code assigns it.
 */
int async_io_version;

#ifdef DEBUG
/*
 * Debug-only knob exported read/write as the "debug.syncprt" sysctl.
 * NOTE(review): nothing in this part of the file reads it -- confirm
 * whether it is still used anywhere.
 */
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif
102 
/*
 * Sync each mounted filesystem.
 *
 * Walk the global mount list and, for every mount that can be busied
 * without sleeping, is not read-only and accepts a non-blocking
 * vn_start_write(), call vfs_msync() and VFS_SYNC() with MNT_NOWAIT.
 * Mounts that fail any of those tests are silently skipped.
 *
 * The results of vfs_msync() and VFS_SYNC() are ignored; the system call
 * always returns 0.  'uap' carries no arguments and is not used.
 */
#ifndef _SYS_SYSPROTO_H_
struct sync_args {
	int     dummy;
};
#endif
/* ARGSUSED */
int
sync(td, uap)
	struct thread *td;
	struct sync_args *uap;
{
	struct mount *mp, *nmp;
	int vfslocked;

	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/*
		 * mountlist_mtx is handed to vfs_busy() as its interlock.
		 * On failure we still hold it and simply step to the next
		 * entry.  NOTE(review): on success vfs_busy() presumably
		 * drops the mutex, which is why it is re-taken at the bottom
		 * of the loop -- confirm against vfs_busy().
		 */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		vfslocked = VFS_LOCK_GIANT(mp);
		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
			/*
			 * Turn off MNTK_ASYNC for the duration of the sync.
			 * mnt_noasync counts the threads that currently want
			 * it off.
			 */
			MNT_ILOCK(mp);
			mp->mnt_noasync++;
			mp->mnt_kern_flag &= ~MNTK_ASYNC;
			MNT_IUNLOCK(mp);
			vfs_msync(mp, MNT_NOWAIT);
			VFS_SYNC(mp, MNT_NOWAIT, td);
			/*
			 * Restore MNTK_ASYNC only when we were the last
			 * holder and the mount is still flagged MNT_ASYNC.
			 */
			MNT_ILOCK(mp);
			mp->mnt_noasync--;
			if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
			    mp->mnt_noasync == 0)
				mp->mnt_kern_flag |= MNTK_ASYNC;
			MNT_IUNLOCK(mp);
			vn_finished_write(mp);
		}
		VFS_UNLOCK_GIANT(vfslocked);
		/*
		 * Re-take the list lock before reading the successor, and
		 * only then unbusy the current mount.
		 */
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, td);
	}
	mtx_unlock(&mountlist_mtx);
	return (0);
}
151 
/*
 * Non-zero permits quotactl(2) for jailed callers; while it is zero,
 * quotactl() returns EPERM to any jailed credential.  The sysctl that would
 * expose this flag is compiled out below and no code visible in this part
 * of the file assigns it, so it keeps its static initial value of zero.
 *
 * XXX PRISON: could be per prison flag
 */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif
157 
158 /*
159  * Change filesystem quotas.
160  */
161 #ifndef _SYS_SYSPROTO_H_
162 struct quotactl_args {
163 	char *path;
164 	int cmd;
165 	int uid;
166 	caddr_t arg;
167 };
168 #endif
169 int
170 quotactl(td, uap)
171 	struct thread *td;
172 	register struct quotactl_args /* {
173 		char *path;
174 		int cmd;
175 		int uid;
176 		caddr_t arg;
177 	} */ *uap;
178 {
179 	struct mount *mp, *vmp;
180 	int vfslocked;
181 	int error;
182 	struct nameidata nd;
183 
184 	AUDIT_ARG(cmd, uap->cmd);
185 	AUDIT_ARG(uid, uap->uid);
186 	if (jailed(td->td_ucred) && !prison_quotas)
187 		return (EPERM);
188 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1,
189 	   UIO_USERSPACE, uap->path, td);
190 	if ((error = namei(&nd)) != 0)
191 		return (error);
192 	vfslocked = NDHASGIANT(&nd);
193 	NDFREE(&nd, NDF_ONLY_PNBUF);
194 	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
195 	mp = nd.ni_vp->v_mount;
196 	vrele(nd.ni_vp);
197 	if (error)
198 		goto out;
199 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
200 	vn_finished_write(vmp);
201 out:
202 	VFS_UNLOCK_GIANT(vfslocked);
203 	return (error);
204 }
205 
206 /*
207  * Get filesystem statistics.
208  */
209 #ifndef _SYS_SYSPROTO_H_
210 struct statfs_args {
211 	char *path;
212 	struct statfs *buf;
213 };
214 #endif
215 int
216 statfs(td, uap)
217 	struct thread *td;
218 	register struct statfs_args /* {
219 		char *path;
220 		struct statfs *buf;
221 	} */ *uap;
222 {
223 	struct statfs sf;
224 	int error;
225 
226 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
227 	if (error == 0)
228 		error = copyout(&sf, uap->buf, sizeof(sf));
229 	return (error);
230 }
231 
232 int
233 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
234     struct statfs *buf)
235 {
236 	struct mount *mp;
237 	struct statfs *sp, sb;
238 	int vfslocked;
239 	int error;
240 	struct nameidata nd;
241 
242 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
243 	    pathseg, path, td);
244 	error = namei(&nd);
245 	if (error)
246 		return (error);
247 	vfslocked = NDHASGIANT(&nd);
248 	mp = nd.ni_vp->v_mount;
249 	vfs_ref(mp);
250 	NDFREE(&nd, NDF_ONLY_PNBUF);
251 	vput(nd.ni_vp);
252 #ifdef MAC
253 	error = mac_check_mount_stat(td->td_ucred, mp);
254 	if (error)
255 		goto out;
256 #endif
257 	/*
258 	 * Set these in case the underlying filesystem fails to do so.
259 	 */
260 	sp = &mp->mnt_stat;
261 	sp->f_version = STATFS_VERSION;
262 	sp->f_namemax = NAME_MAX;
263 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
264 	error = VFS_STATFS(mp, sp, td);
265 	if (error)
266 		goto out;
267 	if (priv_check(td, PRIV_VFS_GENERATION)) {
268 		bcopy(sp, &sb, sizeof(sb));
269 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
270 		prison_enforce_statfs(td->td_ucred, mp, &sb);
271 		sp = &sb;
272 	}
273 	*buf = *sp;
274 out:
275 	vfs_rel(mp);
276 	VFS_UNLOCK_GIANT(vfslocked);
277 	if (mtx_owned(&Giant))
278 		printf("statfs(%d): %s: %d\n", vfslocked, path, error);
279 	return (error);
280 }
281 
282 /*
283  * Get filesystem statistics.
284  */
285 #ifndef _SYS_SYSPROTO_H_
286 struct fstatfs_args {
287 	int fd;
288 	struct statfs *buf;
289 };
290 #endif
291 int
292 fstatfs(td, uap)
293 	struct thread *td;
294 	register struct fstatfs_args /* {
295 		int fd;
296 		struct statfs *buf;
297 	} */ *uap;
298 {
299 	struct statfs sf;
300 	int error;
301 
302 	error = kern_fstatfs(td, uap->fd, &sf);
303 	if (error == 0)
304 		error = copyout(&sf, uap->buf, sizeof(sf));
305 	return (error);
306 }
307 
308 int
309 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
310 {
311 	struct file *fp;
312 	struct mount *mp;
313 	struct statfs *sp, sb;
314 	int vfslocked;
315 	struct vnode *vp;
316 	int error;
317 
318 	AUDIT_ARG(fd, fd);
319 	error = getvnode(td->td_proc->p_fd, fd, &fp);
320 	if (error)
321 		return (error);
322 	vp = fp->f_vnode;
323 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
324 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
325 #ifdef AUDIT
326 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
327 #endif
328 	mp = vp->v_mount;
329 	if (mp)
330 		vfs_ref(mp);
331 	VOP_UNLOCK(vp, 0, td);
332 	fdrop(fp, td);
333 	if (vp->v_iflag & VI_DOOMED) {
334 		error = EBADF;
335 		goto out;
336 	}
337 #ifdef MAC
338 	error = mac_check_mount_stat(td->td_ucred, mp);
339 	if (error)
340 		goto out;
341 #endif
342 	/*
343 	 * Set these in case the underlying filesystem fails to do so.
344 	 */
345 	sp = &mp->mnt_stat;
346 	sp->f_version = STATFS_VERSION;
347 	sp->f_namemax = NAME_MAX;
348 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
349 	error = VFS_STATFS(mp, sp, td);
350 	if (error)
351 		goto out;
352 	if (priv_check(td, PRIV_VFS_GENERATION)) {
353 		bcopy(sp, &sb, sizeof(sb));
354 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
355 		prison_enforce_statfs(td->td_ucred, mp, &sb);
356 		sp = &sb;
357 	}
358 	*buf = *sp;
359 out:
360 	if (mp)
361 		vfs_rel(mp);
362 	VFS_UNLOCK_GIANT(vfslocked);
363 	return (error);
364 }
365 
366 /*
367  * Get statistics on all filesystems.
368  */
369 #ifndef _SYS_SYSPROTO_H_
370 struct getfsstat_args {
371 	struct statfs *buf;
372 	long bufsize;
373 	int flags;
374 };
375 #endif
376 int
377 getfsstat(td, uap)
378 	struct thread *td;
379 	register struct getfsstat_args /* {
380 		struct statfs *buf;
381 		long bufsize;
382 		int flags;
383 	} */ *uap;
384 {
385 
386 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
387 	    uap->flags));
388 }
389 
/*
 * Kernel-side implementation of getfsstat(2).
 *
 * td      - calling thread; td->td_retval[0] receives the result count.
 * buf     - in/out pointer to the statfs array (see below).
 * bufsize - size of the array in bytes; 0 means "just count".
 * bufseg  - UIO_USERSPACE: '*buf' is a user address and entries are
 *           copied out one by one.
 *           UIO_SYSSPACE: an array is allocated here and stored in '*buf'.
 * flags   - MNT_WAIT / MNT_NOWAIT / MNT_LAZY, controlling whether the
 *           cached statistics are refreshed through VFS_STATFS().
 *
 * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
 * 	The caller is responsible for freeing memory which will be allocated
 *	in '*buf'.
 *
 * Mounts hidden by prison_canseemount(), rejected by MAC, that cannot be
 * busied without sleeping, or whose VFS_STATFS() fails are skipped and not
 * counted.  td_retval[0] is the number of mounts counted, capped at the
 * capacity of the buffer when one was supplied.
 *
 * Returns 0, or the copyout() error for a bad user buffer.
 */
int
kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
    enum uio_seg bufseg, int flags)
{
	struct mount *mp, *nmp;
	struct statfs *sfsp, *sp, sb;
	size_t count, maxcount;
	int vfslocked;
	int error;

	maxcount = bufsize / sizeof(struct statfs);
	if (bufsize == 0)
		sfsp = NULL;
	else if (bufseg == UIO_USERSPACE)
		sfsp = *buf;
	else /* if (bufseg == UIO_SYSSPACE) */ {
		/*
		 * Size the kernel array from the current number of mounts,
		 * never exceeding what the caller asked for.  The list lock
		 * is dropped before allocating, so the count is a snapshot.
		 */
		count = 0;
		mtx_lock(&mountlist_mtx);
		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
			count++;
		}
		mtx_unlock(&mountlist_mtx);
		if (maxcount > count)
			maxcount = count;
		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
		    M_WAITOK);
	}
	count = 0;
	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		if (prison_canseemount(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#ifdef MAC
		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#endif
		/*
		 * mountlist_mtx is the interlock for vfs_busy(); on failure
		 * it is still held and the mount is skipped.  NOTE(review):
		 * on success it is presumably released, since every path
		 * below re-takes it -- confirm against vfs_busy().
		 */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		vfslocked = VFS_LOCK_GIANT(mp);
		/* Fill an entry only while there is room in the buffer. */
		if (sfsp && count < maxcount) {
			sp = &mp->mnt_stat;
			/*
			 * Set these in case the underlying filesystem
			 * fails to do so.
			 */
			sp->f_version = STATFS_VERSION;
			sp->f_namemax = NAME_MAX;
			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
			/*
			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
			 * overrides MNT_WAIT.
			 */
			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
			    (flags & MNT_WAIT)) &&
			    (error = VFS_STATFS(mp, sp, td))) {
				/*
				 * Refresh failed: skip this mount without
				 * counting it.  The error is not reported.
				 */
				VFS_UNLOCK_GIANT(vfslocked);
				mtx_lock(&mountlist_mtx);
				nmp = TAILQ_NEXT(mp, mnt_list);
				vfs_unbusy(mp, td);
				continue;
			}
			if (priv_check(td, PRIV_VFS_GENERATION)) {
				/* Unprivileged: report a sanitized copy. */
				bcopy(sp, &sb, sizeof(sb));
				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
				prison_enforce_statfs(td->td_ucred, mp, &sb);
				sp = &sb;
			}
			if (bufseg == UIO_SYSSPACE)
				bcopy(sp, sfsp, sizeof(*sp));
			else /* if (bufseg == UIO_USERSPACE) */ {
				error = copyout(sp, sfsp, sizeof(*sp));
				if (error) {
					/*
					 * mountlist_mtx is not held here, so
					 * only the busy reference and Giant
					 * need to be released.
					 */
					vfs_unbusy(mp, td);
					VFS_UNLOCK_GIANT(vfslocked);
					return (error);
				}
			}
			sfsp++;
		}
		VFS_UNLOCK_GIANT(vfslocked);
		count++;
		/*
		 * Re-take the list lock before reading the successor, and
		 * only then unbusy the current mount.
		 */
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, td);
	}
	mtx_unlock(&mountlist_mtx);
	/* With a buffer, never report more entries than it can hold. */
	if (sfsp && count > maxcount)
		td->td_retval[0] = maxcount;
	else
		td->td_retval[0] = count;
	return (0);
}
494 
495 #ifdef COMPAT_FREEBSD4
496 /*
497  * Get old format filesystem statistics.
498  */
499 static void cvtstatfs(struct statfs *, struct ostatfs *);
500 
501 #ifndef _SYS_SYSPROTO_H_
502 struct freebsd4_statfs_args {
503 	char *path;
504 	struct ostatfs *buf;
505 };
506 #endif
507 int
508 freebsd4_statfs(td, uap)
509 	struct thread *td;
510 	struct freebsd4_statfs_args /* {
511 		char *path;
512 		struct ostatfs *buf;
513 	} */ *uap;
514 {
515 	struct ostatfs osb;
516 	struct statfs sf;
517 	int error;
518 
519 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
520 	if (error)
521 		return (error);
522 	cvtstatfs(&sf, &osb);
523 	return (copyout(&osb, uap->buf, sizeof(osb)));
524 }
525 
526 /*
527  * Get filesystem statistics.
528  */
529 #ifndef _SYS_SYSPROTO_H_
530 struct freebsd4_fstatfs_args {
531 	int fd;
532 	struct ostatfs *buf;
533 };
534 #endif
535 int
536 freebsd4_fstatfs(td, uap)
537 	struct thread *td;
538 	struct freebsd4_fstatfs_args /* {
539 		int fd;
540 		struct ostatfs *buf;
541 	} */ *uap;
542 {
543 	struct ostatfs osb;
544 	struct statfs sf;
545 	int error;
546 
547 	error = kern_fstatfs(td, uap->fd, &sf);
548 	if (error)
549 		return (error);
550 	cvtstatfs(&sf, &osb);
551 	return (copyout(&osb, uap->buf, sizeof(osb)));
552 }
553 
554 /*
555  * Get statistics on all filesystems.
556  */
557 #ifndef _SYS_SYSPROTO_H_
558 struct freebsd4_getfsstat_args {
559 	struct ostatfs *buf;
560 	long bufsize;
561 	int flags;
562 };
563 #endif
564 int
565 freebsd4_getfsstat(td, uap)
566 	struct thread *td;
567 	register struct freebsd4_getfsstat_args /* {
568 		struct ostatfs *buf;
569 		long bufsize;
570 		int flags;
571 	} */ *uap;
572 {
573 	struct statfs *buf, *sp;
574 	struct ostatfs osb;
575 	size_t count, size;
576 	int error;
577 
578 	count = uap->bufsize / sizeof(struct ostatfs);
579 	size = count * sizeof(struct statfs);
580 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
581 	if (size > 0) {
582 		count = td->td_retval[0];
583 		sp = buf;
584 		while (count > 0 && error == 0) {
585 			cvtstatfs(sp, &osb);
586 			error = copyout(&osb, uap->buf, sizeof(osb));
587 			sp++;
588 			uap->buf++;
589 			count--;
590 		}
591 		free(buf, M_TEMP);
592 	}
593 	return (error);
594 }
595 
596 /*
597  * Implement fstatfs() for (NFS) file handles.
598  */
599 #ifndef _SYS_SYSPROTO_H_
600 struct freebsd4_fhstatfs_args {
601 	struct fhandle *u_fhp;
602 	struct ostatfs *buf;
603 };
604 #endif
605 int
606 freebsd4_fhstatfs(td, uap)
607 	struct thread *td;
608 	struct freebsd4_fhstatfs_args /* {
609 		struct fhandle *u_fhp;
610 		struct ostatfs *buf;
611 	} */ *uap;
612 {
613 	struct ostatfs osb;
614 	struct statfs sf;
615 	fhandle_t fh;
616 	int error;
617 
618 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
619 	if (error)
620 		return (error);
621 	error = kern_fhstatfs(td, fh, &sf);
622 	if (error)
623 		return (error);
624 	cvtstatfs(&sf, &osb);
625 	return (copyout(&osb, uap->buf, sizeof(osb)));
626 }
627 
628 /*
629  * Convert a new format statfs structure to an old format statfs structure.
630  */
631 static void
632 cvtstatfs(nsp, osp)
633 	struct statfs *nsp;
634 	struct ostatfs *osp;
635 {
636 
637 	bzero(osp, sizeof(*osp));
638 	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
639 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
640 	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
641 	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
642 	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
643 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
644 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
645 	osp->f_owner = nsp->f_owner;
646 	osp->f_type = nsp->f_type;
647 	osp->f_flags = nsp->f_flags;
648 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
649 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
650 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
651 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
652 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
653 	    MIN(MFSNAMELEN, OMFSNAMELEN));
654 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
655 	    MIN(MNAMELEN, OMNAMELEN));
656 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
657 	    MIN(MNAMELEN, OMNAMELEN));
658 	osp->f_fsid = nsp->f_fsid;
659 }
660 #endif /* COMPAT_FREEBSD4 */
661 
/*
 * Change current working directory to a given file descriptor.
 *
 * The descriptor must refer to a vnode that passes change_dir() (a
 * directory the caller may search).  If a filesystem is mounted on that
 * directory, the root of the mounted filesystem is used instead, repeating
 * for as long as the resulting vnode is itself covered by a mount.
 *
 * Returns 0 on success, or the error from getvnode(), change_dir() or
 * VFS_ROOT().
 */
#ifndef _SYS_SYSPROTO_H_
struct fchdir_args {
	int	fd;
};
#endif
int
fchdir(td, uap)
	struct thread *td;
	struct fchdir_args /* {
		int fd;
	} */ *uap;
{
	register struct filedesc *fdp = td->td_proc->p_fd;
	struct vnode *vp, *tdp, *vpold;
	struct mount *mp;
	struct file *fp;
	int vfslocked;
	int error;

	AUDIT_ARG(fd, uap->fd);
	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
		return (error);
	/*
	 * Take our own reference on the vnode before dropping the file.
	 * On success this reference (or the one on the mounted root that
	 * replaces it) ends up in fd_cdir; on failure vput() releases it.
	 */
	vp = fp->f_vnode;
	VREF(vp);
	fdrop(fp, td);
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	AUDIT_ARG(vnode, vp, ARG_VNODE1);
	error = change_dir(vp, td);
	/* Step across any mount points stacked on the directory. */
	while (!error && (mp = vp->v_mountedhere) != NULL) {
		int tvfslocked;
		/*
		 * If the mount cannot be busied, go around again; the loop
		 * condition re-reads v_mountedhere.
		 */
		if (vfs_busy(mp, 0, 0, td))
			continue;
		tvfslocked = VFS_LOCK_GIANT(mp);
		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
		vfs_unbusy(mp, td);
		if (error) {
			VFS_UNLOCK_GIANT(tvfslocked);
			break;
		}
		/*
		 * Switch from the covered directory to the mounted root,
		 * handing over both the vnode and the Giant state.
		 * NOTE(review): tdp is presumably returned locked and
		 * referenced by VFS_ROOT() -- confirm.
		 */
		vput(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		vp = tdp;
		vfslocked = tvfslocked;
	}
	if (error) {
		vput(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (error);
	}
	VOP_UNLOCK(vp, 0, td);
	VFS_UNLOCK_GIANT(vfslocked);
	/* Install the new directory; release the old one after unlocking. */
	FILEDESC_LOCK_FAST(fdp);
	vpold = fdp->fd_cdir;
	fdp->fd_cdir = vp;
	FILEDESC_UNLOCK_FAST(fdp);
	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
	vrele(vpold);
	VFS_UNLOCK_GIANT(vfslocked);
	return (0);
}
726 
727 /*
728  * Change current working directory (``.'').
729  */
730 #ifndef _SYS_SYSPROTO_H_
731 struct chdir_args {
732 	char	*path;
733 };
734 #endif
735 int
736 chdir(td, uap)
737 	struct thread *td;
738 	struct chdir_args /* {
739 		char *path;
740 	} */ *uap;
741 {
742 
743 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
744 }
745 
746 int
747 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
748 {
749 	register struct filedesc *fdp = td->td_proc->p_fd;
750 	int error;
751 	struct nameidata nd;
752 	struct vnode *vp;
753 	int vfslocked;
754 
755 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
756 	    pathseg, path, td);
757 	if ((error = namei(&nd)) != 0)
758 		return (error);
759 	vfslocked = NDHASGIANT(&nd);
760 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
761 		vput(nd.ni_vp);
762 		VFS_UNLOCK_GIANT(vfslocked);
763 		NDFREE(&nd, NDF_ONLY_PNBUF);
764 		return (error);
765 	}
766 	VOP_UNLOCK(nd.ni_vp, 0, td);
767 	VFS_UNLOCK_GIANT(vfslocked);
768 	NDFREE(&nd, NDF_ONLY_PNBUF);
769 	FILEDESC_LOCK_FAST(fdp);
770 	vp = fdp->fd_cdir;
771 	fdp->fd_cdir = nd.ni_vp;
772 	FILEDESC_UNLOCK_FAST(fdp);
773 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
774 	vrele(vp);
775 	VFS_UNLOCK_GIANT(vfslocked);
776 	return (0);
777 }
778 
779 /*
780  * Helper function for raised chroot(2) security function:  Refuse if
781  * any filedescriptors are open directories.
782  */
783 static int
784 chroot_refuse_vdir_fds(fdp)
785 	struct filedesc *fdp;
786 {
787 	struct vnode *vp;
788 	struct file *fp;
789 	int fd;
790 
791 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
792 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
793 		fp = fget_locked(fdp, fd);
794 		if (fp == NULL)
795 			continue;
796 		if (fp->f_type == DTYPE_VNODE) {
797 			vp = fp->f_vnode;
798 			if (vp->v_type == VDIR)
799 				return (EPERM);
800 		}
801 	}
802 	return (0);
803 }
804 
/*
 * This sysctl (kern.chroot_allow_open_directories) determines if we will
 * allow a process to chroot(2) if it has a directory open:
 *	0: disallowed for all processes.
 *	1: allowed for processes that were not already chroot(2)'ed.
 *	2: allowed for all processes.
 *
 * change_root() is the consumer: for 0 it always runs the open-directory
 * check, for 1 it runs the check only when the process's root directory is
 * not the system root vnode, and for any other value it skips the check.
 */

static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0, "");
817 
818 /*
819  * Change notion of root (``/'') directory.
820  */
821 #ifndef _SYS_SYSPROTO_H_
822 struct chroot_args {
823 	char	*path;
824 };
825 #endif
826 int
827 chroot(td, uap)
828 	struct thread *td;
829 	struct chroot_args /* {
830 		char *path;
831 	} */ *uap;
832 {
833 	int error;
834 	struct nameidata nd;
835 	int vfslocked;
836 
837 	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT,
838 	    SUSER_ALLOWJAIL);
839 	if (error)
840 		return (error);
841 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
842 	    UIO_USERSPACE, uap->path, td);
843 	error = namei(&nd);
844 	if (error)
845 		goto error;
846 	vfslocked = NDHASGIANT(&nd);
847 	if ((error = change_dir(nd.ni_vp, td)) != 0)
848 		goto e_vunlock;
849 #ifdef MAC
850 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
851 		goto e_vunlock;
852 #endif
853 	VOP_UNLOCK(nd.ni_vp, 0, td);
854 	error = change_root(nd.ni_vp, td);
855 	vrele(nd.ni_vp);
856 	VFS_UNLOCK_GIANT(vfslocked);
857 	NDFREE(&nd, NDF_ONLY_PNBUF);
858 	return (error);
859 e_vunlock:
860 	vput(nd.ni_vp);
861 	VFS_UNLOCK_GIANT(vfslocked);
862 error:
863 	NDFREE(&nd, NDF_ONLY_PNBUF);
864 	return (error);
865 }
866 
867 /*
868  * Common routine for chroot and chdir.  Callers must provide a locked vnode
869  * instance.
870  */
871 int
872 change_dir(vp, td)
873 	struct vnode *vp;
874 	struct thread *td;
875 {
876 	int error;
877 
878 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
879 	if (vp->v_type != VDIR)
880 		return (ENOTDIR);
881 #ifdef MAC
882 	error = mac_check_vnode_chdir(td->td_ucred, vp);
883 	if (error)
884 		return (error);
885 #endif
886 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
887 	return (error);
888 }
889 
890 /*
891  * Common routine for kern_chroot() and jail_attach().  The caller is
892  * responsible for invoking priv_check() and mac_check_chroot() to authorize
893  * this operation.
894  */
895 int
896 change_root(vp, td)
897 	struct vnode *vp;
898 	struct thread *td;
899 {
900 	struct filedesc *fdp;
901 	struct vnode *oldvp;
902 	int vfslocked;
903 	int error;
904 
905 	VFS_ASSERT_GIANT(vp->v_mount);
906 	fdp = td->td_proc->p_fd;
907 	FILEDESC_LOCK(fdp);
908 	if (chroot_allow_open_directories == 0 ||
909 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
910 		error = chroot_refuse_vdir_fds(fdp);
911 		if (error) {
912 			FILEDESC_UNLOCK(fdp);
913 			return (error);
914 		}
915 	}
916 	oldvp = fdp->fd_rdir;
917 	fdp->fd_rdir = vp;
918 	VREF(fdp->fd_rdir);
919 	if (!fdp->fd_jdir) {
920 		fdp->fd_jdir = vp;
921 		VREF(fdp->fd_jdir);
922 	}
923 	FILEDESC_UNLOCK(fdp);
924 	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
925 	vrele(oldvp);
926 	VFS_UNLOCK_GIANT(vfslocked);
927 	return (0);
928 }
929 
930 /*
931  * Check permissions, allocate an open file structure, and call the device
932  * open routine if any.
933  */
934 #ifndef _SYS_SYSPROTO_H_
935 struct open_args {
936 	char	*path;
937 	int	flags;
938 	int	mode;
939 };
940 #endif
941 int
942 open(td, uap)
943 	struct thread *td;
944 	register struct open_args /* {
945 		char *path;
946 		int flags;
947 		int mode;
948 	} */ *uap;
949 {
950 
951 	return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
952 }
953 
/*
 * Kernel-side implementation of open(2).
 *
 * td      - calling thread; td->td_retval[0] receives the new descriptor.
 * path    - pathname to open.
 * pathseg - address space 'path' lives in.
 * flags   - open(2) flags as passed by the user; converted with FFLAGS().
 * mode    - creation mode, filtered through the process umask.
 *
 * A file structure and descriptor slot are allocated first, then
 * vn_open() performs the lookup/creation.  On success the file structure
 * is wired to the vnode and any requested O_EXLOCK/O_SHLOCK lock and
 * O_TRUNC truncation are applied.
 *
 * Returns 0 on success.  EINVAL is returned when both access-mode bits are
 * set; ERESTART from vn_open() is reported as EINTR; other errors come
 * from falloc(), vn_open(), dupfdopen(), VOP_ADVLOCK(), vn_start_write(),
 * the MAC write check or VOP_SETATTR().
 */
int
kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
    int mode)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	struct vattr vat;
	struct mount *mp;
	int cmode;
	struct file *nfp;
	int type, indx, error;
	struct flock lf;
	struct nameidata nd;
	int vfslocked;

	AUDIT_ARG(fflags, flags);
	AUDIT_ARG(mode, mode);
	if ((flags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(flags);
	error = falloc(td, &nfp, &indx);
	if (error)
		return (error);
	/* An extra reference on `nfp' has been held for us by falloc(). */
	fp = nfp;
	/* Apply the umask and never pass the sticky bit to vn_open(). */
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open(&nd, &flags, cmode, indx);
	if (error) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wondrous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops) {
			fdrop(fp, td);
			td->td_retval[0] = indx;
			return (0);
		}

		/*
		 * release our own reference
		 */
		fdrop(fp, td);

		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 */
		if ((error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
		    (error =
			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
			td->td_retval[0] = indx;
			return (0);
		}
		/*
		 * Clean up the descriptor, but only if another thread hadn't
		 * replaced or closed it.
		 */
		fdclose(fdp, fp, indx, td);

		if (error == ERESTART)
			error = EINTR;
		return (error);
	}
	td->td_dupfd = 0;
	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/*
	 * There should be 2 references on the file, one from the descriptor
	 * table, and one for us.
	 *
	 * Handle the case where someone closed the file (via its file
	 * descriptor) while we were blocked.  The end result should look
	 * like opening the file succeeded but it was immediately closed.
	 * We call vn_close() manually because we haven't yet hooked up
	 * the various 'struct file' fields.
	 */
	FILEDESC_LOCK(fdp);
	FILE_LOCK(fp);
	if (fp->f_count == 1) {
		/* NOTE(review): 'mp' is assigned here but not used on this path. */
		mp = vp->v_mount;
		KASSERT(fdp->fd_ofiles[indx] != fp,
		    ("Open file descriptor lost all refs"));
		FILE_UNLOCK(fp);
		FILEDESC_UNLOCK(fdp);
		VOP_UNLOCK(vp, 0, td);
		vn_close(vp, flags & FMASK, fp->f_cred, td);
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		td->td_retval[0] = indx;
		return (0);
	}
	/*
	 * Wire the file structure to the vnode.  f_data and f_ops are only
	 * filled in when vn_open() left them at their initial values.
	 */
	fp->f_vnode = vp;
	if (fp->f_data == NULL)
		fp->f_data = vp;
	fp->f_flag = flags & FMASK;
	if (fp->f_ops == &badfileops)
		fp->f_ops = &vnops;
	fp->f_seqcount = 1;
	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
	FILE_UNLOCK(fp);
	FILEDESC_UNLOCK(fdp);

	VOP_UNLOCK(vp, 0, td);
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file flock-style lock; blocking unless FNONBLOCK. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0)
			goto bad;
		fp->f_flag |= FHASLOCK;
	}
	if (flags & O_TRUNC) {
		/* Truncate by setting the size attribute to zero. */
		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
			goto bad;
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		VATTR_NULL(&vat);
		vat.va_size = 0;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
#ifdef MAC
		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
		if (error == 0)
#endif
			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
		VOP_UNLOCK(vp, 0, td);
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	VFS_UNLOCK_GIANT(vfslocked);
	/*
	 * Release our private reference, leaving the one associated with
	 * the descriptor table intact.
	 */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);
bad:
	/*
	 * Failure after the file was installed: remove it from the
	 * descriptor table and drop our private reference as well.
	 */
	VFS_UNLOCK_GIANT(vfslocked);
	fdclose(fdp, fp, indx, td);
	fdrop(fp, td);
	return (error);
}
1113 
#ifdef COMPAT_43
/*
 * Old creat(2) entry point (4.3BSD compatibility): create a file.
 * Equivalent to kern_open() with O_WRONLY | O_CREAT | O_TRUNC and the
 * supplied mode.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	int err;

	err = kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode);
	return (err);
}
#endif /* COMPAT_43 */
1137 
1138 /*
1139  * Create a special file.
1140  */
1141 #ifndef _SYS_SYSPROTO_H_
1142 struct mknod_args {
1143 	char	*path;
1144 	int	mode;
1145 	int	dev;
1146 };
1147 #endif
1148 int
1149 mknod(td, uap)
1150 	struct thread *td;
1151 	register struct mknod_args /* {
1152 		char *path;
1153 		int mode;
1154 		int dev;
1155 	} */ *uap;
1156 {
1157 
1158 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1159 }
1160 
1161 int
1162 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1163     int dev)
1164 {
1165 	struct vnode *vp;
1166 	struct mount *mp;
1167 	struct vattr vattr;
1168 	int error;
1169 	int whiteout = 0;
1170 	struct nameidata nd;
1171 	int vfslocked;
1172 
1173 	AUDIT_ARG(mode, mode);
1174 	AUDIT_ARG(dev, dev);
1175 	switch (mode & S_IFMT) {
1176 	case S_IFCHR:
1177 	case S_IFBLK:
1178 		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
1179 		break;
1180 	case S_IFMT:
1181 		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
1182 		break;
1183 	case S_IFWHT:
1184 		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
1185 		break;
1186 	default:
1187 		error = EINVAL;
1188 		break;
1189 	}
1190 	if (error)
1191 		return (error);
1192 restart:
1193 	bwillwrite();
1194 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1195 	    pathseg, path, td);
1196 	if ((error = namei(&nd)) != 0)
1197 		return (error);
1198 	vfslocked = NDHASGIANT(&nd);
1199 	vp = nd.ni_vp;
1200 	if (vp != NULL) {
1201 		NDFREE(&nd, NDF_ONLY_PNBUF);
1202 		if (vp == nd.ni_dvp)
1203 			vrele(nd.ni_dvp);
1204 		else
1205 			vput(nd.ni_dvp);
1206 		vrele(vp);
1207 		VFS_UNLOCK_GIANT(vfslocked);
1208 		return (EEXIST);
1209 	} else {
1210 		VATTR_NULL(&vattr);
1211 		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1212 		vattr.va_mode = (mode & ALLPERMS) &
1213 		    ~td->td_proc->p_fd->fd_cmask;
1214 		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1215 		vattr.va_rdev = dev;
1216 		whiteout = 0;
1217 
1218 		switch (mode & S_IFMT) {
1219 		case S_IFMT:	/* used by badsect to flag bad sectors */
1220 			vattr.va_type = VBAD;
1221 			break;
1222 		case S_IFCHR:
1223 			vattr.va_type = VCHR;
1224 			break;
1225 		case S_IFBLK:
1226 			vattr.va_type = VBLK;
1227 			break;
1228 		case S_IFWHT:
1229 			whiteout = 1;
1230 			break;
1231 		default:
1232 			panic("kern_mknod: invalid mode");
1233 		}
1234 	}
1235 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1236 		NDFREE(&nd, NDF_ONLY_PNBUF);
1237 		vput(nd.ni_dvp);
1238 		VFS_UNLOCK_GIANT(vfslocked);
1239 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1240 			return (error);
1241 		goto restart;
1242 	}
1243 #ifdef MAC
1244 	if (error == 0 && !whiteout)
1245 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1246 		    &nd.ni_cnd, &vattr);
1247 #endif
1248 	if (!error) {
1249 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1250 		if (whiteout)
1251 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1252 		else {
1253 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1254 						&nd.ni_cnd, &vattr);
1255 			if (error == 0)
1256 				vput(nd.ni_vp);
1257 		}
1258 	}
1259 	NDFREE(&nd, NDF_ONLY_PNBUF);
1260 	vput(nd.ni_dvp);
1261 	vn_finished_write(mp);
1262 	VFS_UNLOCK_GIANT(vfslocked);
1263 	return (error);
1264 }
1265 
1266 /*
1267  * Create a named pipe.
1268  */
1269 #ifndef _SYS_SYSPROTO_H_
1270 struct mkfifo_args {
1271 	char	*path;
1272 	int	mode;
1273 };
1274 #endif
1275 int
1276 mkfifo(td, uap)
1277 	struct thread *td;
1278 	register struct mkfifo_args /* {
1279 		char *path;
1280 		int mode;
1281 	} */ *uap;
1282 {
1283 
1284 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1285 }
1286 
1287 int
1288 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1289 {
1290 	struct mount *mp;
1291 	struct vattr vattr;
1292 	int error;
1293 	struct nameidata nd;
1294 	int vfslocked;
1295 
1296 	AUDIT_ARG(mode, mode);
1297 restart:
1298 	bwillwrite();
1299 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1300 	    pathseg, path, td);
1301 	if ((error = namei(&nd)) != 0)
1302 		return (error);
1303 	vfslocked = NDHASGIANT(&nd);
1304 	if (nd.ni_vp != NULL) {
1305 		NDFREE(&nd, NDF_ONLY_PNBUF);
1306 		if (nd.ni_vp == nd.ni_dvp)
1307 			vrele(nd.ni_dvp);
1308 		else
1309 			vput(nd.ni_dvp);
1310 		vrele(nd.ni_vp);
1311 		VFS_UNLOCK_GIANT(vfslocked);
1312 		return (EEXIST);
1313 	}
1314 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1315 		NDFREE(&nd, NDF_ONLY_PNBUF);
1316 		vput(nd.ni_dvp);
1317 		VFS_UNLOCK_GIANT(vfslocked);
1318 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1319 			return (error);
1320 		goto restart;
1321 	}
1322 	VATTR_NULL(&vattr);
1323 	vattr.va_type = VFIFO;
1324 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1325 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1326 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1327 #ifdef MAC
1328 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1329 	    &vattr);
1330 	if (error)
1331 		goto out;
1332 #endif
1333 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1334 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1335 	if (error == 0)
1336 		vput(nd.ni_vp);
1337 #ifdef MAC
1338 out:
1339 #endif
1340 	vput(nd.ni_dvp);
1341 	vn_finished_write(mp);
1342 	VFS_UNLOCK_GIANT(vfslocked);
1343 	NDFREE(&nd, NDF_ONLY_PNBUF);
1344 	return (error);
1345 }
1346 
1347 /*
1348  * Make a hard file link.
1349  */
1350 #ifndef _SYS_SYSPROTO_H_
1351 struct link_args {
1352 	char	*path;
1353 	char	*link;
1354 };
1355 #endif
1356 int
1357 link(td, uap)
1358 	struct thread *td;
1359 	register struct link_args /* {
1360 		char *path;
1361 		char *link;
1362 	} */ *uap;
1363 {
1364 	int error;
1365 
1366 	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1367 	return (error);
1368 }
1369 
1370 static int hardlink_check_uid = 0;
1371 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1372     &hardlink_check_uid, 0,
1373     "Unprivileged processes cannot create hard links to files owned by other "
1374     "users");
1375 static int hardlink_check_gid = 0;
1376 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1377     &hardlink_check_gid, 0,
1378     "Unprivileged processes cannot create hard links to files owned by other "
1379     "groups");
1380 
1381 static int
1382 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1383 {
1384 	struct vattr va;
1385 	int error;
1386 
1387 	if (!hardlink_check_uid && !hardlink_check_gid)
1388 		return (0);
1389 
1390 	error = VOP_GETATTR(vp, &va, cred, td);
1391 	if (error != 0)
1392 		return (error);
1393 
1394 	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
1395 		error = priv_check_cred(cred, PRIV_VFS_LINK,
1396 		    SUSER_ALLOWJAIL);
1397 		if (error)
1398 			return (error);
1399 	}
1400 
1401 	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
1402 		error = priv_check_cred(cred, PRIV_VFS_LINK,
1403 		    SUSER_ALLOWJAIL);
1404 		if (error)
1405 			return (error);
1406 	}
1407 
1408 	return (0);
1409 }
1410 
1411 int
1412 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1413 {
1414 	struct vnode *vp;
1415 	struct mount *mp;
1416 	struct nameidata nd;
1417 	int vfslocked;
1418 	int lvfslocked;
1419 	int error;
1420 
1421 	bwillwrite();
1422 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
1423 	if ((error = namei(&nd)) != 0)
1424 		return (error);
1425 	vfslocked = NDHASGIANT(&nd);
1426 	NDFREE(&nd, NDF_ONLY_PNBUF);
1427 	vp = nd.ni_vp;
1428 	if (vp->v_type == VDIR) {
1429 		vrele(vp);
1430 		VFS_UNLOCK_GIANT(vfslocked);
1431 		return (EPERM);		/* POSIX */
1432 	}
1433 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1434 		vrele(vp);
1435 		VFS_UNLOCK_GIANT(vfslocked);
1436 		return (error);
1437 	}
1438 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1439 	    segflg, link, td);
1440 	if ((error = namei(&nd)) == 0) {
1441 		lvfslocked = NDHASGIANT(&nd);
1442 		if (nd.ni_vp != NULL) {
1443 			if (nd.ni_dvp == nd.ni_vp)
1444 				vrele(nd.ni_dvp);
1445 			else
1446 				vput(nd.ni_dvp);
1447 			vrele(nd.ni_vp);
1448 			error = EEXIST;
1449 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1450 		    == 0) {
1451 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1452 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1453 			error = can_hardlink(vp, td, td->td_ucred);
1454 			if (error == 0)
1455 #ifdef MAC
1456 				error = mac_check_vnode_link(td->td_ucred,
1457 				    nd.ni_dvp, vp, &nd.ni_cnd);
1458 			if (error == 0)
1459 #endif
1460 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1461 			VOP_UNLOCK(vp, 0, td);
1462 			vput(nd.ni_dvp);
1463 		}
1464 		NDFREE(&nd, NDF_ONLY_PNBUF);
1465 		VFS_UNLOCK_GIANT(lvfslocked);
1466 	}
1467 	vrele(vp);
1468 	vn_finished_write(mp);
1469 	VFS_UNLOCK_GIANT(vfslocked);
1470 	return (error);
1471 }
1472 
1473 /*
1474  * Make a symbolic link.
1475  */
1476 #ifndef _SYS_SYSPROTO_H_
1477 struct symlink_args {
1478 	char	*path;
1479 	char	*link;
1480 };
1481 #endif
1482 int
1483 symlink(td, uap)
1484 	struct thread *td;
1485 	register struct symlink_args /* {
1486 		char *path;
1487 		char *link;
1488 	} */ *uap;
1489 {
1490 
1491 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1492 }
1493 
1494 int
1495 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1496 {
1497 	struct mount *mp;
1498 	struct vattr vattr;
1499 	char *syspath;
1500 	int error;
1501 	struct nameidata nd;
1502 	int vfslocked;
1503 
1504 	if (segflg == UIO_SYSSPACE) {
1505 		syspath = path;
1506 	} else {
1507 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1508 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1509 			goto out;
1510 	}
1511 	AUDIT_ARG(text, syspath);
1512 restart:
1513 	bwillwrite();
1514 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1515 	    segflg, link, td);
1516 	if ((error = namei(&nd)) != 0)
1517 		goto out;
1518 	vfslocked = NDHASGIANT(&nd);
1519 	if (nd.ni_vp) {
1520 		NDFREE(&nd, NDF_ONLY_PNBUF);
1521 		if (nd.ni_vp == nd.ni_dvp)
1522 			vrele(nd.ni_dvp);
1523 		else
1524 			vput(nd.ni_dvp);
1525 		vrele(nd.ni_vp);
1526 		VFS_UNLOCK_GIANT(vfslocked);
1527 		error = EEXIST;
1528 		goto out;
1529 	}
1530 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1531 		NDFREE(&nd, NDF_ONLY_PNBUF);
1532 		vput(nd.ni_dvp);
1533 		VFS_UNLOCK_GIANT(vfslocked);
1534 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1535 			goto out;
1536 		goto restart;
1537 	}
1538 	VATTR_NULL(&vattr);
1539 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1540 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1541 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1542 #ifdef MAC
1543 	vattr.va_type = VLNK;
1544 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1545 	    &vattr);
1546 	if (error)
1547 		goto out2;
1548 #endif
1549 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1550 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1551 	if (error == 0)
1552 		vput(nd.ni_vp);
1553 #ifdef MAC
1554 out2:
1555 #endif
1556 	NDFREE(&nd, NDF_ONLY_PNBUF);
1557 	vput(nd.ni_dvp);
1558 	vn_finished_write(mp);
1559 	VFS_UNLOCK_GIANT(vfslocked);
1560 out:
1561 	if (segflg != UIO_SYSSPACE)
1562 		uma_zfree(namei_zone, syspath);
1563 	return (error);
1564 }
1565 
1566 /*
1567  * Delete a whiteout from the filesystem.
1568  */
1569 int
1570 undelete(td, uap)
1571 	struct thread *td;
1572 	register struct undelete_args /* {
1573 		char *path;
1574 	} */ *uap;
1575 {
1576 	int error;
1577 	struct mount *mp;
1578 	struct nameidata nd;
1579 	int vfslocked;
1580 
1581 restart:
1582 	bwillwrite();
1583 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1584 	    UIO_USERSPACE, uap->path, td);
1585 	error = namei(&nd);
1586 	if (error)
1587 		return (error);
1588 	vfslocked = NDHASGIANT(&nd);
1589 
1590 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1591 		NDFREE(&nd, NDF_ONLY_PNBUF);
1592 		if (nd.ni_vp == nd.ni_dvp)
1593 			vrele(nd.ni_dvp);
1594 		else
1595 			vput(nd.ni_dvp);
1596 		if (nd.ni_vp)
1597 			vrele(nd.ni_vp);
1598 		VFS_UNLOCK_GIANT(vfslocked);
1599 		return (EEXIST);
1600 	}
1601 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1602 		NDFREE(&nd, NDF_ONLY_PNBUF);
1603 		vput(nd.ni_dvp);
1604 		VFS_UNLOCK_GIANT(vfslocked);
1605 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1606 			return (error);
1607 		goto restart;
1608 	}
1609 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1610 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1611 	NDFREE(&nd, NDF_ONLY_PNBUF);
1612 	vput(nd.ni_dvp);
1613 	vn_finished_write(mp);
1614 	VFS_UNLOCK_GIANT(vfslocked);
1615 	return (error);
1616 }
1617 
1618 /*
1619  * Delete a name from the filesystem.
1620  */
1621 #ifndef _SYS_SYSPROTO_H_
1622 struct unlink_args {
1623 	char	*path;
1624 };
1625 #endif
1626 int
1627 unlink(td, uap)
1628 	struct thread *td;
1629 	struct unlink_args /* {
1630 		char *path;
1631 	} */ *uap;
1632 {
1633 	int error;
1634 
1635 	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1636 	return (error);
1637 }
1638 
1639 int
1640 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1641 {
1642 	struct mount *mp;
1643 	struct vnode *vp;
1644 	int error;
1645 	struct nameidata nd;
1646 	int vfslocked;
1647 
1648 restart:
1649 	bwillwrite();
1650 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1651 	    pathseg, path, td);
1652 	if ((error = namei(&nd)) != 0)
1653 		return (error == EINVAL ? EPERM : error);
1654 	vfslocked = NDHASGIANT(&nd);
1655 	vp = nd.ni_vp;
1656 	if (vp->v_type == VDIR)
1657 		error = EPERM;		/* POSIX */
1658 	else {
1659 		/*
1660 		 * The root of a mounted filesystem cannot be deleted.
1661 		 *
1662 		 * XXX: can this only be a VDIR case?
1663 		 */
1664 		if (vp->v_vflag & VV_ROOT)
1665 			error = EBUSY;
1666 	}
1667 	if (error == 0) {
1668 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1669 			NDFREE(&nd, NDF_ONLY_PNBUF);
1670 			vput(nd.ni_dvp);
1671 			if (vp == nd.ni_dvp)
1672 				vrele(vp);
1673 			else
1674 				vput(vp);
1675 			VFS_UNLOCK_GIANT(vfslocked);
1676 			if ((error = vn_start_write(NULL, &mp,
1677 			    V_XSLEEP | PCATCH)) != 0)
1678 				return (error);
1679 			goto restart;
1680 		}
1681 #ifdef MAC
1682 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1683 		    &nd.ni_cnd);
1684 		if (error)
1685 			goto out;
1686 #endif
1687 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1688 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1689 #ifdef MAC
1690 out:
1691 #endif
1692 		vn_finished_write(mp);
1693 	}
1694 	NDFREE(&nd, NDF_ONLY_PNBUF);
1695 	vput(nd.ni_dvp);
1696 	if (vp == nd.ni_dvp)
1697 		vrele(vp);
1698 	else
1699 		vput(vp);
1700 	VFS_UNLOCK_GIANT(vfslocked);
1701 	return (error);
1702 }
1703 
1704 /*
1705  * Reposition read/write file offset.
1706  */
1707 #ifndef _SYS_SYSPROTO_H_
1708 struct lseek_args {
1709 	int	fd;
1710 	int	pad;
1711 	off_t	offset;
1712 	int	whence;
1713 };
1714 #endif
1715 int
1716 lseek(td, uap)
1717 	struct thread *td;
1718 	register struct lseek_args /* {
1719 		int fd;
1720 		int pad;
1721 		off_t offset;
1722 		int whence;
1723 	} */ *uap;
1724 {
1725 	struct ucred *cred = td->td_ucred;
1726 	struct file *fp;
1727 	struct vnode *vp;
1728 	struct vattr vattr;
1729 	off_t offset;
1730 	int error, noneg;
1731 	int vfslocked;
1732 
1733 	if ((error = fget(td, uap->fd, &fp)) != 0)
1734 		return (error);
1735 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1736 		fdrop(fp, td);
1737 		return (ESPIPE);
1738 	}
1739 	vp = fp->f_vnode;
1740 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1741 	noneg = (vp->v_type != VCHR);
1742 	offset = uap->offset;
1743 	switch (uap->whence) {
1744 	case L_INCR:
1745 		if (noneg &&
1746 		    (fp->f_offset < 0 ||
1747 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1748 			error = EOVERFLOW;
1749 			break;
1750 		}
1751 		offset += fp->f_offset;
1752 		break;
1753 	case L_XTND:
1754 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1755 		error = VOP_GETATTR(vp, &vattr, cred, td);
1756 		VOP_UNLOCK(vp, 0, td);
1757 		if (error)
1758 			break;
1759 		if (noneg &&
1760 		    (vattr.va_size > OFF_MAX ||
1761 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1762 			error = EOVERFLOW;
1763 			break;
1764 		}
1765 		offset += vattr.va_size;
1766 		break;
1767 	case L_SET:
1768 		break;
1769 	default:
1770 		error = EINVAL;
1771 	}
1772 	if (error == 0 && noneg && offset < 0)
1773 		error = EINVAL;
1774 	if (error != 0)
1775 		goto drop;
1776 	fp->f_offset = offset;
1777 	*(off_t *)(td->td_retval) = fp->f_offset;
1778 drop:
1779 	fdrop(fp, td);
1780 	VFS_UNLOCK_GIANT(vfslocked);
1781 	return (error);
1782 }
1783 
1784 #if defined(COMPAT_43)
1785 /*
1786  * Reposition read/write file offset.
1787  */
1788 #ifndef _SYS_SYSPROTO_H_
1789 struct olseek_args {
1790 	int	fd;
1791 	long	offset;
1792 	int	whence;
1793 };
1794 #endif
1795 int
1796 olseek(td, uap)
1797 	struct thread *td;
1798 	register struct olseek_args /* {
1799 		int fd;
1800 		long offset;
1801 		int whence;
1802 	} */ *uap;
1803 {
1804 	struct lseek_args /* {
1805 		int fd;
1806 		int pad;
1807 		off_t offset;
1808 		int whence;
1809 	} */ nuap;
1810 	int error;
1811 
1812 	nuap.fd = uap->fd;
1813 	nuap.offset = uap->offset;
1814 	nuap.whence = uap->whence;
1815 	error = lseek(td, &nuap);
1816 	return (error);
1817 }
1818 #endif /* COMPAT_43 */
1819 
1820 /*
1821  * Check access permissions using passed credentials.
1822  */
1823 static int
1824 vn_access(vp, user_flags, cred, td)
1825 	struct vnode	*vp;
1826 	int		user_flags;
1827 	struct ucred	*cred;
1828 	struct thread	*td;
1829 {
1830 	int error, flags;
1831 
1832 	/* Flags == 0 means only check for existence. */
1833 	error = 0;
1834 	if (user_flags) {
1835 		flags = 0;
1836 		if (user_flags & R_OK)
1837 			flags |= VREAD;
1838 		if (user_flags & W_OK)
1839 			flags |= VWRITE;
1840 		if (user_flags & X_OK)
1841 			flags |= VEXEC;
1842 #ifdef MAC
1843 		error = mac_check_vnode_access(cred, vp, flags);
1844 		if (error)
1845 			return (error);
1846 #endif
1847 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1848 			error = VOP_ACCESS(vp, flags, cred, td);
1849 	}
1850 	return (error);
1851 }
1852 
1853 /*
1854  * Check access permissions using "real" credentials.
1855  */
1856 #ifndef _SYS_SYSPROTO_H_
1857 struct access_args {
1858 	char	*path;
1859 	int	flags;
1860 };
1861 #endif
1862 int
1863 access(td, uap)
1864 	struct thread *td;
1865 	register struct access_args /* {
1866 		char *path;
1867 		int flags;
1868 	} */ *uap;
1869 {
1870 
1871 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1872 }
1873 
1874 int
1875 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1876 {
1877 	struct ucred *cred, *tmpcred;
1878 	register struct vnode *vp;
1879 	struct nameidata nd;
1880 	int vfslocked;
1881 	int error;
1882 
1883 	/*
1884 	 * Create and modify a temporary credential instead of one that
1885 	 * is potentially shared.  This could also mess up socket
1886 	 * buffer accounting which can run in an interrupt context.
1887 	 */
1888 	cred = td->td_ucred;
1889 	tmpcred = crdup(cred);
1890 	tmpcred->cr_uid = cred->cr_ruid;
1891 	tmpcred->cr_groups[0] = cred->cr_rgid;
1892 	td->td_ucred = tmpcred;
1893 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1894 	    pathseg, path, td);
1895 	if ((error = namei(&nd)) != 0)
1896 		goto out1;
1897 	vfslocked = NDHASGIANT(&nd);
1898 	vp = nd.ni_vp;
1899 
1900 	error = vn_access(vp, flags, tmpcred, td);
1901 	NDFREE(&nd, NDF_ONLY_PNBUF);
1902 	vput(vp);
1903 	VFS_UNLOCK_GIANT(vfslocked);
1904 out1:
1905 	td->td_ucred = cred;
1906 	crfree(tmpcred);
1907 	return (error);
1908 }
1909 
1910 /*
1911  * Check access permissions using "effective" credentials.
1912  */
1913 #ifndef _SYS_SYSPROTO_H_
1914 struct eaccess_args {
1915 	char	*path;
1916 	int	flags;
1917 };
1918 #endif
1919 int
1920 eaccess(td, uap)
1921 	struct thread *td;
1922 	register struct eaccess_args /* {
1923 		char *path;
1924 		int flags;
1925 	} */ *uap;
1926 {
1927 
1928 	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1929 }
1930 
1931 int
1932 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1933 {
1934 	struct nameidata nd;
1935 	struct vnode *vp;
1936 	int vfslocked;
1937 	int error;
1938 
1939 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1940 	    pathseg, path, td);
1941 	if ((error = namei(&nd)) != 0)
1942 		return (error);
1943 	vp = nd.ni_vp;
1944 	vfslocked = NDHASGIANT(&nd);
1945 	error = vn_access(vp, flags, td->td_ucred, td);
1946 	NDFREE(&nd, NDF_ONLY_PNBUF);
1947 	vput(vp);
1948 	VFS_UNLOCK_GIANT(vfslocked);
1949 	return (error);
1950 }
1951 
1952 #if defined(COMPAT_43)
1953 /*
1954  * Get file status; this version follows links.
1955  */
1956 #ifndef _SYS_SYSPROTO_H_
1957 struct ostat_args {
1958 	char	*path;
1959 	struct ostat *ub;
1960 };
1961 #endif
1962 int
1963 ostat(td, uap)
1964 	struct thread *td;
1965 	register struct ostat_args /* {
1966 		char *path;
1967 		struct ostat *ub;
1968 	} */ *uap;
1969 {
1970 	struct stat sb;
1971 	struct ostat osb;
1972 	int error;
1973 
1974 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1975 	if (error)
1976 		return (error);
1977 	cvtstat(&sb, &osb);
1978 	error = copyout(&osb, uap->ub, sizeof (osb));
1979 	return (error);
1980 }
1981 
1982 /*
1983  * Get file status; this version does not follow links.
1984  */
1985 #ifndef _SYS_SYSPROTO_H_
1986 struct olstat_args {
1987 	char	*path;
1988 	struct ostat *ub;
1989 };
1990 #endif
1991 int
1992 olstat(td, uap)
1993 	struct thread *td;
1994 	register struct olstat_args /* {
1995 		char *path;
1996 		struct ostat *ub;
1997 	} */ *uap;
1998 {
1999 	struct stat sb;
2000 	struct ostat osb;
2001 	int error;
2002 
2003 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2004 	if (error)
2005 		return (error);
2006 	cvtstat(&sb, &osb);
2007 	error = copyout(&osb, uap->ub, sizeof (osb));
2008 	return (error);
2009 }
2010 
2011 /*
2012  * Convert from an old to a new stat structure.
2013  */
2014 void
2015 cvtstat(st, ost)
2016 	struct stat *st;
2017 	struct ostat *ost;
2018 {
2019 
2020 	ost->st_dev = st->st_dev;
2021 	ost->st_ino = st->st_ino;
2022 	ost->st_mode = st->st_mode;
2023 	ost->st_nlink = st->st_nlink;
2024 	ost->st_uid = st->st_uid;
2025 	ost->st_gid = st->st_gid;
2026 	ost->st_rdev = st->st_rdev;
2027 	if (st->st_size < (quad_t)1 << 32)
2028 		ost->st_size = st->st_size;
2029 	else
2030 		ost->st_size = -2;
2031 	ost->st_atime = st->st_atime;
2032 	ost->st_mtime = st->st_mtime;
2033 	ost->st_ctime = st->st_ctime;
2034 	ost->st_blksize = st->st_blksize;
2035 	ost->st_blocks = st->st_blocks;
2036 	ost->st_flags = st->st_flags;
2037 	ost->st_gen = st->st_gen;
2038 }
2039 #endif /* COMPAT_43 */
2040 
2041 /*
2042  * Get file status; this version follows links.
2043  */
2044 #ifndef _SYS_SYSPROTO_H_
2045 struct stat_args {
2046 	char	*path;
2047 	struct stat *ub;
2048 };
2049 #endif
2050 int
2051 stat(td, uap)
2052 	struct thread *td;
2053 	register struct stat_args /* {
2054 		char *path;
2055 		struct stat *ub;
2056 	} */ *uap;
2057 {
2058 	struct stat sb;
2059 	int error;
2060 
2061 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2062 	if (error == 0)
2063 		error = copyout(&sb, uap->ub, sizeof (sb));
2064 	return (error);
2065 }
2066 
2067 int
2068 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2069 {
2070 	struct nameidata nd;
2071 	struct stat sb;
2072 	int error, vfslocked;
2073 
2074 	NDINIT(&nd, LOOKUP,
2075 	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2076 	    pathseg, path, td);
2077 	if ((error = namei(&nd)) != 0)
2078 		return (error);
2079 	vfslocked = NDHASGIANT(&nd);
2080 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2081 	NDFREE(&nd, NDF_ONLY_PNBUF);
2082 	vput(nd.ni_vp);
2083 	VFS_UNLOCK_GIANT(vfslocked);
2084 	if (mtx_owned(&Giant))
2085 		printf("stat(%d): %s\n", vfslocked, path);
2086 	if (error)
2087 		return (error);
2088 	*sbp = sb;
2089 	return (0);
2090 }
2091 
2092 /*
2093  * Get file status; this version does not follow links.
2094  */
2095 #ifndef _SYS_SYSPROTO_H_
2096 struct lstat_args {
2097 	char	*path;
2098 	struct stat *ub;
2099 };
2100 #endif
2101 int
2102 lstat(td, uap)
2103 	struct thread *td;
2104 	register struct lstat_args /* {
2105 		char *path;
2106 		struct stat *ub;
2107 	} */ *uap;
2108 {
2109 	struct stat sb;
2110 	int error;
2111 
2112 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2113 	if (error == 0)
2114 		error = copyout(&sb, uap->ub, sizeof (sb));
2115 	return (error);
2116 }
2117 
2118 int
2119 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2120 {
2121 	struct vnode *vp;
2122 	struct stat sb;
2123 	struct nameidata nd;
2124 	int error, vfslocked;
2125 
2126 	NDINIT(&nd, LOOKUP,
2127 	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2128 	    pathseg, path, td);
2129 	if ((error = namei(&nd)) != 0)
2130 		return (error);
2131 	vfslocked = NDHASGIANT(&nd);
2132 	vp = nd.ni_vp;
2133 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2134 	NDFREE(&nd, NDF_ONLY_PNBUF);
2135 	vput(vp);
2136 	VFS_UNLOCK_GIANT(vfslocked);
2137 	if (error)
2138 		return (error);
2139 	*sbp = sb;
2140 	return (0);
2141 }
2142 
2143 /*
2144  * Implementation of the NetBSD [l]stat() functions.
2145  */
2146 void
2147 cvtnstat(sb, nsb)
2148 	struct stat *sb;
2149 	struct nstat *nsb;
2150 {
2151 	bzero(nsb, sizeof *nsb);
2152 	nsb->st_dev = sb->st_dev;
2153 	nsb->st_ino = sb->st_ino;
2154 	nsb->st_mode = sb->st_mode;
2155 	nsb->st_nlink = sb->st_nlink;
2156 	nsb->st_uid = sb->st_uid;
2157 	nsb->st_gid = sb->st_gid;
2158 	nsb->st_rdev = sb->st_rdev;
2159 	nsb->st_atimespec = sb->st_atimespec;
2160 	nsb->st_mtimespec = sb->st_mtimespec;
2161 	nsb->st_ctimespec = sb->st_ctimespec;
2162 	nsb->st_size = sb->st_size;
2163 	nsb->st_blocks = sb->st_blocks;
2164 	nsb->st_blksize = sb->st_blksize;
2165 	nsb->st_flags = sb->st_flags;
2166 	nsb->st_gen = sb->st_gen;
2167 	nsb->st_birthtimespec = sb->st_birthtimespec;
2168 }
2169 
2170 #ifndef _SYS_SYSPROTO_H_
2171 struct nstat_args {
2172 	char	*path;
2173 	struct nstat *ub;
2174 };
2175 #endif
2176 int
2177 nstat(td, uap)
2178 	struct thread *td;
2179 	register struct nstat_args /* {
2180 		char *path;
2181 		struct nstat *ub;
2182 	} */ *uap;
2183 {
2184 	struct stat sb;
2185 	struct nstat nsb;
2186 	int error;
2187 
2188 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2189 	if (error)
2190 		return (error);
2191 	cvtnstat(&sb, &nsb);
2192 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2193 	return (error);
2194 }
2195 
2196 /*
2197  * NetBSD lstat.  Get file status; this version does not follow links.
2198  */
2199 #ifndef _SYS_SYSPROTO_H_
2200 struct lstat_args {
2201 	char	*path;
2202 	struct stat *ub;
2203 };
2204 #endif
2205 int
2206 nlstat(td, uap)
2207 	struct thread *td;
2208 	register struct nlstat_args /* {
2209 		char *path;
2210 		struct nstat *ub;
2211 	} */ *uap;
2212 {
2213 	struct stat sb;
2214 	struct nstat nsb;
2215 	int error;
2216 
2217 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2218 	if (error)
2219 		return (error);
2220 	cvtnstat(&sb, &nsb);
2221 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2222 	return (error);
2223 }
2224 
2225 /*
2226  * Get configurable pathname variables.
2227  */
2228 #ifndef _SYS_SYSPROTO_H_
2229 struct pathconf_args {
2230 	char	*path;
2231 	int	name;
2232 };
2233 #endif
2234 int
2235 pathconf(td, uap)
2236 	struct thread *td;
2237 	register struct pathconf_args /* {
2238 		char *path;
2239 		int name;
2240 	} */ *uap;
2241 {
2242 
2243 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2244 }
2245 
2246 int
2247 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2248 {
2249 	struct nameidata nd;
2250 	int error, vfslocked;
2251 
2252 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2253 	    pathseg, path, td);
2254 	if ((error = namei(&nd)) != 0)
2255 		return (error);
2256 	vfslocked = NDHASGIANT(&nd);
2257 	NDFREE(&nd, NDF_ONLY_PNBUF);
2258 
2259 	/* If asynchronous I/O is available, it works for all files. */
2260 	if (name == _PC_ASYNC_IO)
2261 		td->td_retval[0] = async_io_version;
2262 	else
2263 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2264 	vput(nd.ni_vp);
2265 	VFS_UNLOCK_GIANT(vfslocked);
2266 	return (error);
2267 }
2268 
2269 /*
2270  * Return target name of a symbolic link.
2271  */
2272 #ifndef _SYS_SYSPROTO_H_
2273 struct readlink_args {
2274 	char	*path;
2275 	char	*buf;
2276 	int	count;
2277 };
2278 #endif
2279 int
2280 readlink(td, uap)
2281 	struct thread *td;
2282 	register struct readlink_args /* {
2283 		char *path;
2284 		char *buf;
2285 		int count;
2286 	} */ *uap;
2287 {
2288 
2289 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2290 	    UIO_USERSPACE, uap->count));
2291 }
2292 
2293 int
2294 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2295     enum uio_seg bufseg, int count)
2296 {
2297 	register struct vnode *vp;
2298 	struct iovec aiov;
2299 	struct uio auio;
2300 	int error;
2301 	struct nameidata nd;
2302 	int vfslocked;
2303 
2304 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2305 	    pathseg, path, td);
2306 	if ((error = namei(&nd)) != 0)
2307 		return (error);
2308 	NDFREE(&nd, NDF_ONLY_PNBUF);
2309 	vfslocked = NDHASGIANT(&nd);
2310 	vp = nd.ni_vp;
2311 #ifdef MAC
2312 	error = mac_check_vnode_readlink(td->td_ucred, vp);
2313 	if (error) {
2314 		vput(vp);
2315 		VFS_UNLOCK_GIANT(vfslocked);
2316 		return (error);
2317 	}
2318 #endif
2319 	if (vp->v_type != VLNK)
2320 		error = EINVAL;
2321 	else {
2322 		aiov.iov_base = buf;
2323 		aiov.iov_len = count;
2324 		auio.uio_iov = &aiov;
2325 		auio.uio_iovcnt = 1;
2326 		auio.uio_offset = 0;
2327 		auio.uio_rw = UIO_READ;
2328 		auio.uio_segflg = bufseg;
2329 		auio.uio_td = td;
2330 		auio.uio_resid = count;
2331 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2332 	}
2333 	vput(vp);
2334 	VFS_UNLOCK_GIANT(vfslocked);
2335 	td->td_retval[0] = count - auio.uio_resid;
2336 	return (error);
2337 }
2338 
2339 /*
2340  * Common implementation code for chflags() and fchflags().
2341  */
2342 static int
2343 setfflags(td, vp, flags)
2344 	struct thread *td;
2345 	struct vnode *vp;
2346 	int flags;
2347 {
2348 	int error;
2349 	struct mount *mp;
2350 	struct vattr vattr;
2351 
2352 	/*
2353 	 * Prevent non-root users from setting flags on devices.  When
2354 	 * a device is reused, users can retain ownership of the device
2355 	 * if they are allowed to set flags and programs assume that
2356 	 * chown can't fail when done as root.
2357 	 */
2358 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2359 		error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV,
2360 		    SUSER_ALLOWJAIL);
2361 		if (error)
2362 			return (error);
2363 	}
2364 
2365 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2366 		return (error);
2367 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2368 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2369 	VATTR_NULL(&vattr);
2370 	vattr.va_flags = flags;
2371 #ifdef MAC
2372 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2373 	if (error == 0)
2374 #endif
2375 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2376 	VOP_UNLOCK(vp, 0, td);
2377 	vn_finished_write(mp);
2378 	return (error);
2379 }
2380 
2381 /*
2382  * Change flags of a file given a path name.
2383  */
2384 #ifndef _SYS_SYSPROTO_H_
2385 struct chflags_args {
2386 	char	*path;
2387 	int	flags;
2388 };
2389 #endif
2390 int
2391 chflags(td, uap)
2392 	struct thread *td;
2393 	register struct chflags_args /* {
2394 		char *path;
2395 		int flags;
2396 	} */ *uap;
2397 {
2398 	int error;
2399 	struct nameidata nd;
2400 	int vfslocked;
2401 
2402 	AUDIT_ARG(fflags, uap->flags);
2403 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2404 	    uap->path, td);
2405 	if ((error = namei(&nd)) != 0)
2406 		return (error);
2407 	NDFREE(&nd, NDF_ONLY_PNBUF);
2408 	vfslocked = NDHASGIANT(&nd);
2409 	error = setfflags(td, nd.ni_vp, uap->flags);
2410 	vrele(nd.ni_vp);
2411 	VFS_UNLOCK_GIANT(vfslocked);
2412 	return (error);
2413 }
2414 
2415 /*
2416  * Same as chflags() but doesn't follow symlinks.
2417  */
2418 int
2419 lchflags(td, uap)
2420 	struct thread *td;
2421 	register struct lchflags_args /* {
2422 		char *path;
2423 		int flags;
2424 	} */ *uap;
2425 {
2426 	int error;
2427 	struct nameidata nd;
2428 	int vfslocked;
2429 
2430 	AUDIT_ARG(fflags, uap->flags);
2431 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2432 	    uap->path, td);
2433 	if ((error = namei(&nd)) != 0)
2434 		return (error);
2435 	vfslocked = NDHASGIANT(&nd);
2436 	NDFREE(&nd, NDF_ONLY_PNBUF);
2437 	error = setfflags(td, nd.ni_vp, uap->flags);
2438 	vrele(nd.ni_vp);
2439 	VFS_UNLOCK_GIANT(vfslocked);
2440 	return (error);
2441 }
2442 
2443 /*
2444  * Change flags of a file given a file descriptor.
2445  */
2446 #ifndef _SYS_SYSPROTO_H_
2447 struct fchflags_args {
2448 	int	fd;
2449 	int	flags;
2450 };
2451 #endif
2452 int
2453 fchflags(td, uap)
2454 	struct thread *td;
2455 	register struct fchflags_args /* {
2456 		int fd;
2457 		int flags;
2458 	} */ *uap;
2459 {
2460 	struct file *fp;
2461 	int vfslocked;
2462 	int error;
2463 
2464 	AUDIT_ARG(fd, uap->fd);
2465 	AUDIT_ARG(fflags, uap->flags);
2466 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2467 		return (error);
2468 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2469 #ifdef AUDIT
2470 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2471 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2472 	VOP_UNLOCK(fp->f_vnode, 0, td);
2473 #endif
2474 	error = setfflags(td, fp->f_vnode, uap->flags);
2475 	VFS_UNLOCK_GIANT(vfslocked);
2476 	fdrop(fp, td);
2477 	return (error);
2478 }
2479 
2480 /*
2481  * Common implementation code for chmod(), lchmod() and fchmod().
2482  */
2483 static int
2484 setfmode(td, vp, mode)
2485 	struct thread *td;
2486 	struct vnode *vp;
2487 	int mode;
2488 {
2489 	int error;
2490 	struct mount *mp;
2491 	struct vattr vattr;
2492 
2493 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2494 		return (error);
2495 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2496 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2497 	VATTR_NULL(&vattr);
2498 	vattr.va_mode = mode & ALLPERMS;
2499 #ifdef MAC
2500 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2501 	if (error == 0)
2502 #endif
2503 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2504 	VOP_UNLOCK(vp, 0, td);
2505 	vn_finished_write(mp);
2506 	return (error);
2507 }
2508 
2509 /*
2510  * Change mode of a file given path name.
2511  */
2512 #ifndef _SYS_SYSPROTO_H_
2513 struct chmod_args {
2514 	char	*path;
2515 	int	mode;
2516 };
2517 #endif
2518 int
2519 chmod(td, uap)
2520 	struct thread *td;
2521 	register struct chmod_args /* {
2522 		char *path;
2523 		int mode;
2524 	} */ *uap;
2525 {
2526 
2527 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2528 }
2529 
2530 int
2531 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2532 {
2533 	int error;
2534 	struct nameidata nd;
2535 	int vfslocked;
2536 
2537 	AUDIT_ARG(mode, mode);
2538 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2539 	if ((error = namei(&nd)) != 0)
2540 		return (error);
2541 	vfslocked = NDHASGIANT(&nd);
2542 	NDFREE(&nd, NDF_ONLY_PNBUF);
2543 	error = setfmode(td, nd.ni_vp, mode);
2544 	vrele(nd.ni_vp);
2545 	VFS_UNLOCK_GIANT(vfslocked);
2546 	return (error);
2547 }
2548 
2549 /*
2550  * Change mode of a file given path name (don't follow links.)
2551  */
2552 #ifndef _SYS_SYSPROTO_H_
2553 struct lchmod_args {
2554 	char	*path;
2555 	int	mode;
2556 };
2557 #endif
2558 int
2559 lchmod(td, uap)
2560 	struct thread *td;
2561 	register struct lchmod_args /* {
2562 		char *path;
2563 		int mode;
2564 	} */ *uap;
2565 {
2566 	int error;
2567 	struct nameidata nd;
2568 	int vfslocked;
2569 
2570 	AUDIT_ARG(mode, (mode_t)uap->mode);
2571 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2572 	    uap->path, td);
2573 	if ((error = namei(&nd)) != 0)
2574 		return (error);
2575 	vfslocked = NDHASGIANT(&nd);
2576 	NDFREE(&nd, NDF_ONLY_PNBUF);
2577 	error = setfmode(td, nd.ni_vp, uap->mode);
2578 	vrele(nd.ni_vp);
2579 	VFS_UNLOCK_GIANT(vfslocked);
2580 	return (error);
2581 }
2582 
2583 /*
2584  * Change mode of a file given a file descriptor.
2585  */
2586 #ifndef _SYS_SYSPROTO_H_
2587 struct fchmod_args {
2588 	int	fd;
2589 	int	mode;
2590 };
2591 #endif
2592 int
2593 fchmod(td, uap)
2594 	struct thread *td;
2595 	register struct fchmod_args /* {
2596 		int fd;
2597 		int mode;
2598 	} */ *uap;
2599 {
2600 	struct file *fp;
2601 	int vfslocked;
2602 	int error;
2603 
2604 	AUDIT_ARG(fd, uap->fd);
2605 	AUDIT_ARG(mode, uap->mode);
2606 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2607 		return (error);
2608 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2609 #ifdef AUDIT
2610 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2611 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2612 	VOP_UNLOCK(fp->f_vnode, 0, td);
2613 #endif
2614 	error = setfmode(td, fp->f_vnode, uap->mode);
2615 	VFS_UNLOCK_GIANT(vfslocked);
2616 	fdrop(fp, td);
2617 	return (error);
2618 }
2619 
2620 /*
2621  * Common implementation for chown(), lchown(), and fchown()
2622  */
2623 static int
2624 setfown(td, vp, uid, gid)
2625 	struct thread *td;
2626 	struct vnode *vp;
2627 	uid_t uid;
2628 	gid_t gid;
2629 {
2630 	int error;
2631 	struct mount *mp;
2632 	struct vattr vattr;
2633 
2634 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2635 		return (error);
2636 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2637 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2638 	VATTR_NULL(&vattr);
2639 	vattr.va_uid = uid;
2640 	vattr.va_gid = gid;
2641 #ifdef MAC
2642 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2643 	    vattr.va_gid);
2644 	if (error == 0)
2645 #endif
2646 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2647 	VOP_UNLOCK(vp, 0, td);
2648 	vn_finished_write(mp);
2649 	return (error);
2650 }
2651 
2652 /*
2653  * Set ownership given a path name.
2654  */
2655 #ifndef _SYS_SYSPROTO_H_
2656 struct chown_args {
2657 	char	*path;
2658 	int	uid;
2659 	int	gid;
2660 };
2661 #endif
2662 int
2663 chown(td, uap)
2664 	struct thread *td;
2665 	register struct chown_args /* {
2666 		char *path;
2667 		int uid;
2668 		int gid;
2669 	} */ *uap;
2670 {
2671 
2672 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2673 }
2674 
2675 int
2676 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2677     int gid)
2678 {
2679 	int error;
2680 	struct nameidata nd;
2681 	int vfslocked;
2682 
2683 	AUDIT_ARG(owner, uid, gid);
2684 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2685 	if ((error = namei(&nd)) != 0)
2686 		return (error);
2687 	vfslocked = NDHASGIANT(&nd);
2688 	NDFREE(&nd, NDF_ONLY_PNBUF);
2689 	error = setfown(td, nd.ni_vp, uid, gid);
2690 	vrele(nd.ni_vp);
2691 	VFS_UNLOCK_GIANT(vfslocked);
2692 	return (error);
2693 }
2694 
2695 /*
2696  * Set ownership given a path name, do not cross symlinks.
2697  */
2698 #ifndef _SYS_SYSPROTO_H_
2699 struct lchown_args {
2700 	char	*path;
2701 	int	uid;
2702 	int	gid;
2703 };
2704 #endif
2705 int
2706 lchown(td, uap)
2707 	struct thread *td;
2708 	register struct lchown_args /* {
2709 		char *path;
2710 		int uid;
2711 		int gid;
2712 	} */ *uap;
2713 {
2714 
2715 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2716 }
2717 
2718 int
2719 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2720     int gid)
2721 {
2722 	int error;
2723 	struct nameidata nd;
2724 	int vfslocked;
2725 
2726 	AUDIT_ARG(owner, uid, gid);
2727 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2728 	if ((error = namei(&nd)) != 0)
2729 		return (error);
2730 	vfslocked = NDHASGIANT(&nd);
2731 	NDFREE(&nd, NDF_ONLY_PNBUF);
2732 	error = setfown(td, nd.ni_vp, uid, gid);
2733 	vrele(nd.ni_vp);
2734 	VFS_UNLOCK_GIANT(vfslocked);
2735 	return (error);
2736 }
2737 
2738 /*
2739  * Set ownership given a file descriptor.
2740  */
2741 #ifndef _SYS_SYSPROTO_H_
2742 struct fchown_args {
2743 	int	fd;
2744 	int	uid;
2745 	int	gid;
2746 };
2747 #endif
2748 int
2749 fchown(td, uap)
2750 	struct thread *td;
2751 	register struct fchown_args /* {
2752 		int fd;
2753 		int uid;
2754 		int gid;
2755 	} */ *uap;
2756 {
2757 	struct file *fp;
2758 	int vfslocked;
2759 	int error;
2760 
2761 	AUDIT_ARG(fd, uap->fd);
2762 	AUDIT_ARG(owner, uap->uid, uap->gid);
2763 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2764 		return (error);
2765 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2766 #ifdef AUDIT
2767 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2768 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2769 	VOP_UNLOCK(fp->f_vnode, 0, td);
2770 #endif
2771 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2772 	VFS_UNLOCK_GIANT(vfslocked);
2773 	fdrop(fp, td);
2774 	return (error);
2775 }
2776 
2777 /*
2778  * Common implementation code for utimes(), lutimes(), and futimes().
2779  */
2780 static int
2781 getutimes(usrtvp, tvpseg, tsp)
2782 	const struct timeval *usrtvp;
2783 	enum uio_seg tvpseg;
2784 	struct timespec *tsp;
2785 {
2786 	struct timeval tv[2];
2787 	const struct timeval *tvp;
2788 	int error;
2789 
2790 	if (usrtvp == NULL) {
2791 		microtime(&tv[0]);
2792 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2793 		tsp[1] = tsp[0];
2794 	} else {
2795 		if (tvpseg == UIO_SYSSPACE) {
2796 			tvp = usrtvp;
2797 		} else {
2798 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2799 				return (error);
2800 			tvp = tv;
2801 		}
2802 
2803 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2804 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2805 			return (EINVAL);
2806 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2807 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2808 	}
2809 	return (0);
2810 }
2811 
2812 /*
2813  * Common implementation code for utimes(), lutimes(), and futimes().
2814  */
2815 static int
2816 setutimes(td, vp, ts, numtimes, nullflag)
2817 	struct thread *td;
2818 	struct vnode *vp;
2819 	const struct timespec *ts;
2820 	int numtimes;
2821 	int nullflag;
2822 {
2823 	int error, setbirthtime;
2824 	struct mount *mp;
2825 	struct vattr vattr;
2826 
2827 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2828 		return (error);
2829 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2830 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2831 	setbirthtime = 0;
2832 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2833 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2834 		setbirthtime = 1;
2835 	VATTR_NULL(&vattr);
2836 	vattr.va_atime = ts[0];
2837 	vattr.va_mtime = ts[1];
2838 	if (setbirthtime)
2839 		vattr.va_birthtime = ts[1];
2840 	if (numtimes > 2)
2841 		vattr.va_birthtime = ts[2];
2842 	if (nullflag)
2843 		vattr.va_vaflags |= VA_UTIMES_NULL;
2844 #ifdef MAC
2845 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2846 	    vattr.va_mtime);
2847 #endif
2848 	if (error == 0)
2849 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2850 	VOP_UNLOCK(vp, 0, td);
2851 	vn_finished_write(mp);
2852 	return (error);
2853 }
2854 
2855 /*
2856  * Set the access and modification times of a file.
2857  */
2858 #ifndef _SYS_SYSPROTO_H_
2859 struct utimes_args {
2860 	char	*path;
2861 	struct	timeval *tptr;
2862 };
2863 #endif
2864 int
2865 utimes(td, uap)
2866 	struct thread *td;
2867 	register struct utimes_args /* {
2868 		char *path;
2869 		struct timeval *tptr;
2870 	} */ *uap;
2871 {
2872 
2873 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2874 	    UIO_USERSPACE));
2875 }
2876 
2877 int
2878 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2879     struct timeval *tptr, enum uio_seg tptrseg)
2880 {
2881 	struct timespec ts[2];
2882 	int error;
2883 	struct nameidata nd;
2884 	int vfslocked;
2885 
2886 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2887 		return (error);
2888 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2889 	if ((error = namei(&nd)) != 0)
2890 		return (error);
2891 	vfslocked = NDHASGIANT(&nd);
2892 	NDFREE(&nd, NDF_ONLY_PNBUF);
2893 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2894 	vrele(nd.ni_vp);
2895 	VFS_UNLOCK_GIANT(vfslocked);
2896 	return (error);
2897 }
2898 
2899 /*
2900  * Set the access and modification times of a file.
2901  */
2902 #ifndef _SYS_SYSPROTO_H_
2903 struct lutimes_args {
2904 	char	*path;
2905 	struct	timeval *tptr;
2906 };
2907 #endif
2908 int
2909 lutimes(td, uap)
2910 	struct thread *td;
2911 	register struct lutimes_args /* {
2912 		char *path;
2913 		struct timeval *tptr;
2914 	} */ *uap;
2915 {
2916 
2917 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2918 	    UIO_USERSPACE));
2919 }
2920 
2921 int
2922 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2923     struct timeval *tptr, enum uio_seg tptrseg)
2924 {
2925 	struct timespec ts[2];
2926 	int error;
2927 	struct nameidata nd;
2928 	int vfslocked;
2929 
2930 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2931 		return (error);
2932 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2933 	if ((error = namei(&nd)) != 0)
2934 		return (error);
2935 	vfslocked = NDHASGIANT(&nd);
2936 	NDFREE(&nd, NDF_ONLY_PNBUF);
2937 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2938 	vrele(nd.ni_vp);
2939 	VFS_UNLOCK_GIANT(vfslocked);
2940 	return (error);
2941 }
2942 
2943 /*
2944  * Set the access and modification times of a file.
2945  */
2946 #ifndef _SYS_SYSPROTO_H_
2947 struct futimes_args {
2948 	int	fd;
2949 	struct	timeval *tptr;
2950 };
2951 #endif
2952 int
2953 futimes(td, uap)
2954 	struct thread *td;
2955 	register struct futimes_args /* {
2956 		int  fd;
2957 		struct timeval *tptr;
2958 	} */ *uap;
2959 {
2960 
2961 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2962 }
2963 
2964 int
2965 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2966     enum uio_seg tptrseg)
2967 {
2968 	struct timespec ts[2];
2969 	struct file *fp;
2970 	int vfslocked;
2971 	int error;
2972 
2973 	AUDIT_ARG(fd, fd);
2974 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2975 		return (error);
2976 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2977 		return (error);
2978 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2979 #ifdef AUDIT
2980 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2981 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2982 	VOP_UNLOCK(fp->f_vnode, 0, td);
2983 #endif
2984 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2985 	VFS_UNLOCK_GIANT(vfslocked);
2986 	fdrop(fp, td);
2987 	return (error);
2988 }
2989 
2990 /*
2991  * Truncate a file given its path name.
2992  */
2993 #ifndef _SYS_SYSPROTO_H_
2994 struct truncate_args {
2995 	char	*path;
2996 	int	pad;
2997 	off_t	length;
2998 };
2999 #endif
3000 int
3001 truncate(td, uap)
3002 	struct thread *td;
3003 	register struct truncate_args /* {
3004 		char *path;
3005 		int pad;
3006 		off_t length;
3007 	} */ *uap;
3008 {
3009 
3010 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3011 }
3012 
3013 int
3014 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3015 {
3016 	struct mount *mp;
3017 	struct vnode *vp;
3018 	struct vattr vattr;
3019 	int error;
3020 	struct nameidata nd;
3021 	int vfslocked;
3022 
3023 	if (length < 0)
3024 		return(EINVAL);
3025 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3026 	if ((error = namei(&nd)) != 0)
3027 		return (error);
3028 	vfslocked = NDHASGIANT(&nd);
3029 	vp = nd.ni_vp;
3030 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3031 		vrele(vp);
3032 		VFS_UNLOCK_GIANT(vfslocked);
3033 		return (error);
3034 	}
3035 	NDFREE(&nd, NDF_ONLY_PNBUF);
3036 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3037 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3038 	if (vp->v_type == VDIR)
3039 		error = EISDIR;
3040 #ifdef MAC
3041 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
3042 	}
3043 #endif
3044 	else if ((error = vn_writechk(vp)) == 0 &&
3045 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3046 		VATTR_NULL(&vattr);
3047 		vattr.va_size = length;
3048 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3049 	}
3050 	vput(vp);
3051 	vn_finished_write(mp);
3052 	VFS_UNLOCK_GIANT(vfslocked);
3053 	return (error);
3054 }
3055 
3056 /*
3057  * Truncate a file given a file descriptor.
3058  */
3059 #ifndef _SYS_SYSPROTO_H_
3060 struct ftruncate_args {
3061 	int	fd;
3062 	int	pad;
3063 	off_t	length;
3064 };
3065 #endif
3066 int
3067 ftruncate(td, uap)
3068 	struct thread *td;
3069 	register struct ftruncate_args /* {
3070 		int fd;
3071 		int pad;
3072 		off_t length;
3073 	} */ *uap;
3074 {
3075 	struct mount *mp;
3076 	struct vattr vattr;
3077 	struct vnode *vp;
3078 	struct file *fp;
3079 	int vfslocked;
3080 	int error;
3081 
3082 	AUDIT_ARG(fd, uap->fd);
3083 	if (uap->length < 0)
3084 		return(EINVAL);
3085 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3086 		return (error);
3087 	if ((fp->f_flag & FWRITE) == 0) {
3088 		fdrop(fp, td);
3089 		return (EINVAL);
3090 	}
3091 	vp = fp->f_vnode;
3092 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3093 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3094 		goto drop;
3095 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3096 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3097 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3098 	if (vp->v_type == VDIR)
3099 		error = EISDIR;
3100 #ifdef MAC
3101 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3102 	    vp))) {
3103 	}
3104 #endif
3105 	else if ((error = vn_writechk(vp)) == 0) {
3106 		VATTR_NULL(&vattr);
3107 		vattr.va_size = uap->length;
3108 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3109 	}
3110 	VOP_UNLOCK(vp, 0, td);
3111 	vn_finished_write(mp);
3112 drop:
3113 	VFS_UNLOCK_GIANT(vfslocked);
3114 	fdrop(fp, td);
3115 	return (error);
3116 }
3117 
3118 #if defined(COMPAT_43)
3119 /*
3120  * Truncate a file given its path name.
3121  */
3122 #ifndef _SYS_SYSPROTO_H_
3123 struct otruncate_args {
3124 	char	*path;
3125 	long	length;
3126 };
3127 #endif
3128 int
3129 otruncate(td, uap)
3130 	struct thread *td;
3131 	register struct otruncate_args /* {
3132 		char *path;
3133 		long length;
3134 	} */ *uap;
3135 {
3136 	struct truncate_args /* {
3137 		char *path;
3138 		int pad;
3139 		off_t length;
3140 	} */ nuap;
3141 
3142 	nuap.path = uap->path;
3143 	nuap.length = uap->length;
3144 	return (truncate(td, &nuap));
3145 }
3146 
3147 /*
3148  * Truncate a file given a file descriptor.
3149  */
3150 #ifndef _SYS_SYSPROTO_H_
3151 struct oftruncate_args {
3152 	int	fd;
3153 	long	length;
3154 };
3155 #endif
3156 int
3157 oftruncate(td, uap)
3158 	struct thread *td;
3159 	register struct oftruncate_args /* {
3160 		int fd;
3161 		long length;
3162 	} */ *uap;
3163 {
3164 	struct ftruncate_args /* {
3165 		int fd;
3166 		int pad;
3167 		off_t length;
3168 	} */ nuap;
3169 
3170 	nuap.fd = uap->fd;
3171 	nuap.length = uap->length;
3172 	return (ftruncate(td, &nuap));
3173 }
3174 #endif /* COMPAT_43 */
3175 
3176 /*
3177  * Sync an open file.
3178  */
3179 #ifndef _SYS_SYSPROTO_H_
3180 struct fsync_args {
3181 	int	fd;
3182 };
3183 #endif
3184 int
3185 fsync(td, uap)
3186 	struct thread *td;
3187 	struct fsync_args /* {
3188 		int fd;
3189 	} */ *uap;
3190 {
3191 	struct vnode *vp;
3192 	struct mount *mp;
3193 	struct file *fp;
3194 	int vfslocked;
3195 	int error;
3196 
3197 	AUDIT_ARG(fd, uap->fd);
3198 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3199 		return (error);
3200 	vp = fp->f_vnode;
3201 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3202 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3203 		goto drop;
3204 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3205 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3206 	if (vp->v_object != NULL) {
3207 		VM_OBJECT_LOCK(vp->v_object);
3208 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3209 		VM_OBJECT_UNLOCK(vp->v_object);
3210 	}
3211 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3212 
3213 	VOP_UNLOCK(vp, 0, td);
3214 	vn_finished_write(mp);
3215 drop:
3216 	VFS_UNLOCK_GIANT(vfslocked);
3217 	fdrop(fp, td);
3218 	return (error);
3219 }
3220 
3221 /*
3222  * Rename files.  Source and destination must either both be directories, or
3223  * both not be directories.  If target is a directory, it must be empty.
3224  */
3225 #ifndef _SYS_SYSPROTO_H_
3226 struct rename_args {
3227 	char	*from;
3228 	char	*to;
3229 };
3230 #endif
3231 int
3232 rename(td, uap)
3233 	struct thread *td;
3234 	register struct rename_args /* {
3235 		char *from;
3236 		char *to;
3237 	} */ *uap;
3238 {
3239 
3240 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3241 }
3242 
3243 int
3244 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3245 {
3246 	struct mount *mp = NULL;
3247 	struct vnode *tvp, *fvp, *tdvp;
3248 	struct nameidata fromnd, tond;
3249 	int tvfslocked;
3250 	int fvfslocked;
3251 	int error;
3252 
3253 	bwillwrite();
3254 #ifdef MAC
3255 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3256 	    AUDITVNODE1, pathseg, from, td);
3257 #else
3258 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3259 	    AUDITVNODE1, pathseg, from, td);
3260 #endif
3261 	if ((error = namei(&fromnd)) != 0)
3262 		return (error);
3263 	fvfslocked = NDHASGIANT(&fromnd);
3264 	tvfslocked = 0;
3265 #ifdef MAC
3266 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3267 	    fromnd.ni_vp, &fromnd.ni_cnd);
3268 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3269 	if (fromnd.ni_dvp != fromnd.ni_vp)
3270 		VOP_UNLOCK(fromnd.ni_vp, 0, td);
3271 #endif
3272 	fvp = fromnd.ni_vp;
3273 	if (error == 0)
3274 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3275 	if (error != 0) {
3276 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3277 		vrele(fromnd.ni_dvp);
3278 		vrele(fvp);
3279 		goto out1;
3280 	}
3281 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3282 	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3283 	if (fromnd.ni_vp->v_type == VDIR)
3284 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3285 	if ((error = namei(&tond)) != 0) {
3286 		/* Translate error code for rename("dir1", "dir2/."). */
3287 		if (error == EISDIR && fvp->v_type == VDIR)
3288 			error = EINVAL;
3289 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3290 		vrele(fromnd.ni_dvp);
3291 		vrele(fvp);
3292 		vn_finished_write(mp);
3293 		goto out1;
3294 	}
3295 	tvfslocked = NDHASGIANT(&tond);
3296 	tdvp = tond.ni_dvp;
3297 	tvp = tond.ni_vp;
3298 	if (tvp != NULL) {
3299 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3300 			error = ENOTDIR;
3301 			goto out;
3302 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3303 			error = EISDIR;
3304 			goto out;
3305 		}
3306 	}
3307 	if (fvp == tdvp)
3308 		error = EINVAL;
3309 	/*
3310 	 * If the source is the same as the destination (that is, if they
3311 	 * are links to the same vnode), then there is nothing to do.
3312 	 */
3313 	if (fvp == tvp)
3314 		error = -1;
3315 #ifdef MAC
3316 	else
3317 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3318 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3319 #endif
3320 out:
3321 	if (!error) {
3322 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3323 		if (fromnd.ni_dvp != tdvp) {
3324 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3325 		}
3326 		if (tvp) {
3327 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3328 		}
3329 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3330 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3331 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3332 		NDFREE(&tond, NDF_ONLY_PNBUF);
3333 	} else {
3334 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3335 		NDFREE(&tond, NDF_ONLY_PNBUF);
3336 		if (tvp)
3337 			vput(tvp);
3338 		if (tdvp == tvp)
3339 			vrele(tdvp);
3340 		else
3341 			vput(tdvp);
3342 		vrele(fromnd.ni_dvp);
3343 		vrele(fvp);
3344 	}
3345 	vrele(tond.ni_startdir);
3346 	vn_finished_write(mp);
3347 out1:
3348 	if (fromnd.ni_startdir)
3349 		vrele(fromnd.ni_startdir);
3350 	VFS_UNLOCK_GIANT(fvfslocked);
3351 	VFS_UNLOCK_GIANT(tvfslocked);
3352 	if (error == -1)
3353 		return (0);
3354 	return (error);
3355 }
3356 
3357 /*
3358  * Make a directory file.
3359  */
3360 #ifndef _SYS_SYSPROTO_H_
3361 struct mkdir_args {
3362 	char	*path;
3363 	int	mode;
3364 };
3365 #endif
3366 int
3367 mkdir(td, uap)
3368 	struct thread *td;
3369 	register struct mkdir_args /* {
3370 		char *path;
3371 		int mode;
3372 	} */ *uap;
3373 {
3374 
3375 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3376 }
3377 
3378 int
3379 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3380 {
3381 	struct mount *mp;
3382 	struct vnode *vp;
3383 	struct vattr vattr;
3384 	int error;
3385 	struct nameidata nd;
3386 	int vfslocked;
3387 
3388 	AUDIT_ARG(mode, mode);
3389 restart:
3390 	bwillwrite();
3391 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3392 	    segflg, path, td);
3393 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3394 	if ((error = namei(&nd)) != 0)
3395 		return (error);
3396 	vfslocked = NDHASGIANT(&nd);
3397 	vp = nd.ni_vp;
3398 	if (vp != NULL) {
3399 		NDFREE(&nd, NDF_ONLY_PNBUF);
3400 		/*
3401 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3402 		 * the strange behaviour of leaving the vnode unlocked
3403 		 * if the target is the same vnode as the parent.
3404 		 */
3405 		if (vp == nd.ni_dvp)
3406 			vrele(nd.ni_dvp);
3407 		else
3408 			vput(nd.ni_dvp);
3409 		vrele(vp);
3410 		VFS_UNLOCK_GIANT(vfslocked);
3411 		return (EEXIST);
3412 	}
3413 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3414 		NDFREE(&nd, NDF_ONLY_PNBUF);
3415 		vput(nd.ni_dvp);
3416 		VFS_UNLOCK_GIANT(vfslocked);
3417 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3418 			return (error);
3419 		goto restart;
3420 	}
3421 	VATTR_NULL(&vattr);
3422 	vattr.va_type = VDIR;
3423 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3424 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3425 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3426 #ifdef MAC
3427 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3428 	    &vattr);
3429 	if (error)
3430 		goto out;
3431 #endif
3432 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3433 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3434 #ifdef MAC
3435 out:
3436 #endif
3437 	NDFREE(&nd, NDF_ONLY_PNBUF);
3438 	vput(nd.ni_dvp);
3439 	if (!error)
3440 		vput(nd.ni_vp);
3441 	vn_finished_write(mp);
3442 	VFS_UNLOCK_GIANT(vfslocked);
3443 	return (error);
3444 }
3445 
3446 /*
3447  * Remove a directory file.
3448  */
3449 #ifndef _SYS_SYSPROTO_H_
3450 struct rmdir_args {
3451 	char	*path;
3452 };
3453 #endif
3454 int
3455 rmdir(td, uap)
3456 	struct thread *td;
3457 	struct rmdir_args /* {
3458 		char *path;
3459 	} */ *uap;
3460 {
3461 
3462 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3463 }
3464 
3465 int
3466 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3467 {
3468 	struct mount *mp;
3469 	struct vnode *vp;
3470 	int error;
3471 	struct nameidata nd;
3472 	int vfslocked;
3473 
3474 restart:
3475 	bwillwrite();
3476 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3477 	    pathseg, path, td);
3478 	if ((error = namei(&nd)) != 0)
3479 		return (error);
3480 	vfslocked = NDHASGIANT(&nd);
3481 	vp = nd.ni_vp;
3482 	if (vp->v_type != VDIR) {
3483 		error = ENOTDIR;
3484 		goto out;
3485 	}
3486 	/*
3487 	 * No rmdir "." please.
3488 	 */
3489 	if (nd.ni_dvp == vp) {
3490 		error = EINVAL;
3491 		goto out;
3492 	}
3493 	/*
3494 	 * The root of a mounted filesystem cannot be deleted.
3495 	 */
3496 	if (vp->v_vflag & VV_ROOT) {
3497 		error = EBUSY;
3498 		goto out;
3499 	}
3500 #ifdef MAC
3501 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3502 	    &nd.ni_cnd);
3503 	if (error)
3504 		goto out;
3505 #endif
3506 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3507 		NDFREE(&nd, NDF_ONLY_PNBUF);
3508 		vput(vp);
3509 		if (nd.ni_dvp == vp)
3510 			vrele(nd.ni_dvp);
3511 		else
3512 			vput(nd.ni_dvp);
3513 		VFS_UNLOCK_GIANT(vfslocked);
3514 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3515 			return (error);
3516 		goto restart;
3517 	}
3518 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3519 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3520 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3521 	vn_finished_write(mp);
3522 out:
3523 	NDFREE(&nd, NDF_ONLY_PNBUF);
3524 	vput(vp);
3525 	if (nd.ni_dvp == vp)
3526 		vrele(nd.ni_dvp);
3527 	else
3528 		vput(nd.ni_dvp);
3529 	VFS_UNLOCK_GIANT(vfslocked);
3530 	return (error);
3531 }
3532 
3533 #ifdef COMPAT_43
3534 /*
3535  * Read a block of directory entries in a filesystem independent format.
3536  */
3537 #ifndef _SYS_SYSPROTO_H_
3538 struct ogetdirentries_args {
3539 	int	fd;
3540 	char	*buf;
3541 	u_int	count;
3542 	long	*basep;
3543 };
3544 #endif
3545 int
3546 ogetdirentries(td, uap)
3547 	struct thread *td;
3548 	register struct ogetdirentries_args /* {
3549 		int fd;
3550 		char *buf;
3551 		u_int count;
3552 		long *basep;
3553 	} */ *uap;
3554 {
3555 	struct vnode *vp;
3556 	struct file *fp;
3557 	struct uio auio, kuio;
3558 	struct iovec aiov, kiov;
3559 	struct dirent *dp, *edp;
3560 	caddr_t dirbuf;
3561 	int error, eofflag, readcnt, vfslocked;
3562 	long loff;
3563 
3564 	/* XXX arbitrary sanity limit on `count'. */
3565 	if (uap->count > 64 * 1024)
3566 		return (EINVAL);
3567 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3568 		return (error);
3569 	if ((fp->f_flag & FREAD) == 0) {
3570 		fdrop(fp, td);
3571 		return (EBADF);
3572 	}
3573 	vp = fp->f_vnode;
3574 unionread:
3575 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3576 	if (vp->v_type != VDIR) {
3577 		VFS_UNLOCK_GIANT(vfslocked);
3578 		fdrop(fp, td);
3579 		return (EINVAL);
3580 	}
3581 	aiov.iov_base = uap->buf;
3582 	aiov.iov_len = uap->count;
3583 	auio.uio_iov = &aiov;
3584 	auio.uio_iovcnt = 1;
3585 	auio.uio_rw = UIO_READ;
3586 	auio.uio_segflg = UIO_USERSPACE;
3587 	auio.uio_td = td;
3588 	auio.uio_resid = uap->count;
3589 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3590 	loff = auio.uio_offset = fp->f_offset;
3591 #ifdef MAC
3592 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3593 	if (error) {
3594 		VOP_UNLOCK(vp, 0, td);
3595 		VFS_UNLOCK_GIANT(vfslocked);
3596 		fdrop(fp, td);
3597 		return (error);
3598 	}
3599 #endif
3600 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3601 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3602 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3603 			    NULL, NULL);
3604 			fp->f_offset = auio.uio_offset;
3605 		} else
3606 #	endif
3607 	{
3608 		kuio = auio;
3609 		kuio.uio_iov = &kiov;
3610 		kuio.uio_segflg = UIO_SYSSPACE;
3611 		kiov.iov_len = uap->count;
3612 		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3613 		kiov.iov_base = dirbuf;
3614 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3615 			    NULL, NULL);
3616 		fp->f_offset = kuio.uio_offset;
3617 		if (error == 0) {
3618 			readcnt = uap->count - kuio.uio_resid;
3619 			edp = (struct dirent *)&dirbuf[readcnt];
3620 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3621 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3622 					/*
3623 					 * The expected low byte of
3624 					 * dp->d_namlen is our dp->d_type.
3625 					 * The high MBZ byte of dp->d_namlen
3626 					 * is our dp->d_namlen.
3627 					 */
3628 					dp->d_type = dp->d_namlen;
3629 					dp->d_namlen = 0;
3630 #				else
3631 					/*
3632 					 * The dp->d_type is the high byte
3633 					 * of the expected dp->d_namlen,
3634 					 * so must be zero'ed.
3635 					 */
3636 					dp->d_type = 0;
3637 #				endif
3638 				if (dp->d_reclen > 0) {
3639 					dp = (struct dirent *)
3640 					    ((char *)dp + dp->d_reclen);
3641 				} else {
3642 					error = EIO;
3643 					break;
3644 				}
3645 			}
3646 			if (dp >= edp)
3647 				error = uiomove(dirbuf, readcnt, &auio);
3648 		}
3649 		FREE(dirbuf, M_TEMP);
3650 	}
3651 	if (error) {
3652 		VOP_UNLOCK(vp, 0, td);
3653 		VFS_UNLOCK_GIANT(vfslocked);
3654 		fdrop(fp, td);
3655 		return (error);
3656 	}
3657 	if (uap->count == auio.uio_resid &&
3658 	    (vp->v_vflag & VV_ROOT) &&
3659 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
3660 		struct vnode *tvp = vp;
3661 		vp = vp->v_mount->mnt_vnodecovered;
3662 		VREF(vp);
3663 		fp->f_vnode = vp;
3664 		fp->f_data = vp;
3665 		fp->f_offset = 0;
3666 		vput(tvp);
3667 		VFS_UNLOCK_GIANT(vfslocked);
3668 		goto unionread;
3669 	}
3670 	VOP_UNLOCK(vp, 0, td);
3671 	VFS_UNLOCK_GIANT(vfslocked);
3672 	error = copyout(&loff, uap->basep, sizeof(long));
3673 	fdrop(fp, td);
3674 	td->td_retval[0] = uap->count - auio.uio_resid;
3675 	return (error);
3676 }
3677 #endif /* COMPAT_43 */
3678 
3679 /*
3680  * Read a block of directory entries in a filesystem independent format.
3681  */
3682 #ifndef _SYS_SYSPROTO_H_
3683 struct getdirentries_args {
3684 	int	fd;
3685 	char	*buf;
3686 	u_int	count;
3687 	long	*basep;
3688 };
3689 #endif
3690 int
3691 getdirentries(td, uap)
3692 	struct thread *td;
3693 	register struct getdirentries_args /* {
3694 		int fd;
3695 		char *buf;
3696 		u_int count;
3697 		long *basep;
3698 	} */ *uap;
3699 {
3700 	struct vnode *vp;
3701 	struct file *fp;
3702 	struct uio auio;
3703 	struct iovec aiov;
3704 	int vfslocked;
3705 	long loff;
3706 	int error, eofflag;
3707 
3708 	AUDIT_ARG(fd, uap->fd);
3709 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3710 		return (error);
3711 	if ((fp->f_flag & FREAD) == 0) {
3712 		fdrop(fp, td);
3713 		return (EBADF);
3714 	}
3715 	vp = fp->f_vnode;
3716 unionread:
3717 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3718 	if (vp->v_type != VDIR) {
3719 		VFS_UNLOCK_GIANT(vfslocked);
3720 		error = EINVAL;
3721 		goto fail;
3722 	}
3723 	aiov.iov_base = uap->buf;
3724 	aiov.iov_len = uap->count;
3725 	auio.uio_iov = &aiov;
3726 	auio.uio_iovcnt = 1;
3727 	auio.uio_rw = UIO_READ;
3728 	auio.uio_segflg = UIO_USERSPACE;
3729 	auio.uio_td = td;
3730 	auio.uio_resid = uap->count;
3731 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3732 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3733 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3734 	loff = auio.uio_offset = fp->f_offset;
3735 #ifdef MAC
3736 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3737 	if (error == 0)
3738 #endif
3739 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3740 		    NULL);
3741 	fp->f_offset = auio.uio_offset;
3742 	if (error) {
3743 		VOP_UNLOCK(vp, 0, td);
3744 		VFS_UNLOCK_GIANT(vfslocked);
3745 		goto fail;
3746 	}
3747 	if (uap->count == auio.uio_resid &&
3748 	    (vp->v_vflag & VV_ROOT) &&
3749 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
3750 		struct vnode *tvp = vp;
3751 		vp = vp->v_mount->mnt_vnodecovered;
3752 		VREF(vp);
3753 		fp->f_vnode = vp;
3754 		fp->f_data = vp;
3755 		fp->f_offset = 0;
3756 		vput(tvp);
3757 		VFS_UNLOCK_GIANT(vfslocked);
3758 		goto unionread;
3759 	}
3760 	VOP_UNLOCK(vp, 0, td);
3761 	VFS_UNLOCK_GIANT(vfslocked);
3762 	if (uap->basep != NULL) {
3763 		error = copyout(&loff, uap->basep, sizeof(long));
3764 	}
3765 	td->td_retval[0] = uap->count - auio.uio_resid;
3766 fail:
3767 	fdrop(fp, td);
3768 	return (error);
3769 }
3770 
3771 #ifndef _SYS_SYSPROTO_H_
3772 struct getdents_args {
3773 	int fd;
3774 	char *buf;
3775 	size_t count;
3776 };
3777 #endif
3778 int
3779 getdents(td, uap)
3780 	struct thread *td;
3781 	register struct getdents_args /* {
3782 		int fd;
3783 		char *buf;
3784 		u_int count;
3785 	} */ *uap;
3786 {
3787 	struct getdirentries_args ap;
3788 	ap.fd = uap->fd;
3789 	ap.buf = uap->buf;
3790 	ap.count = uap->count;
3791 	ap.basep = NULL;
3792 	return (getdirentries(td, &ap));
3793 }
3794 
3795 /*
3796  * Set the mode mask for creation of filesystem nodes.
3797  */
3798 #ifndef _SYS_SYSPROTO_H_
3799 struct umask_args {
3800 	int	newmask;
3801 };
3802 #endif
3803 int
3804 umask(td, uap)
3805 	struct thread *td;
3806 	struct umask_args /* {
3807 		int newmask;
3808 	} */ *uap;
3809 {
3810 	register struct filedesc *fdp;
3811 
3812 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3813 	fdp = td->td_proc->p_fd;
3814 	td->td_retval[0] = fdp->fd_cmask;
3815 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3816 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3817 	return (0);
3818 }
3819 
3820 /*
3821  * Void all references to file by ripping underlying filesystem away from
3822  * vnode.
3823  */
3824 #ifndef _SYS_SYSPROTO_H_
3825 struct revoke_args {
3826 	char	*path;
3827 };
3828 #endif
3829 int
3830 revoke(td, uap)
3831 	struct thread *td;
3832 	register struct revoke_args /* {
3833 		char *path;
3834 	} */ *uap;
3835 {
3836 	struct vnode *vp;
3837 	struct vattr vattr;
3838 	int error;
3839 	struct nameidata nd;
3840 	int vfslocked;
3841 
3842 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3843 	    UIO_USERSPACE, uap->path, td);
3844 	if ((error = namei(&nd)) != 0)
3845 		return (error);
3846 	vfslocked = NDHASGIANT(&nd);
3847 	vp = nd.ni_vp;
3848 	NDFREE(&nd, NDF_ONLY_PNBUF);
3849 	if (vp->v_type != VCHR) {
3850 		error = EINVAL;
3851 		goto out;
3852 	}
3853 #ifdef MAC
3854 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3855 	if (error)
3856 		goto out;
3857 #endif
3858 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3859 	if (error)
3860 		goto out;
3861 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3862 		error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN,
3863 		    SUSER_ALLOWJAIL);
3864 		if (error)
3865 			goto out;
3866 	}
3867 	if (vcount(vp) > 1)
3868 		VOP_REVOKE(vp, REVOKEALL);
3869 out:
3870 	vput(vp);
3871 	VFS_UNLOCK_GIANT(vfslocked);
3872 	return (error);
3873 }
3874 
3875 /*
3876  * Convert a user file descriptor to a kernel file entry.
3877  * A reference on the file entry is held upon returning.
3878  */
3879 int
3880 getvnode(fdp, fd, fpp)
3881 	struct filedesc *fdp;
3882 	int fd;
3883 	struct file **fpp;
3884 {
3885 	int error;
3886 	struct file *fp;
3887 
3888 	fp = NULL;
3889 	if (fdp == NULL)
3890 		error = EBADF;
3891 	else {
3892 		FILEDESC_LOCK(fdp);
3893 		if ((u_int)fd >= fdp->fd_nfiles ||
3894 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3895 			error = EBADF;
3896 		else if (fp->f_vnode == NULL) {
3897 			fp = NULL;
3898 			error = EINVAL;
3899 		} else {
3900 			fhold(fp);
3901 			error = 0;
3902 		}
3903 		FILEDESC_UNLOCK(fdp);
3904 	}
3905 	*fpp = fp;
3906 	return (error);
3907 }
3908 
3909 /*
3910  * Get an (NFS) file handle.
3911  */
3912 #ifndef _SYS_SYSPROTO_H_
3913 struct lgetfh_args {
3914 	char	*fname;
3915 	fhandle_t *fhp;
3916 };
3917 #endif
3918 int
3919 lgetfh(td, uap)
3920 	struct thread *td;
3921 	register struct lgetfh_args *uap;
3922 {
3923 	struct nameidata nd;
3924 	fhandle_t fh;
3925 	register struct vnode *vp;
3926 	int vfslocked;
3927 	int error;
3928 
3929 	error = priv_check(td, PRIV_VFS_GETFH);
3930 	if (error)
3931 		return (error);
3932 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3933 	    UIO_USERSPACE, uap->fname, td);
3934 	error = namei(&nd);
3935 	if (error)
3936 		return (error);
3937 	vfslocked = NDHASGIANT(&nd);
3938 	NDFREE(&nd, NDF_ONLY_PNBUF);
3939 	vp = nd.ni_vp;
3940 	bzero(&fh, sizeof(fh));
3941 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3942 	error = VOP_VPTOFH(vp, &fh.fh_fid);
3943 	vput(vp);
3944 	VFS_UNLOCK_GIANT(vfslocked);
3945 	if (error)
3946 		return (error);
3947 	error = copyout(&fh, uap->fhp, sizeof (fh));
3948 	return (error);
3949 }
3950 
3951 #ifndef _SYS_SYSPROTO_H_
3952 struct getfh_args {
3953 	char	*fname;
3954 	fhandle_t *fhp;
3955 };
3956 #endif
3957 int
3958 getfh(td, uap)
3959 	struct thread *td;
3960 	register struct getfh_args *uap;
3961 {
3962 	struct nameidata nd;
3963 	fhandle_t fh;
3964 	register struct vnode *vp;
3965 	int vfslocked;
3966 	int error;
3967 
3968 	error = priv_check(td, PRIV_VFS_GETFH);
3969 	if (error)
3970 		return (error);
3971 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3972 	    UIO_USERSPACE, uap->fname, td);
3973 	error = namei(&nd);
3974 	if (error)
3975 		return (error);
3976 	vfslocked = NDHASGIANT(&nd);
3977 	NDFREE(&nd, NDF_ONLY_PNBUF);
3978 	vp = nd.ni_vp;
3979 	bzero(&fh, sizeof(fh));
3980 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3981 	error = VOP_VPTOFH(vp, &fh.fh_fid);
3982 	vput(vp);
3983 	VFS_UNLOCK_GIANT(vfslocked);
3984 	if (error)
3985 		return (error);
3986 	error = copyout(&fh, uap->fhp, sizeof (fh));
3987 	return (error);
3988 }
3989 
3990 /*
3991  * syscall for the rpc.lockd to use to translate a NFS file handle into an
3992  * open descriptor.
3993  *
3994  * warning: do not remove the priv_check() call or this becomes one giant
3995  * security hole.
3996  */
3997 #ifndef _SYS_SYSPROTO_H_
3998 struct fhopen_args {
3999 	const struct fhandle *u_fhp;
4000 	int flags;
4001 };
4002 #endif
4003 int
4004 fhopen(td, uap)
4005 	struct thread *td;
4006 	struct fhopen_args /* {
4007 		const struct fhandle *u_fhp;
4008 		int flags;
4009 	} */ *uap;
4010 {
4011 	struct proc *p = td->td_proc;
4012 	struct mount *mp;
4013 	struct vnode *vp;
4014 	struct fhandle fhp;
4015 	struct vattr vat;
4016 	struct vattr *vap = &vat;
4017 	struct flock lf;
4018 	struct file *fp;
4019 	register struct filedesc *fdp = p->p_fd;
4020 	int fmode, mode, error, type;
4021 	struct file *nfp;
4022 	int vfslocked;
4023 	int indx;
4024 
4025 	error = priv_check(td, PRIV_VFS_FHOPEN);
4026 	if (error)
4027 		return (error);
4028 	fmode = FFLAGS(uap->flags);
4029 	/* why not allow a non-read/write open for our lockd? */
4030 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4031 		return (EINVAL);
4032 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4033 	if (error)
4034 		return(error);
4035 	/* find the mount point */
4036 	mp = vfs_getvfs(&fhp.fh_fsid);
4037 	if (mp == NULL)
4038 		return (ESTALE);
4039 	vfslocked = VFS_LOCK_GIANT(mp);
4040 	/* now give me my vnode, it gets returned to me locked */
4041 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4042 	if (error)
4043 		goto out;
4044 	/*
4045 	 * from now on we have to make sure not
4046 	 * to forget about the vnode
4047 	 * any error that causes an abort must vput(vp)
4048 	 * just set error = err and 'goto bad;'.
4049 	 */
4050 
4051 	/*
4052 	 * from vn_open
4053 	 */
4054 	if (vp->v_type == VLNK) {
4055 		error = EMLINK;
4056 		goto bad;
4057 	}
4058 	if (vp->v_type == VSOCK) {
4059 		error = EOPNOTSUPP;
4060 		goto bad;
4061 	}
4062 	mode = 0;
4063 	if (fmode & (FWRITE | O_TRUNC)) {
4064 		if (vp->v_type == VDIR) {
4065 			error = EISDIR;
4066 			goto bad;
4067 		}
4068 		error = vn_writechk(vp);
4069 		if (error)
4070 			goto bad;
4071 		mode |= VWRITE;
4072 	}
4073 	if (fmode & FREAD)
4074 		mode |= VREAD;
4075 	if (fmode & O_APPEND)
4076 		mode |= VAPPEND;
4077 #ifdef MAC
4078 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4079 	if (error)
4080 		goto bad;
4081 #endif
4082 	if (mode) {
4083 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4084 		if (error)
4085 			goto bad;
4086 	}
4087 	if (fmode & O_TRUNC) {
4088 		VOP_UNLOCK(vp, 0, td);				/* XXX */
4089 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4090 			vrele(vp);
4091 			goto out;
4092 		}
4093 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4094 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4095 #ifdef MAC
4096 		/*
4097 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4098 		 * should be right.
4099 		 */
4100 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4101 		if (error == 0) {
4102 #endif
4103 			VATTR_NULL(vap);
4104 			vap->va_size = 0;
4105 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4106 #ifdef MAC
4107 		}
4108 #endif
4109 		vn_finished_write(mp);
4110 		if (error)
4111 			goto bad;
4112 	}
4113 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
4114 	if (error)
4115 		goto bad;
4116 
4117 	if (fmode & FWRITE)
4118 		vp->v_writecount++;
4119 
4120 	/*
4121 	 * end of vn_open code
4122 	 */
4123 
4124 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4125 		if (fmode & FWRITE)
4126 			vp->v_writecount--;
4127 		goto bad;
4128 	}
4129 	/* An extra reference on `nfp' has been held for us by falloc(). */
4130 	fp = nfp;
4131 
4132 	nfp->f_vnode = vp;
4133 	nfp->f_data = vp;
4134 	nfp->f_flag = fmode & FMASK;
4135 	nfp->f_ops = &vnops;
4136 	nfp->f_type = DTYPE_VNODE;
4137 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4138 		lf.l_whence = SEEK_SET;
4139 		lf.l_start = 0;
4140 		lf.l_len = 0;
4141 		if (fmode & O_EXLOCK)
4142 			lf.l_type = F_WRLCK;
4143 		else
4144 			lf.l_type = F_RDLCK;
4145 		type = F_FLOCK;
4146 		if ((fmode & FNONBLOCK) == 0)
4147 			type |= F_WAIT;
4148 		VOP_UNLOCK(vp, 0, td);
4149 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4150 			    type)) != 0) {
4151 			/*
4152 			 * The lock request failed.  Normally close the
4153 			 * descriptor but handle the case where someone might
4154 			 * have dup()d or close()d it when we weren't looking.
4155 			 */
4156 			fdclose(fdp, fp, indx, td);
4157 
4158 			/*
4159 			 * release our private reference
4160 			 */
4161 			fdrop(fp, td);
4162 			goto out;
4163 		}
4164 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4165 		fp->f_flag |= FHASLOCK;
4166 	}
4167 
4168 	VOP_UNLOCK(vp, 0, td);
4169 	fdrop(fp, td);
4170 	vfs_rel(mp);
4171 	VFS_UNLOCK_GIANT(vfslocked);
4172 	td->td_retval[0] = indx;
4173 	return (0);
4174 
4175 bad:
4176 	vput(vp);
4177 out:
4178 	vfs_rel(mp);
4179 	VFS_UNLOCK_GIANT(vfslocked);
4180 	return (error);
4181 }
4182 
4183 /*
4184  * Stat an (NFS) file handle.
4185  */
4186 #ifndef _SYS_SYSPROTO_H_
4187 struct fhstat_args {
4188 	struct fhandle *u_fhp;
4189 	struct stat *sb;
4190 };
4191 #endif
4192 int
4193 fhstat(td, uap)
4194 	struct thread *td;
4195 	register struct fhstat_args /* {
4196 		struct fhandle *u_fhp;
4197 		struct stat *sb;
4198 	} */ *uap;
4199 {
4200 	struct stat sb;
4201 	fhandle_t fh;
4202 	struct mount *mp;
4203 	struct vnode *vp;
4204 	int vfslocked;
4205 	int error;
4206 
4207 	error = priv_check(td, PRIV_VFS_FHSTAT);
4208 	if (error)
4209 		return (error);
4210 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4211 	if (error)
4212 		return (error);
4213 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4214 		return (ESTALE);
4215 	vfslocked = VFS_LOCK_GIANT(mp);
4216 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4217 		vfs_rel(mp);
4218 		VFS_UNLOCK_GIANT(vfslocked);
4219 		return (error);
4220 	}
4221 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4222 	vput(vp);
4223 	vfs_rel(mp);
4224 	VFS_UNLOCK_GIANT(vfslocked);
4225 	if (error)
4226 		return (error);
4227 	error = copyout(&sb, uap->sb, sizeof(sb));
4228 	return (error);
4229 }
4230 
4231 /*
4232  * Implement fstatfs() for (NFS) file handles.
4233  */
4234 #ifndef _SYS_SYSPROTO_H_
4235 struct fhstatfs_args {
4236 	struct fhandle *u_fhp;
4237 	struct statfs *buf;
4238 };
4239 #endif
4240 int
4241 fhstatfs(td, uap)
4242 	struct thread *td;
4243 	struct fhstatfs_args /* {
4244 		struct fhandle *u_fhp;
4245 		struct statfs *buf;
4246 	} */ *uap;
4247 {
4248 	struct statfs sf;
4249 	fhandle_t fh;
4250 	int error;
4251 
4252 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4253 	if (error)
4254 		return (error);
4255 	error = kern_fhstatfs(td, fh, &sf);
4256 	if (error)
4257 		return (error);
4258 	return (copyout(&sf, uap->buf, sizeof(sf)));
4259 }
4260 
4261 int
4262 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4263 {
4264 	struct statfs *sp;
4265 	struct mount *mp;
4266 	struct vnode *vp;
4267 	int vfslocked;
4268 	int error;
4269 
4270 	error = priv_check(td, PRIV_VFS_FHSTATFS);
4271 	if (error)
4272 		return (error);
4273 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4274 		return (ESTALE);
4275 	vfslocked = VFS_LOCK_GIANT(mp);
4276 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4277 	if (error) {
4278 		VFS_UNLOCK_GIANT(vfslocked);
4279 		vfs_rel(mp);
4280 		return (error);
4281 	}
4282 	vput(vp);
4283 	error = prison_canseemount(td->td_ucred, mp);
4284 	if (error)
4285 		goto out;
4286 #ifdef MAC
4287 	error = mac_check_mount_stat(td->td_ucred, mp);
4288 	if (error)
4289 		goto out;
4290 #endif
4291 	/*
4292 	 * Set these in case the underlying filesystem fails to do so.
4293 	 */
4294 	sp = &mp->mnt_stat;
4295 	sp->f_version = STATFS_VERSION;
4296 	sp->f_namemax = NAME_MAX;
4297 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4298 	error = VFS_STATFS(mp, sp, td);
4299 	if (error == 0)
4300 		*buf = *sp;
4301 out:
4302 	vfs_rel(mp);
4303 	VFS_UNLOCK_GIANT(vfslocked);
4304 	return (error);
4305 }
4306