xref: /freebsd/sys/kern/vfs_syscalls.c (revision f856af0466c076beef4ea9b15d088e1119a945b8)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/sysent.h>
48 #include <sys/malloc.h>
49 #include <sys/mount.h>
50 #include <sys/mutex.h>
51 #include <sys/sysproto.h>
52 #include <sys/namei.h>
53 #include <sys/filedesc.h>
54 #include <sys/kernel.h>
55 #include <sys/fcntl.h>
56 #include <sys/file.h>
57 #include <sys/limits.h>
58 #include <sys/linker.h>
59 #include <sys/stat.h>
60 #include <sys/sx.h>
61 #include <sys/unistd.h>
62 #include <sys/vnode.h>
63 #include <sys/priv.h>
64 #include <sys/proc.h>
65 #include <sys/dirent.h>
66 #include <sys/jail.h>
67 #include <sys/syscallsubr.h>
68 #include <sys/sysctl.h>
69 
70 #include <machine/stdarg.h>
71 
72 #include <security/audit/audit.h>
73 #include <security/mac/mac_framework.h>
74 
75 #include <vm/vm.h>
76 #include <vm/vm_object.h>
77 #include <vm/vm_page.h>
78 #include <vm/uma.h>
79 
80 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
81 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
82 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
83 static int setfmode(struct thread *td, struct vnode *, int);
84 static int setfflags(struct thread *td, struct vnode *, int);
85 static int setutimes(struct thread *td, struct vnode *,
86     const struct timespec *, int, int);
87 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
88     struct thread *td);
89 
90 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
91 
92 /*
93  * The module initialization routine for POSIX asynchronous I/O will
94  * set this to the version of AIO that it implements.  (Zero means
95  * that it is not implemented.)  This value is used here by pathconf()
96  * and in kern_descrip.c by fpathconf().
97  */
98 int async_io_version;
99 
100 /*
101  * Sync each mounted filesystem.
102  */
103 #ifndef _SYS_SYSPROTO_H_
104 struct sync_args {
105 	int     dummy;
106 };
107 #endif
108 
109 #ifdef DEBUG
110 static int syncprt = 0;
111 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
112 #endif
113 
114 /* ARGSUSED */
115 int
116 sync(td, uap)
117 	struct thread *td;
118 	struct sync_args *uap;
119 {
120 	struct mount *mp, *nmp;
121 	int vfslocked;
122 
123 	mtx_lock(&mountlist_mtx);
124 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
125 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
126 			nmp = TAILQ_NEXT(mp, mnt_list);
127 			continue;
128 		}
129 		vfslocked = VFS_LOCK_GIANT(mp);
130 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
131 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
132 			MNT_ILOCK(mp);
133 			mp->mnt_noasync++;
134 			mp->mnt_kern_flag &= ~MNTK_ASYNC;
135 			MNT_IUNLOCK(mp);
136 			vfs_msync(mp, MNT_NOWAIT);
137 			VFS_SYNC(mp, MNT_NOWAIT, td);
138 			MNT_ILOCK(mp);
139 			mp->mnt_noasync--;
140 			if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
141 			    mp->mnt_noasync == 0)
142 				mp->mnt_kern_flag |= MNTK_ASYNC;
143 			MNT_IUNLOCK(mp);
144 			vn_finished_write(mp);
145 		}
146 		VFS_UNLOCK_GIANT(vfslocked);
147 		mtx_lock(&mountlist_mtx);
148 		nmp = TAILQ_NEXT(mp, mnt_list);
149 		vfs_unbusy(mp, td);
150 	}
151 	mtx_unlock(&mountlist_mtx);
152 	return (0);
153 }
154 
155 /* XXX PRISON: could be per prison flag */
156 static int prison_quotas;
157 #if 0
158 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
159 #endif
160 
161 /*
162  * Change filesystem quotas.
163  *
164  * MP SAFE
165  */
166 #ifndef _SYS_SYSPROTO_H_
167 struct quotactl_args {
168 	char *path;
169 	int cmd;
170 	int uid;
171 	caddr_t arg;
172 };
173 #endif
174 int
175 quotactl(td, uap)
176 	struct thread *td;
177 	register struct quotactl_args /* {
178 		char *path;
179 		int cmd;
180 		int uid;
181 		caddr_t arg;
182 	} */ *uap;
183 {
184 	struct mount *mp, *vmp;
185 	int vfslocked;
186 	int error;
187 	struct nameidata nd;
188 
189 	AUDIT_ARG(cmd, uap->cmd);
190 	AUDIT_ARG(uid, uap->uid);
191 	if (jailed(td->td_ucred) && !prison_quotas)
192 		return (EPERM);
193 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1,
194 	   UIO_USERSPACE, uap->path, td);
195 	if ((error = namei(&nd)) != 0)
196 		return (error);
197 	vfslocked = NDHASGIANT(&nd);
198 	NDFREE(&nd, NDF_ONLY_PNBUF);
199 	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
200 	mp = nd.ni_vp->v_mount;
201 	vrele(nd.ni_vp);
202 	if (error)
203 		goto out;
204 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
205 	vn_finished_write(vmp);
206 out:
207 	VFS_UNLOCK_GIANT(vfslocked);
208 	return (error);
209 }
210 
211 /*
212  * Get filesystem statistics.
213  */
214 #ifndef _SYS_SYSPROTO_H_
215 struct statfs_args {
216 	char *path;
217 	struct statfs *buf;
218 };
219 #endif
220 int
221 statfs(td, uap)
222 	struct thread *td;
223 	register struct statfs_args /* {
224 		char *path;
225 		struct statfs *buf;
226 	} */ *uap;
227 {
228 	struct statfs sf;
229 	int error;
230 
231 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
232 	if (error == 0)
233 		error = copyout(&sf, uap->buf, sizeof(sf));
234 	return (error);
235 }
236 
237 int
238 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
239     struct statfs *buf)
240 {
241 	struct mount *mp;
242 	struct statfs *sp, sb;
243 	int vfslocked;
244 	int error;
245 	struct nameidata nd;
246 
247 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
248 	    pathseg, path, td);
249 	error = namei(&nd);
250 	if (error)
251 		return (error);
252 	vfslocked = NDHASGIANT(&nd);
253 	mp = nd.ni_vp->v_mount;
254 	vfs_ref(mp);
255 	NDFREE(&nd, NDF_ONLY_PNBUF);
256 	vput(nd.ni_vp);
257 #ifdef MAC
258 	error = mac_check_mount_stat(td->td_ucred, mp);
259 	if (error)
260 		goto out;
261 #endif
262 	/*
263 	 * Set these in case the underlying filesystem fails to do so.
264 	 */
265 	sp = &mp->mnt_stat;
266 	sp->f_version = STATFS_VERSION;
267 	sp->f_namemax = NAME_MAX;
268 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
269 	error = VFS_STATFS(mp, sp, td);
270 	if (error)
271 		goto out;
272 	if (priv_check(td, PRIV_VFS_GENERATION)) {
273 		bcopy(sp, &sb, sizeof(sb));
274 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
275 		prison_enforce_statfs(td->td_ucred, mp, &sb);
276 		sp = &sb;
277 	}
278 	*buf = *sp;
279 out:
280 	vfs_rel(mp);
281 	VFS_UNLOCK_GIANT(vfslocked);
282 	if (mtx_owned(&Giant))
283 		printf("statfs(%d): %s: %d\n", vfslocked, path, error);
284 	return (error);
285 }
286 
287 /*
288  * Get filesystem statistics.
289  */
290 #ifndef _SYS_SYSPROTO_H_
291 struct fstatfs_args {
292 	int fd;
293 	struct statfs *buf;
294 };
295 #endif
296 int
297 fstatfs(td, uap)
298 	struct thread *td;
299 	register struct fstatfs_args /* {
300 		int fd;
301 		struct statfs *buf;
302 	} */ *uap;
303 {
304 	struct statfs sf;
305 	int error;
306 
307 	error = kern_fstatfs(td, uap->fd, &sf);
308 	if (error == 0)
309 		error = copyout(&sf, uap->buf, sizeof(sf));
310 	return (error);
311 }
312 
313 int
314 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
315 {
316 	struct file *fp;
317 	struct mount *mp;
318 	struct statfs *sp, sb;
319 	int vfslocked;
320 	struct vnode *vp;
321 	int error;
322 
323 	AUDIT_ARG(fd, fd);
324 	error = getvnode(td->td_proc->p_fd, fd, &fp);
325 	if (error)
326 		return (error);
327 	vp = fp->f_vnode;
328 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
329 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
330 #ifdef AUDIT
331 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
332 #endif
333 	mp = vp->v_mount;
334 	if (mp)
335 		vfs_ref(mp);
336 	VOP_UNLOCK(vp, 0, td);
337 	fdrop(fp, td);
338 	if (vp->v_iflag & VI_DOOMED) {
339 		error = EBADF;
340 		goto out;
341 	}
342 #ifdef MAC
343 	error = mac_check_mount_stat(td->td_ucred, mp);
344 	if (error)
345 		goto out;
346 #endif
347 	/*
348 	 * Set these in case the underlying filesystem fails to do so.
349 	 */
350 	sp = &mp->mnt_stat;
351 	sp->f_version = STATFS_VERSION;
352 	sp->f_namemax = NAME_MAX;
353 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
354 	error = VFS_STATFS(mp, sp, td);
355 	if (error)
356 		goto out;
357 	if (priv_check(td, PRIV_VFS_GENERATION)) {
358 		bcopy(sp, &sb, sizeof(sb));
359 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
360 		prison_enforce_statfs(td->td_ucred, mp, &sb);
361 		sp = &sb;
362 	}
363 	*buf = *sp;
364 out:
365 	if (mp)
366 		vfs_rel(mp);
367 	VFS_UNLOCK_GIANT(vfslocked);
368 	return (error);
369 }
370 
371 /*
372  * Get statistics on all filesystems.
373  */
374 #ifndef _SYS_SYSPROTO_H_
375 struct getfsstat_args {
376 	struct statfs *buf;
377 	long bufsize;
378 	int flags;
379 };
380 #endif
381 int
382 getfsstat(td, uap)
383 	struct thread *td;
384 	register struct getfsstat_args /* {
385 		struct statfs *buf;
386 		long bufsize;
387 		int flags;
388 	} */ *uap;
389 {
390 
391 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
392 	    uap->flags));
393 }
394 
395 /*
396  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
397  * 	The caller is responsible for freeing memory which will be allocated
398  *	in '*buf'.
399  */
400 int
401 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
402     enum uio_seg bufseg, int flags)
403 {
404 	struct mount *mp, *nmp;
405 	struct statfs *sfsp, *sp, sb;
406 	size_t count, maxcount;
407 	int vfslocked;
408 	int error;
409 
410 	maxcount = bufsize / sizeof(struct statfs);
411 	if (bufsize == 0)
412 		sfsp = NULL;
413 	else if (bufseg == UIO_USERSPACE)
414 		sfsp = *buf;
415 	else /* if (bufseg == UIO_SYSSPACE) */ {
416 		count = 0;
417 		mtx_lock(&mountlist_mtx);
418 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
419 			count++;
420 		}
421 		mtx_unlock(&mountlist_mtx);
422 		if (maxcount > count)
423 			maxcount = count;
424 		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
425 		    M_WAITOK);
426 	}
427 	count = 0;
428 	mtx_lock(&mountlist_mtx);
429 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
430 		if (prison_canseemount(td->td_ucred, mp) != 0) {
431 			nmp = TAILQ_NEXT(mp, mnt_list);
432 			continue;
433 		}
434 #ifdef MAC
435 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
436 			nmp = TAILQ_NEXT(mp, mnt_list);
437 			continue;
438 		}
439 #endif
440 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
441 			nmp = TAILQ_NEXT(mp, mnt_list);
442 			continue;
443 		}
444 		vfslocked = VFS_LOCK_GIANT(mp);
445 		if (sfsp && count < maxcount) {
446 			sp = &mp->mnt_stat;
447 			/*
448 			 * Set these in case the underlying filesystem
449 			 * fails to do so.
450 			 */
451 			sp->f_version = STATFS_VERSION;
452 			sp->f_namemax = NAME_MAX;
453 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
454 			/*
455 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
456 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
457 			 * overrides MNT_WAIT.
458 			 */
459 			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
460 			    (flags & MNT_WAIT)) &&
461 			    (error = VFS_STATFS(mp, sp, td))) {
462 				VFS_UNLOCK_GIANT(vfslocked);
463 				mtx_lock(&mountlist_mtx);
464 				nmp = TAILQ_NEXT(mp, mnt_list);
465 				vfs_unbusy(mp, td);
466 				continue;
467 			}
468 			if (priv_check(td, PRIV_VFS_GENERATION)) {
469 				bcopy(sp, &sb, sizeof(sb));
470 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
471 				prison_enforce_statfs(td->td_ucred, mp, &sb);
472 				sp = &sb;
473 			}
474 			if (bufseg == UIO_SYSSPACE)
475 				bcopy(sp, sfsp, sizeof(*sp));
476 			else /* if (bufseg == UIO_USERSPACE) */ {
477 				error = copyout(sp, sfsp, sizeof(*sp));
478 				if (error) {
479 					vfs_unbusy(mp, td);
480 					VFS_UNLOCK_GIANT(vfslocked);
481 					return (error);
482 				}
483 			}
484 			sfsp++;
485 		}
486 		VFS_UNLOCK_GIANT(vfslocked);
487 		count++;
488 		mtx_lock(&mountlist_mtx);
489 		nmp = TAILQ_NEXT(mp, mnt_list);
490 		vfs_unbusy(mp, td);
491 	}
492 	mtx_unlock(&mountlist_mtx);
493 	if (sfsp && count > maxcount)
494 		td->td_retval[0] = maxcount;
495 	else
496 		td->td_retval[0] = count;
497 	return (0);
498 }
499 
500 #ifdef COMPAT_FREEBSD4
501 /*
502  * Get old format filesystem statistics.
503  */
504 static void cvtstatfs(struct statfs *, struct ostatfs *);
505 
506 #ifndef _SYS_SYSPROTO_H_
507 struct freebsd4_statfs_args {
508 	char *path;
509 	struct ostatfs *buf;
510 };
511 #endif
512 int
513 freebsd4_statfs(td, uap)
514 	struct thread *td;
515 	struct freebsd4_statfs_args /* {
516 		char *path;
517 		struct ostatfs *buf;
518 	} */ *uap;
519 {
520 	struct ostatfs osb;
521 	struct statfs sf;
522 	int error;
523 
524 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
525 	if (error)
526 		return (error);
527 	cvtstatfs(&sf, &osb);
528 	return (copyout(&osb, uap->buf, sizeof(osb)));
529 }
530 
531 /*
532  * Get filesystem statistics.
533  */
534 #ifndef _SYS_SYSPROTO_H_
535 struct freebsd4_fstatfs_args {
536 	int fd;
537 	struct ostatfs *buf;
538 };
539 #endif
540 int
541 freebsd4_fstatfs(td, uap)
542 	struct thread *td;
543 	struct freebsd4_fstatfs_args /* {
544 		int fd;
545 		struct ostatfs *buf;
546 	} */ *uap;
547 {
548 	struct ostatfs osb;
549 	struct statfs sf;
550 	int error;
551 
552 	error = kern_fstatfs(td, uap->fd, &sf);
553 	if (error)
554 		return (error);
555 	cvtstatfs(&sf, &osb);
556 	return (copyout(&osb, uap->buf, sizeof(osb)));
557 }
558 
559 /*
560  * Get statistics on all filesystems.
561  */
562 #ifndef _SYS_SYSPROTO_H_
563 struct freebsd4_getfsstat_args {
564 	struct ostatfs *buf;
565 	long bufsize;
566 	int flags;
567 };
568 #endif
569 int
570 freebsd4_getfsstat(td, uap)
571 	struct thread *td;
572 	register struct freebsd4_getfsstat_args /* {
573 		struct ostatfs *buf;
574 		long bufsize;
575 		int flags;
576 	} */ *uap;
577 {
578 	struct statfs *buf, *sp;
579 	struct ostatfs osb;
580 	size_t count, size;
581 	int error;
582 
583 	count = uap->bufsize / sizeof(struct ostatfs);
584 	size = count * sizeof(struct statfs);
585 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
586 	if (size > 0) {
587 		count = td->td_retval[0];
588 		sp = buf;
589 		while (count > 0 && error == 0) {
590 			cvtstatfs(sp, &osb);
591 			error = copyout(&osb, uap->buf, sizeof(osb));
592 			sp++;
593 			uap->buf++;
594 			count--;
595 		}
596 		free(buf, M_TEMP);
597 	}
598 	return (error);
599 }
600 
601 /*
602  * Implement fstatfs() for (NFS) file handles.
603  */
604 #ifndef _SYS_SYSPROTO_H_
605 struct freebsd4_fhstatfs_args {
606 	struct fhandle *u_fhp;
607 	struct ostatfs *buf;
608 };
609 #endif
610 int
611 freebsd4_fhstatfs(td, uap)
612 	struct thread *td;
613 	struct freebsd4_fhstatfs_args /* {
614 		struct fhandle *u_fhp;
615 		struct ostatfs *buf;
616 	} */ *uap;
617 {
618 	struct ostatfs osb;
619 	struct statfs sf;
620 	fhandle_t fh;
621 	int error;
622 
623 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
624 	if (error)
625 		return (error);
626 	error = kern_fhstatfs(td, fh, &sf);
627 	if (error)
628 		return (error);
629 	cvtstatfs(&sf, &osb);
630 	return (copyout(&osb, uap->buf, sizeof(osb)));
631 }
632 
633 /*
634  * Convert a new format statfs structure to an old format statfs structure.
635  */
636 static void
637 cvtstatfs(nsp, osp)
638 	struct statfs *nsp;
639 	struct ostatfs *osp;
640 {
641 
642 	bzero(osp, sizeof(*osp));
643 	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
644 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
645 	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
646 	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
647 	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
648 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
649 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
650 	osp->f_owner = nsp->f_owner;
651 	osp->f_type = nsp->f_type;
652 	osp->f_flags = nsp->f_flags;
653 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
654 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
655 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
656 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
657 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
658 	    MIN(MFSNAMELEN, OMFSNAMELEN));
659 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
660 	    MIN(MNAMELEN, OMNAMELEN));
661 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
662 	    MIN(MNAMELEN, OMNAMELEN));
663 	osp->f_fsid = nsp->f_fsid;
664 }
665 #endif /* COMPAT_FREEBSD4 */
666 
667 /*
668  * Change current working directory to a given file descriptor.
669  */
670 #ifndef _SYS_SYSPROTO_H_
671 struct fchdir_args {
672 	int	fd;
673 };
674 #endif
675 int
676 fchdir(td, uap)
677 	struct thread *td;
678 	struct fchdir_args /* {
679 		int fd;
680 	} */ *uap;
681 {
682 	register struct filedesc *fdp = td->td_proc->p_fd;
683 	struct vnode *vp, *tdp, *vpold;
684 	struct mount *mp;
685 	struct file *fp;
686 	int vfslocked;
687 	int error;
688 
689 	AUDIT_ARG(fd, uap->fd);
690 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
691 		return (error);
692 	vp = fp->f_vnode;
693 	VREF(vp);
694 	fdrop(fp, td);
695 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
696 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
697 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
698 	error = change_dir(vp, td);
699 	while (!error && (mp = vp->v_mountedhere) != NULL) {
700 		int tvfslocked;
701 		if (vfs_busy(mp, 0, 0, td))
702 			continue;
703 		tvfslocked = VFS_LOCK_GIANT(mp);
704 		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
705 		vfs_unbusy(mp, td);
706 		if (error) {
707 			VFS_UNLOCK_GIANT(tvfslocked);
708 			break;
709 		}
710 		vput(vp);
711 		VFS_UNLOCK_GIANT(vfslocked);
712 		vp = tdp;
713 		vfslocked = tvfslocked;
714 	}
715 	if (error) {
716 		vput(vp);
717 		VFS_UNLOCK_GIANT(vfslocked);
718 		return (error);
719 	}
720 	VOP_UNLOCK(vp, 0, td);
721 	VFS_UNLOCK_GIANT(vfslocked);
722 	FILEDESC_LOCK_FAST(fdp);
723 	vpold = fdp->fd_cdir;
724 	fdp->fd_cdir = vp;
725 	FILEDESC_UNLOCK_FAST(fdp);
726 	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
727 	vrele(vpold);
728 	VFS_UNLOCK_GIANT(vfslocked);
729 	return (0);
730 }
731 
732 /*
733  * Change current working directory (``.'').
734  */
735 #ifndef _SYS_SYSPROTO_H_
736 struct chdir_args {
737 	char	*path;
738 };
739 #endif
740 int
741 chdir(td, uap)
742 	struct thread *td;
743 	struct chdir_args /* {
744 		char *path;
745 	} */ *uap;
746 {
747 
748 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
749 }
750 
751 int
752 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
753 {
754 	register struct filedesc *fdp = td->td_proc->p_fd;
755 	int error;
756 	struct nameidata nd;
757 	struct vnode *vp;
758 	int vfslocked;
759 
760 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
761 	    pathseg, path, td);
762 	if ((error = namei(&nd)) != 0)
763 		return (error);
764 	vfslocked = NDHASGIANT(&nd);
765 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
766 		vput(nd.ni_vp);
767 		VFS_UNLOCK_GIANT(vfslocked);
768 		NDFREE(&nd, NDF_ONLY_PNBUF);
769 		return (error);
770 	}
771 	VOP_UNLOCK(nd.ni_vp, 0, td);
772 	VFS_UNLOCK_GIANT(vfslocked);
773 	NDFREE(&nd, NDF_ONLY_PNBUF);
774 	FILEDESC_LOCK_FAST(fdp);
775 	vp = fdp->fd_cdir;
776 	fdp->fd_cdir = nd.ni_vp;
777 	FILEDESC_UNLOCK_FAST(fdp);
778 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
779 	vrele(vp);
780 	VFS_UNLOCK_GIANT(vfslocked);
781 	return (0);
782 }
783 
784 /*
785  * Helper function for raised chroot(2) security function:  Refuse if
786  * any filedescriptors are open directories.
787  */
788 static int
789 chroot_refuse_vdir_fds(fdp)
790 	struct filedesc *fdp;
791 {
792 	struct vnode *vp;
793 	struct file *fp;
794 	int fd;
795 
796 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
797 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
798 		fp = fget_locked(fdp, fd);
799 		if (fp == NULL)
800 			continue;
801 		if (fp->f_type == DTYPE_VNODE) {
802 			vp = fp->f_vnode;
803 			if (vp->v_type == VDIR)
804 				return (EPERM);
805 		}
806 	}
807 	return (0);
808 }
809 
810 /*
811  * This sysctl determines if we will allow a process to chroot(2) if it
812  * has a directory open:
813  *	0: disallowed for all processes.
814  *	1: allowed for processes that were not already chroot(2)'ed.
815  *	2: allowed for all processes.
816  */
817 
818 static int chroot_allow_open_directories = 1;
819 
820 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
821      &chroot_allow_open_directories, 0, "");
822 
823 /*
824  * Change notion of root (``/'') directory.
825  */
826 #ifndef _SYS_SYSPROTO_H_
827 struct chroot_args {
828 	char	*path;
829 };
830 #endif
831 int
832 chroot(td, uap)
833 	struct thread *td;
834 	struct chroot_args /* {
835 		char *path;
836 	} */ *uap;
837 {
838 	int error;
839 	struct nameidata nd;
840 	int vfslocked;
841 
842 	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT,
843 	    SUSER_ALLOWJAIL);
844 	if (error)
845 		return (error);
846 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
847 	    UIO_USERSPACE, uap->path, td);
848 	error = namei(&nd);
849 	if (error)
850 		goto error;
851 	vfslocked = NDHASGIANT(&nd);
852 	if ((error = change_dir(nd.ni_vp, td)) != 0)
853 		goto e_vunlock;
854 #ifdef MAC
855 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
856 		goto e_vunlock;
857 #endif
858 	VOP_UNLOCK(nd.ni_vp, 0, td);
859 	error = change_root(nd.ni_vp, td);
860 	vrele(nd.ni_vp);
861 	VFS_UNLOCK_GIANT(vfslocked);
862 	NDFREE(&nd, NDF_ONLY_PNBUF);
863 	return (error);
864 e_vunlock:
865 	vput(nd.ni_vp);
866 	VFS_UNLOCK_GIANT(vfslocked);
867 error:
868 	NDFREE(&nd, NDF_ONLY_PNBUF);
869 	return (error);
870 }
871 
872 /*
873  * Common routine for chroot and chdir.  Callers must provide a locked vnode
874  * instance.
875  */
876 int
877 change_dir(vp, td)
878 	struct vnode *vp;
879 	struct thread *td;
880 {
881 	int error;
882 
883 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
884 	if (vp->v_type != VDIR)
885 		return (ENOTDIR);
886 #ifdef MAC
887 	error = mac_check_vnode_chdir(td->td_ucred, vp);
888 	if (error)
889 		return (error);
890 #endif
891 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
892 	return (error);
893 }
894 
895 /*
896  * Common routine for kern_chroot() and jail_attach().  The caller is
897  * responsible for invoking priv_check() and mac_check_chroot() to authorize
898  * this operation.
899  */
900 int
901 change_root(vp, td)
902 	struct vnode *vp;
903 	struct thread *td;
904 {
905 	struct filedesc *fdp;
906 	struct vnode *oldvp;
907 	int vfslocked;
908 	int error;
909 
910 	VFS_ASSERT_GIANT(vp->v_mount);
911 	fdp = td->td_proc->p_fd;
912 	FILEDESC_LOCK(fdp);
913 	if (chroot_allow_open_directories == 0 ||
914 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
915 		error = chroot_refuse_vdir_fds(fdp);
916 		if (error) {
917 			FILEDESC_UNLOCK(fdp);
918 			return (error);
919 		}
920 	}
921 	oldvp = fdp->fd_rdir;
922 	fdp->fd_rdir = vp;
923 	VREF(fdp->fd_rdir);
924 	if (!fdp->fd_jdir) {
925 		fdp->fd_jdir = vp;
926 		VREF(fdp->fd_jdir);
927 	}
928 	FILEDESC_UNLOCK(fdp);
929 	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
930 	vrele(oldvp);
931 	VFS_UNLOCK_GIANT(vfslocked);
932 	return (0);
933 }
934 
935 /*
936  * Check permissions, allocate an open file structure,
937  * and call the device open routine if any.
938  *
939  * MP SAFE
940  */
941 #ifndef _SYS_SYSPROTO_H_
942 struct open_args {
943 	char	*path;
944 	int	flags;
945 	int	mode;
946 };
947 #endif
948 int
949 open(td, uap)
950 	struct thread *td;
951 	register struct open_args /* {
952 		char *path;
953 		int flags;
954 		int mode;
955 	} */ *uap;
956 {
957 
958 	return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
959 }
960 
961 int
962 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
963     int mode)
964 {
965 	struct proc *p = td->td_proc;
966 	struct filedesc *fdp = p->p_fd;
967 	struct file *fp;
968 	struct vnode *vp;
969 	struct vattr vat;
970 	struct mount *mp;
971 	int cmode;
972 	struct file *nfp;
973 	int type, indx, error;
974 	struct flock lf;
975 	struct nameidata nd;
976 	int vfslocked;
977 
978 	AUDIT_ARG(fflags, flags);
979 	AUDIT_ARG(mode, mode);
980 	if ((flags & O_ACCMODE) == O_ACCMODE)
981 		return (EINVAL);
982 	flags = FFLAGS(flags);
983 	error = falloc(td, &nfp, &indx);
984 	if (error)
985 		return (error);
986 	/* An extra reference on `nfp' has been held for us by falloc(). */
987 	fp = nfp;
988 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
989 	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
990 	td->td_dupfd = -1;		/* XXX check for fdopen */
991 	error = vn_open(&nd, &flags, cmode, indx);
992 	if (error) {
993 		/*
994 		 * If the vn_open replaced the method vector, something
995 		 * wonderous happened deep below and we just pass it up
996 		 * pretending we know what we do.
997 		 */
998 		if (error == ENXIO && fp->f_ops != &badfileops) {
999 			fdrop(fp, td);
1000 			td->td_retval[0] = indx;
1001 			return (0);
1002 		}
1003 
1004 		/*
1005 		 * release our own reference
1006 		 */
1007 		fdrop(fp, td);
1008 
1009 		/*
1010 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1011 		 * responsible for dropping the old contents of ofiles[indx]
1012 		 * if it succeeds.
1013 		 */
1014 		if ((error == ENODEV || error == ENXIO) &&
1015 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1016 		    (error =
1017 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1018 			td->td_retval[0] = indx;
1019 			return (0);
1020 		}
1021 		/*
1022 		 * Clean up the descriptor, but only if another thread hadn't
1023 		 * replaced or closed it.
1024 		 */
1025 		fdclose(fdp, fp, indx, td);
1026 
1027 		if (error == ERESTART)
1028 			error = EINTR;
1029 		return (error);
1030 	}
1031 	td->td_dupfd = 0;
1032 	vfslocked = NDHASGIANT(&nd);
1033 	NDFREE(&nd, NDF_ONLY_PNBUF);
1034 	vp = nd.ni_vp;
1035 
1036 	/*
1037 	 * There should be 2 references on the file, one from the descriptor
1038 	 * table, and one for us.
1039 	 *
1040 	 * Handle the case where someone closed the file (via its file
1041 	 * descriptor) while we were blocked.  The end result should look
1042 	 * like opening the file succeeded but it was immediately closed.
1043 	 * We call vn_close() manually because we haven't yet hooked up
1044 	 * the various 'struct file' fields.
1045 	 */
1046 	FILEDESC_LOCK(fdp);
1047 	FILE_LOCK(fp);
1048 	if (fp->f_count == 1) {
1049 		mp = vp->v_mount;
1050 		KASSERT(fdp->fd_ofiles[indx] != fp,
1051 		    ("Open file descriptor lost all refs"));
1052 		FILE_UNLOCK(fp);
1053 		FILEDESC_UNLOCK(fdp);
1054 		VOP_UNLOCK(vp, 0, td);
1055 		vn_close(vp, flags & FMASK, fp->f_cred, td);
1056 		VFS_UNLOCK_GIANT(vfslocked);
1057 		fdrop(fp, td);
1058 		td->td_retval[0] = indx;
1059 		return (0);
1060 	}
1061 	fp->f_vnode = vp;
1062 	if (fp->f_data == NULL)
1063 		fp->f_data = vp;
1064 	fp->f_flag = flags & FMASK;
1065 	if (fp->f_ops == &badfileops)
1066 		fp->f_ops = &vnops;
1067 	fp->f_seqcount = 1;
1068 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1069 	FILE_UNLOCK(fp);
1070 	FILEDESC_UNLOCK(fdp);
1071 
1072 	VOP_UNLOCK(vp, 0, td);
1073 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1074 		lf.l_whence = SEEK_SET;
1075 		lf.l_start = 0;
1076 		lf.l_len = 0;
1077 		if (flags & O_EXLOCK)
1078 			lf.l_type = F_WRLCK;
1079 		else
1080 			lf.l_type = F_RDLCK;
1081 		type = F_FLOCK;
1082 		if ((flags & FNONBLOCK) == 0)
1083 			type |= F_WAIT;
1084 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1085 			    type)) != 0)
1086 			goto bad;
1087 		fp->f_flag |= FHASLOCK;
1088 	}
1089 	if (flags & O_TRUNC) {
1090 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1091 			goto bad;
1092 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1093 		VATTR_NULL(&vat);
1094 		vat.va_size = 0;
1095 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1096 #ifdef MAC
1097 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1098 		if (error == 0)
1099 #endif
1100 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1101 		VOP_UNLOCK(vp, 0, td);
1102 		vn_finished_write(mp);
1103 		if (error)
1104 			goto bad;
1105 	}
1106 	VFS_UNLOCK_GIANT(vfslocked);
1107 	/*
1108 	 * Release our private reference, leaving the one associated with
1109 	 * the descriptor table intact.
1110 	 */
1111 	fdrop(fp, td);
1112 	td->td_retval[0] = indx;
1113 	return (0);
1114 bad:
1115 	VFS_UNLOCK_GIANT(vfslocked);
1116 	fdclose(fdp, fp, indx, td);
1117 	fdrop(fp, td);
1118 	return (error);
1119 }
1120 
1121 #ifdef COMPAT_43
1122 /*
1123  * Create a file.
1124  *
1125  * MP SAFE
1126  */
1127 #ifndef _SYS_SYSPROTO_H_
1128 struct ocreat_args {
1129 	char	*path;
1130 	int	mode;
1131 };
1132 #endif
1133 int
1134 ocreat(td, uap)
1135 	struct thread *td;
1136 	register struct ocreat_args /* {
1137 		char *path;
1138 		int mode;
1139 	} */ *uap;
1140 {
1141 
1142 	return (kern_open(td, uap->path, UIO_USERSPACE,
1143 	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
1144 }
1145 #endif /* COMPAT_43 */
1146 
1147 /*
1148  * Create a special file.
1149  */
1150 #ifndef _SYS_SYSPROTO_H_
1151 struct mknod_args {
1152 	char	*path;
1153 	int	mode;
1154 	int	dev;
1155 };
1156 #endif
1157 int
1158 mknod(td, uap)
1159 	struct thread *td;
1160 	register struct mknod_args /* {
1161 		char *path;
1162 		int mode;
1163 		int dev;
1164 	} */ *uap;
1165 {
1166 
1167 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1168 }
1169 
1170 int
1171 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1172     int dev)
1173 {
1174 	struct vnode *vp;
1175 	struct mount *mp;
1176 	struct vattr vattr;
1177 	int error;
1178 	int whiteout = 0;
1179 	struct nameidata nd;
1180 	int vfslocked;
1181 
1182 	AUDIT_ARG(mode, mode);
1183 	AUDIT_ARG(dev, dev);
1184 	switch (mode & S_IFMT) {
1185 	case S_IFCHR:
1186 	case S_IFBLK:
1187 		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
1188 		break;
1189 	case S_IFMT:
1190 		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
1191 		break;
1192 	case S_IFWHT:
1193 		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
1194 		break;
1195 	default:
1196 		error = EINVAL;
1197 		break;
1198 	}
1199 	if (error)
1200 		return (error);
1201 restart:
1202 	bwillwrite();
1203 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1204 	    pathseg, path, td);
1205 	if ((error = namei(&nd)) != 0)
1206 		return (error);
1207 	vfslocked = NDHASGIANT(&nd);
1208 	vp = nd.ni_vp;
1209 	if (vp != NULL) {
1210 		NDFREE(&nd, NDF_ONLY_PNBUF);
1211 		if (vp == nd.ni_dvp)
1212 			vrele(nd.ni_dvp);
1213 		else
1214 			vput(nd.ni_dvp);
1215 		vrele(vp);
1216 		VFS_UNLOCK_GIANT(vfslocked);
1217 		return (EEXIST);
1218 	} else {
1219 		VATTR_NULL(&vattr);
1220 		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1221 		vattr.va_mode = (mode & ALLPERMS) &
1222 		    ~td->td_proc->p_fd->fd_cmask;
1223 		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1224 		vattr.va_rdev = dev;
1225 		whiteout = 0;
1226 
1227 		switch (mode & S_IFMT) {
1228 		case S_IFMT:	/* used by badsect to flag bad sectors */
1229 			vattr.va_type = VBAD;
1230 			break;
1231 		case S_IFCHR:
1232 			vattr.va_type = VCHR;
1233 			break;
1234 		case S_IFBLK:
1235 			vattr.va_type = VBLK;
1236 			break;
1237 		case S_IFWHT:
1238 			whiteout = 1;
1239 			break;
1240 		default:
1241 			panic("kern_mknod: invalid mode");
1242 		}
1243 	}
1244 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1245 		NDFREE(&nd, NDF_ONLY_PNBUF);
1246 		vput(nd.ni_dvp);
1247 		VFS_UNLOCK_GIANT(vfslocked);
1248 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1249 			return (error);
1250 		goto restart;
1251 	}
1252 #ifdef MAC
1253 	if (error == 0 && !whiteout)
1254 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1255 		    &nd.ni_cnd, &vattr);
1256 #endif
1257 	if (!error) {
1258 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1259 		if (whiteout)
1260 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1261 		else {
1262 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1263 						&nd.ni_cnd, &vattr);
1264 			if (error == 0)
1265 				vput(nd.ni_vp);
1266 		}
1267 	}
1268 	NDFREE(&nd, NDF_ONLY_PNBUF);
1269 	vput(nd.ni_dvp);
1270 	vn_finished_write(mp);
1271 	VFS_UNLOCK_GIANT(vfslocked);
1272 	return (error);
1273 }
1274 
1275 /*
1276  * Create a named pipe.
1277  */
1278 #ifndef _SYS_SYSPROTO_H_
1279 struct mkfifo_args {
1280 	char	*path;
1281 	int	mode;
1282 };
1283 #endif
1284 int
1285 mkfifo(td, uap)
1286 	struct thread *td;
1287 	register struct mkfifo_args /* {
1288 		char *path;
1289 		int mode;
1290 	} */ *uap;
1291 {
1292 
1293 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1294 }
1295 
1296 int
1297 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1298 {
1299 	struct mount *mp;
1300 	struct vattr vattr;
1301 	int error;
1302 	struct nameidata nd;
1303 	int vfslocked;
1304 
1305 	AUDIT_ARG(mode, mode);
1306 restart:
1307 	bwillwrite();
1308 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1309 	    pathseg, path, td);
1310 	if ((error = namei(&nd)) != 0)
1311 		return (error);
1312 	vfslocked = NDHASGIANT(&nd);
1313 	if (nd.ni_vp != NULL) {
1314 		NDFREE(&nd, NDF_ONLY_PNBUF);
1315 		if (nd.ni_vp == nd.ni_dvp)
1316 			vrele(nd.ni_dvp);
1317 		else
1318 			vput(nd.ni_dvp);
1319 		vrele(nd.ni_vp);
1320 		VFS_UNLOCK_GIANT(vfslocked);
1321 		return (EEXIST);
1322 	}
1323 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1324 		NDFREE(&nd, NDF_ONLY_PNBUF);
1325 		vput(nd.ni_dvp);
1326 		VFS_UNLOCK_GIANT(vfslocked);
1327 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1328 			return (error);
1329 		goto restart;
1330 	}
1331 	VATTR_NULL(&vattr);
1332 	vattr.va_type = VFIFO;
1333 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1334 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1335 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1336 #ifdef MAC
1337 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1338 	    &vattr);
1339 	if (error)
1340 		goto out;
1341 #endif
1342 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1343 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1344 	if (error == 0)
1345 		vput(nd.ni_vp);
1346 #ifdef MAC
1347 out:
1348 #endif
1349 	vput(nd.ni_dvp);
1350 	vn_finished_write(mp);
1351 	VFS_UNLOCK_GIANT(vfslocked);
1352 	NDFREE(&nd, NDF_ONLY_PNBUF);
1353 	return (error);
1354 }
1355 
1356 /*
1357  * Make a hard file link.
1358  */
1359 #ifndef _SYS_SYSPROTO_H_
1360 struct link_args {
1361 	char	*path;
1362 	char	*link;
1363 };
1364 #endif
1365 int
1366 link(td, uap)
1367 	struct thread *td;
1368 	register struct link_args /* {
1369 		char *path;
1370 		char *link;
1371 	} */ *uap;
1372 {
1373 	int error;
1374 
1375 	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1376 	return (error);
1377 }
1378 
1379 static int hardlink_check_uid = 0;
1380 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1381     &hardlink_check_uid, 0,
1382     "Unprivileged processes cannot create hard links to files owned by other "
1383     "users");
1384 static int hardlink_check_gid = 0;
1385 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1386     &hardlink_check_gid, 0,
1387     "Unprivileged processes cannot create hard links to files owned by other "
1388     "groups");
1389 
1390 static int
1391 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1392 {
1393 	struct vattr va;
1394 	int error;
1395 
1396 	if (!hardlink_check_uid && !hardlink_check_gid)
1397 		return (0);
1398 
1399 	error = VOP_GETATTR(vp, &va, cred, td);
1400 	if (error != 0)
1401 		return (error);
1402 
1403 	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
1404 		error = priv_check_cred(cred, PRIV_VFS_LINK,
1405 		    SUSER_ALLOWJAIL);
1406 		if (error)
1407 			return (error);
1408 	}
1409 
1410 	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
1411 		error = priv_check_cred(cred, PRIV_VFS_LINK,
1412 		    SUSER_ALLOWJAIL);
1413 		if (error)
1414 			return (error);
1415 	}
1416 
1417 	return (0);
1418 }
1419 
1420 int
1421 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1422 {
1423 	struct vnode *vp;
1424 	struct mount *mp;
1425 	struct nameidata nd;
1426 	int vfslocked;
1427 	int lvfslocked;
1428 	int error;
1429 
1430 	bwillwrite();
1431 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
1432 	if ((error = namei(&nd)) != 0)
1433 		return (error);
1434 	vfslocked = NDHASGIANT(&nd);
1435 	NDFREE(&nd, NDF_ONLY_PNBUF);
1436 	vp = nd.ni_vp;
1437 	if (vp->v_type == VDIR) {
1438 		vrele(vp);
1439 		VFS_UNLOCK_GIANT(vfslocked);
1440 		return (EPERM);		/* POSIX */
1441 	}
1442 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1443 		vrele(vp);
1444 		VFS_UNLOCK_GIANT(vfslocked);
1445 		return (error);
1446 	}
1447 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1448 	    segflg, link, td);
1449 	if ((error = namei(&nd)) == 0) {
1450 		lvfslocked = NDHASGIANT(&nd);
1451 		if (nd.ni_vp != NULL) {
1452 			if (nd.ni_dvp == nd.ni_vp)
1453 				vrele(nd.ni_dvp);
1454 			else
1455 				vput(nd.ni_dvp);
1456 			vrele(nd.ni_vp);
1457 			error = EEXIST;
1458 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1459 		    == 0) {
1460 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1461 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1462 			error = can_hardlink(vp, td, td->td_ucred);
1463 			if (error == 0)
1464 #ifdef MAC
1465 				error = mac_check_vnode_link(td->td_ucred,
1466 				    nd.ni_dvp, vp, &nd.ni_cnd);
1467 			if (error == 0)
1468 #endif
1469 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1470 			VOP_UNLOCK(vp, 0, td);
1471 			vput(nd.ni_dvp);
1472 		}
1473 		NDFREE(&nd, NDF_ONLY_PNBUF);
1474 		VFS_UNLOCK_GIANT(lvfslocked);
1475 	}
1476 	vrele(vp);
1477 	vn_finished_write(mp);
1478 	VFS_UNLOCK_GIANT(vfslocked);
1479 	return (error);
1480 }
1481 
1482 /*
1483  * Make a symbolic link.
1484  */
1485 #ifndef _SYS_SYSPROTO_H_
1486 struct symlink_args {
1487 	char	*path;
1488 	char	*link;
1489 };
1490 #endif
1491 int
1492 symlink(td, uap)
1493 	struct thread *td;
1494 	register struct symlink_args /* {
1495 		char *path;
1496 		char *link;
1497 	} */ *uap;
1498 {
1499 
1500 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1501 }
1502 
1503 int
1504 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1505 {
1506 	struct mount *mp;
1507 	struct vattr vattr;
1508 	char *syspath;
1509 	int error;
1510 	struct nameidata nd;
1511 	int vfslocked;
1512 
1513 	if (segflg == UIO_SYSSPACE) {
1514 		syspath = path;
1515 	} else {
1516 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1517 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1518 			goto out;
1519 	}
1520 	AUDIT_ARG(text, syspath);
1521 restart:
1522 	bwillwrite();
1523 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1524 	    segflg, link, td);
1525 	if ((error = namei(&nd)) != 0)
1526 		goto out;
1527 	vfslocked = NDHASGIANT(&nd);
1528 	if (nd.ni_vp) {
1529 		NDFREE(&nd, NDF_ONLY_PNBUF);
1530 		if (nd.ni_vp == nd.ni_dvp)
1531 			vrele(nd.ni_dvp);
1532 		else
1533 			vput(nd.ni_dvp);
1534 		vrele(nd.ni_vp);
1535 		VFS_UNLOCK_GIANT(vfslocked);
1536 		error = EEXIST;
1537 		goto out;
1538 	}
1539 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1540 		NDFREE(&nd, NDF_ONLY_PNBUF);
1541 		vput(nd.ni_dvp);
1542 		VFS_UNLOCK_GIANT(vfslocked);
1543 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1544 			goto out;
1545 		goto restart;
1546 	}
1547 	VATTR_NULL(&vattr);
1548 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1549 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1550 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1551 #ifdef MAC
1552 	vattr.va_type = VLNK;
1553 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1554 	    &vattr);
1555 	if (error)
1556 		goto out2;
1557 #endif
1558 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1559 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1560 	if (error == 0)
1561 		vput(nd.ni_vp);
1562 #ifdef MAC
1563 out2:
1564 #endif
1565 	NDFREE(&nd, NDF_ONLY_PNBUF);
1566 	vput(nd.ni_dvp);
1567 	vn_finished_write(mp);
1568 	VFS_UNLOCK_GIANT(vfslocked);
1569 out:
1570 	if (segflg != UIO_SYSSPACE)
1571 		uma_zfree(namei_zone, syspath);
1572 	return (error);
1573 }
1574 
1575 /*
1576  * Delete a whiteout from the filesystem.
1577  */
1578 int
1579 undelete(td, uap)
1580 	struct thread *td;
1581 	register struct undelete_args /* {
1582 		char *path;
1583 	} */ *uap;
1584 {
1585 	int error;
1586 	struct mount *mp;
1587 	struct nameidata nd;
1588 	int vfslocked;
1589 
1590 restart:
1591 	bwillwrite();
1592 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1593 	    UIO_USERSPACE, uap->path, td);
1594 	error = namei(&nd);
1595 	if (error)
1596 		return (error);
1597 	vfslocked = NDHASGIANT(&nd);
1598 
1599 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1600 		NDFREE(&nd, NDF_ONLY_PNBUF);
1601 		if (nd.ni_vp == nd.ni_dvp)
1602 			vrele(nd.ni_dvp);
1603 		else
1604 			vput(nd.ni_dvp);
1605 		if (nd.ni_vp)
1606 			vrele(nd.ni_vp);
1607 		VFS_UNLOCK_GIANT(vfslocked);
1608 		return (EEXIST);
1609 	}
1610 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1611 		NDFREE(&nd, NDF_ONLY_PNBUF);
1612 		vput(nd.ni_dvp);
1613 		VFS_UNLOCK_GIANT(vfslocked);
1614 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1615 			return (error);
1616 		goto restart;
1617 	}
1618 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1619 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1620 	NDFREE(&nd, NDF_ONLY_PNBUF);
1621 	vput(nd.ni_dvp);
1622 	vn_finished_write(mp);
1623 	VFS_UNLOCK_GIANT(vfslocked);
1624 	return (error);
1625 }
1626 
1627 /*
1628  * Delete a name from the filesystem.
1629  */
1630 #ifndef _SYS_SYSPROTO_H_
1631 struct unlink_args {
1632 	char	*path;
1633 };
1634 #endif
1635 int
1636 unlink(td, uap)
1637 	struct thread *td;
1638 	struct unlink_args /* {
1639 		char *path;
1640 	} */ *uap;
1641 {
1642 	int error;
1643 
1644 	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1645 	return (error);
1646 }
1647 
1648 int
1649 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1650 {
1651 	struct mount *mp;
1652 	struct vnode *vp;
1653 	int error;
1654 	struct nameidata nd;
1655 	int vfslocked;
1656 
1657 restart:
1658 	bwillwrite();
1659 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1660 	    pathseg, path, td);
1661 	if ((error = namei(&nd)) != 0)
1662 		return (error == EINVAL ? EPERM : error);
1663 	vfslocked = NDHASGIANT(&nd);
1664 	vp = nd.ni_vp;
1665 	if (vp->v_type == VDIR)
1666 		error = EPERM;		/* POSIX */
1667 	else {
1668 		/*
1669 		 * The root of a mounted filesystem cannot be deleted.
1670 		 *
1671 		 * XXX: can this only be a VDIR case?
1672 		 */
1673 		if (vp->v_vflag & VV_ROOT)
1674 			error = EBUSY;
1675 	}
1676 	if (error == 0) {
1677 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1678 			NDFREE(&nd, NDF_ONLY_PNBUF);
1679 			vput(nd.ni_dvp);
1680 			if (vp == nd.ni_dvp)
1681 				vrele(vp);
1682 			else
1683 				vput(vp);
1684 			VFS_UNLOCK_GIANT(vfslocked);
1685 			if ((error = vn_start_write(NULL, &mp,
1686 			    V_XSLEEP | PCATCH)) != 0)
1687 				return (error);
1688 			goto restart;
1689 		}
1690 #ifdef MAC
1691 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1692 		    &nd.ni_cnd);
1693 		if (error)
1694 			goto out;
1695 #endif
1696 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1697 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1698 #ifdef MAC
1699 out:
1700 #endif
1701 		vn_finished_write(mp);
1702 	}
1703 	NDFREE(&nd, NDF_ONLY_PNBUF);
1704 	vput(nd.ni_dvp);
1705 	if (vp == nd.ni_dvp)
1706 		vrele(vp);
1707 	else
1708 		vput(vp);
1709 	VFS_UNLOCK_GIANT(vfslocked);
1710 	return (error);
1711 }
1712 
1713 /*
1714  * Reposition read/write file offset.
1715  */
1716 #ifndef _SYS_SYSPROTO_H_
1717 struct lseek_args {
1718 	int	fd;
1719 	int	pad;
1720 	off_t	offset;
1721 	int	whence;
1722 };
1723 #endif
1724 int
1725 lseek(td, uap)
1726 	struct thread *td;
1727 	register struct lseek_args /* {
1728 		int fd;
1729 		int pad;
1730 		off_t offset;
1731 		int whence;
1732 	} */ *uap;
1733 {
1734 	struct ucred *cred = td->td_ucred;
1735 	struct file *fp;
1736 	struct vnode *vp;
1737 	struct vattr vattr;
1738 	off_t offset;
1739 	int error, noneg;
1740 	int vfslocked;
1741 
1742 	if ((error = fget(td, uap->fd, &fp)) != 0)
1743 		return (error);
1744 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1745 		fdrop(fp, td);
1746 		return (ESPIPE);
1747 	}
1748 	vp = fp->f_vnode;
1749 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1750 	noneg = (vp->v_type != VCHR);
1751 	offset = uap->offset;
1752 	switch (uap->whence) {
1753 	case L_INCR:
1754 		if (noneg &&
1755 		    (fp->f_offset < 0 ||
1756 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1757 			error = EOVERFLOW;
1758 			break;
1759 		}
1760 		offset += fp->f_offset;
1761 		break;
1762 	case L_XTND:
1763 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1764 		error = VOP_GETATTR(vp, &vattr, cred, td);
1765 		VOP_UNLOCK(vp, 0, td);
1766 		if (error)
1767 			break;
1768 		if (noneg &&
1769 		    (vattr.va_size > OFF_MAX ||
1770 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1771 			error = EOVERFLOW;
1772 			break;
1773 		}
1774 		offset += vattr.va_size;
1775 		break;
1776 	case L_SET:
1777 		break;
1778 	default:
1779 		error = EINVAL;
1780 	}
1781 	if (error == 0 && noneg && offset < 0)
1782 		error = EINVAL;
1783 	if (error != 0)
1784 		goto drop;
1785 	fp->f_offset = offset;
1786 	*(off_t *)(td->td_retval) = fp->f_offset;
1787 drop:
1788 	fdrop(fp, td);
1789 	VFS_UNLOCK_GIANT(vfslocked);
1790 	return (error);
1791 }
1792 
1793 #if defined(COMPAT_43)
1794 /*
1795  * Reposition read/write file offset.
1796  */
1797 #ifndef _SYS_SYSPROTO_H_
1798 struct olseek_args {
1799 	int	fd;
1800 	long	offset;
1801 	int	whence;
1802 };
1803 #endif
1804 int
1805 olseek(td, uap)
1806 	struct thread *td;
1807 	register struct olseek_args /* {
1808 		int fd;
1809 		long offset;
1810 		int whence;
1811 	} */ *uap;
1812 {
1813 	struct lseek_args /* {
1814 		int fd;
1815 		int pad;
1816 		off_t offset;
1817 		int whence;
1818 	} */ nuap;
1819 	int error;
1820 
1821 	nuap.fd = uap->fd;
1822 	nuap.offset = uap->offset;
1823 	nuap.whence = uap->whence;
1824 	error = lseek(td, &nuap);
1825 	return (error);
1826 }
1827 #endif /* COMPAT_43 */
1828 
1829 /*
1830  * Check access permissions using passed credentials.
1831  */
1832 static int
1833 vn_access(vp, user_flags, cred, td)
1834 	struct vnode	*vp;
1835 	int		user_flags;
1836 	struct ucred	*cred;
1837 	struct thread	*td;
1838 {
1839 	int error, flags;
1840 
1841 	/* Flags == 0 means only check for existence. */
1842 	error = 0;
1843 	if (user_flags) {
1844 		flags = 0;
1845 		if (user_flags & R_OK)
1846 			flags |= VREAD;
1847 		if (user_flags & W_OK)
1848 			flags |= VWRITE;
1849 		if (user_flags & X_OK)
1850 			flags |= VEXEC;
1851 #ifdef MAC
1852 		error = mac_check_vnode_access(cred, vp, flags);
1853 		if (error)
1854 			return (error);
1855 #endif
1856 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1857 			error = VOP_ACCESS(vp, flags, cred, td);
1858 	}
1859 	return (error);
1860 }
1861 
1862 /*
1863  * Check access permissions using "real" credentials.
1864  */
1865 #ifndef _SYS_SYSPROTO_H_
1866 struct access_args {
1867 	char	*path;
1868 	int	flags;
1869 };
1870 #endif
1871 int
1872 access(td, uap)
1873 	struct thread *td;
1874 	register struct access_args /* {
1875 		char *path;
1876 		int flags;
1877 	} */ *uap;
1878 {
1879 
1880 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1881 }
1882 
1883 int
1884 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1885 {
1886 	struct ucred *cred, *tmpcred;
1887 	register struct vnode *vp;
1888 	struct nameidata nd;
1889 	int vfslocked;
1890 	int error;
1891 
1892 	/*
1893 	 * Create and modify a temporary credential instead of one that
1894 	 * is potentially shared.  This could also mess up socket
1895 	 * buffer accounting which can run in an interrupt context.
1896 	 */
1897 	cred = td->td_ucred;
1898 	tmpcred = crdup(cred);
1899 	tmpcred->cr_uid = cred->cr_ruid;
1900 	tmpcred->cr_groups[0] = cred->cr_rgid;
1901 	td->td_ucred = tmpcred;
1902 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1903 	    pathseg, path, td);
1904 	if ((error = namei(&nd)) != 0)
1905 		goto out1;
1906 	vfslocked = NDHASGIANT(&nd);
1907 	vp = nd.ni_vp;
1908 
1909 	error = vn_access(vp, flags, tmpcred, td);
1910 	NDFREE(&nd, NDF_ONLY_PNBUF);
1911 	vput(vp);
1912 	VFS_UNLOCK_GIANT(vfslocked);
1913 out1:
1914 	td->td_ucred = cred;
1915 	crfree(tmpcred);
1916 	return (error);
1917 }
1918 
1919 /*
1920  * Check access permissions using "effective" credentials.
1921  */
1922 #ifndef _SYS_SYSPROTO_H_
1923 struct eaccess_args {
1924 	char	*path;
1925 	int	flags;
1926 };
1927 #endif
1928 int
1929 eaccess(td, uap)
1930 	struct thread *td;
1931 	register struct eaccess_args /* {
1932 		char *path;
1933 		int flags;
1934 	} */ *uap;
1935 {
1936 
1937 	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1938 }
1939 
1940 int
1941 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1942 {
1943 	struct nameidata nd;
1944 	struct vnode *vp;
1945 	int vfslocked;
1946 	int error;
1947 
1948 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1949 	    pathseg, path, td);
1950 	if ((error = namei(&nd)) != 0)
1951 		return (error);
1952 	vp = nd.ni_vp;
1953 	vfslocked = NDHASGIANT(&nd);
1954 	error = vn_access(vp, flags, td->td_ucred, td);
1955 	NDFREE(&nd, NDF_ONLY_PNBUF);
1956 	vput(vp);
1957 	VFS_UNLOCK_GIANT(vfslocked);
1958 	return (error);
1959 }
1960 
1961 #if defined(COMPAT_43)
1962 /*
1963  * Get file status; this version follows links.
1964  */
1965 #ifndef _SYS_SYSPROTO_H_
1966 struct ostat_args {
1967 	char	*path;
1968 	struct ostat *ub;
1969 };
1970 #endif
1971 int
1972 ostat(td, uap)
1973 	struct thread *td;
1974 	register struct ostat_args /* {
1975 		char *path;
1976 		struct ostat *ub;
1977 	} */ *uap;
1978 {
1979 	struct stat sb;
1980 	struct ostat osb;
1981 	int error;
1982 
1983 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1984 	if (error)
1985 		return (error);
1986 	cvtstat(&sb, &osb);
1987 	error = copyout(&osb, uap->ub, sizeof (osb));
1988 	return (error);
1989 }
1990 
1991 /*
1992  * Get file status; this version does not follow links.
1993  */
1994 #ifndef _SYS_SYSPROTO_H_
1995 struct olstat_args {
1996 	char	*path;
1997 	struct ostat *ub;
1998 };
1999 #endif
2000 int
2001 olstat(td, uap)
2002 	struct thread *td;
2003 	register struct olstat_args /* {
2004 		char *path;
2005 		struct ostat *ub;
2006 	} */ *uap;
2007 {
2008 	struct stat sb;
2009 	struct ostat osb;
2010 	int error;
2011 
2012 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2013 	if (error)
2014 		return (error);
2015 	cvtstat(&sb, &osb);
2016 	error = copyout(&osb, uap->ub, sizeof (osb));
2017 	return (error);
2018 }
2019 
2020 /*
2021  * Convert from an old to a new stat structure.
2022  */
2023 void
2024 cvtstat(st, ost)
2025 	struct stat *st;
2026 	struct ostat *ost;
2027 {
2028 
2029 	ost->st_dev = st->st_dev;
2030 	ost->st_ino = st->st_ino;
2031 	ost->st_mode = st->st_mode;
2032 	ost->st_nlink = st->st_nlink;
2033 	ost->st_uid = st->st_uid;
2034 	ost->st_gid = st->st_gid;
2035 	ost->st_rdev = st->st_rdev;
2036 	if (st->st_size < (quad_t)1 << 32)
2037 		ost->st_size = st->st_size;
2038 	else
2039 		ost->st_size = -2;
2040 	ost->st_atime = st->st_atime;
2041 	ost->st_mtime = st->st_mtime;
2042 	ost->st_ctime = st->st_ctime;
2043 	ost->st_blksize = st->st_blksize;
2044 	ost->st_blocks = st->st_blocks;
2045 	ost->st_flags = st->st_flags;
2046 	ost->st_gen = st->st_gen;
2047 }
2048 #endif /* COMPAT_43 */
2049 
2050 /*
2051  * Get file status; this version follows links.
2052  */
2053 #ifndef _SYS_SYSPROTO_H_
2054 struct stat_args {
2055 	char	*path;
2056 	struct stat *ub;
2057 };
2058 #endif
2059 int
2060 stat(td, uap)
2061 	struct thread *td;
2062 	register struct stat_args /* {
2063 		char *path;
2064 		struct stat *ub;
2065 	} */ *uap;
2066 {
2067 	struct stat sb;
2068 	int error;
2069 
2070 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2071 	if (error == 0)
2072 		error = copyout(&sb, uap->ub, sizeof (sb));
2073 	return (error);
2074 }
2075 
2076 int
2077 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2078 {
2079 	struct nameidata nd;
2080 	struct stat sb;
2081 	int error, vfslocked;
2082 
2083 	NDINIT(&nd, LOOKUP,
2084 	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2085 	    pathseg, path, td);
2086 	if ((error = namei(&nd)) != 0)
2087 		return (error);
2088 	vfslocked = NDHASGIANT(&nd);
2089 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2090 	NDFREE(&nd, NDF_ONLY_PNBUF);
2091 	vput(nd.ni_vp);
2092 	VFS_UNLOCK_GIANT(vfslocked);
2093 	if (mtx_owned(&Giant))
2094 		printf("stat(%d): %s\n", vfslocked, path);
2095 	if (error)
2096 		return (error);
2097 	*sbp = sb;
2098 	return (0);
2099 }
2100 
2101 /*
2102  * Get file status; this version does not follow links.
2103  */
2104 #ifndef _SYS_SYSPROTO_H_
2105 struct lstat_args {
2106 	char	*path;
2107 	struct stat *ub;
2108 };
2109 #endif
2110 int
2111 lstat(td, uap)
2112 	struct thread *td;
2113 	register struct lstat_args /* {
2114 		char *path;
2115 		struct stat *ub;
2116 	} */ *uap;
2117 {
2118 	struct stat sb;
2119 	int error;
2120 
2121 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2122 	if (error == 0)
2123 		error = copyout(&sb, uap->ub, sizeof (sb));
2124 	return (error);
2125 }
2126 
2127 int
2128 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2129 {
2130 	struct vnode *vp;
2131 	struct stat sb;
2132 	struct nameidata nd;
2133 	int error, vfslocked;
2134 
2135 	NDINIT(&nd, LOOKUP,
2136 	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2137 	    pathseg, path, td);
2138 	if ((error = namei(&nd)) != 0)
2139 		return (error);
2140 	vfslocked = NDHASGIANT(&nd);
2141 	vp = nd.ni_vp;
2142 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2143 	NDFREE(&nd, NDF_ONLY_PNBUF);
2144 	vput(vp);
2145 	VFS_UNLOCK_GIANT(vfslocked);
2146 	if (error)
2147 		return (error);
2148 	*sbp = sb;
2149 	return (0);
2150 }
2151 
2152 /*
2153  * Implementation of the NetBSD [l]stat() functions.
2154  */
2155 void
2156 cvtnstat(sb, nsb)
2157 	struct stat *sb;
2158 	struct nstat *nsb;
2159 {
2160 	bzero(nsb, sizeof *nsb);
2161 	nsb->st_dev = sb->st_dev;
2162 	nsb->st_ino = sb->st_ino;
2163 	nsb->st_mode = sb->st_mode;
2164 	nsb->st_nlink = sb->st_nlink;
2165 	nsb->st_uid = sb->st_uid;
2166 	nsb->st_gid = sb->st_gid;
2167 	nsb->st_rdev = sb->st_rdev;
2168 	nsb->st_atimespec = sb->st_atimespec;
2169 	nsb->st_mtimespec = sb->st_mtimespec;
2170 	nsb->st_ctimespec = sb->st_ctimespec;
2171 	nsb->st_size = sb->st_size;
2172 	nsb->st_blocks = sb->st_blocks;
2173 	nsb->st_blksize = sb->st_blksize;
2174 	nsb->st_flags = sb->st_flags;
2175 	nsb->st_gen = sb->st_gen;
2176 	nsb->st_birthtimespec = sb->st_birthtimespec;
2177 }
2178 
2179 #ifndef _SYS_SYSPROTO_H_
2180 struct nstat_args {
2181 	char	*path;
2182 	struct nstat *ub;
2183 };
2184 #endif
2185 int
2186 nstat(td, uap)
2187 	struct thread *td;
2188 	register struct nstat_args /* {
2189 		char *path;
2190 		struct nstat *ub;
2191 	} */ *uap;
2192 {
2193 	struct stat sb;
2194 	struct nstat nsb;
2195 	int error;
2196 
2197 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2198 	if (error)
2199 		return (error);
2200 	cvtnstat(&sb, &nsb);
2201 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2202 	return (error);
2203 }
2204 
2205 /*
2206  * NetBSD lstat.  Get file status; this version does not follow links.
2207  */
2208 #ifndef _SYS_SYSPROTO_H_
2209 struct lstat_args {
2210 	char	*path;
2211 	struct stat *ub;
2212 };
2213 #endif
2214 int
2215 nlstat(td, uap)
2216 	struct thread *td;
2217 	register struct nlstat_args /* {
2218 		char *path;
2219 		struct nstat *ub;
2220 	} */ *uap;
2221 {
2222 	struct stat sb;
2223 	struct nstat nsb;
2224 	int error;
2225 
2226 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2227 	if (error)
2228 		return (error);
2229 	cvtnstat(&sb, &nsb);
2230 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2231 	return (error);
2232 }
2233 
2234 /*
2235  * Get configurable pathname variables.
2236  */
2237 #ifndef _SYS_SYSPROTO_H_
2238 struct pathconf_args {
2239 	char	*path;
2240 	int	name;
2241 };
2242 #endif
2243 int
2244 pathconf(td, uap)
2245 	struct thread *td;
2246 	register struct pathconf_args /* {
2247 		char *path;
2248 		int name;
2249 	} */ *uap;
2250 {
2251 
2252 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2253 }
2254 
2255 int
2256 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2257 {
2258 	struct nameidata nd;
2259 	int error, vfslocked;
2260 
2261 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2262 	    pathseg, path, td);
2263 	if ((error = namei(&nd)) != 0)
2264 		return (error);
2265 	vfslocked = NDHASGIANT(&nd);
2266 	NDFREE(&nd, NDF_ONLY_PNBUF);
2267 
2268 	/* If asynchronous I/O is available, it works for all files. */
2269 	if (name == _PC_ASYNC_IO)
2270 		td->td_retval[0] = async_io_version;
2271 	else
2272 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2273 	vput(nd.ni_vp);
2274 	VFS_UNLOCK_GIANT(vfslocked);
2275 	return (error);
2276 }
2277 
2278 /*
2279  * Return target name of a symbolic link.
2280  */
2281 #ifndef _SYS_SYSPROTO_H_
2282 struct readlink_args {
2283 	char	*path;
2284 	char	*buf;
2285 	int	count;
2286 };
2287 #endif
2288 int
2289 readlink(td, uap)
2290 	struct thread *td;
2291 	register struct readlink_args /* {
2292 		char *path;
2293 		char *buf;
2294 		int count;
2295 	} */ *uap;
2296 {
2297 
2298 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2299 	    UIO_USERSPACE, uap->count));
2300 }
2301 
2302 int
2303 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2304     enum uio_seg bufseg, int count)
2305 {
2306 	register struct vnode *vp;
2307 	struct iovec aiov;
2308 	struct uio auio;
2309 	int error;
2310 	struct nameidata nd;
2311 	int vfslocked;
2312 
2313 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2314 	    pathseg, path, td);
2315 	if ((error = namei(&nd)) != 0)
2316 		return (error);
2317 	NDFREE(&nd, NDF_ONLY_PNBUF);
2318 	vfslocked = NDHASGIANT(&nd);
2319 	vp = nd.ni_vp;
2320 #ifdef MAC
2321 	error = mac_check_vnode_readlink(td->td_ucred, vp);
2322 	if (error) {
2323 		vput(vp);
2324 		VFS_UNLOCK_GIANT(vfslocked);
2325 		return (error);
2326 	}
2327 #endif
2328 	if (vp->v_type != VLNK)
2329 		error = EINVAL;
2330 	else {
2331 		aiov.iov_base = buf;
2332 		aiov.iov_len = count;
2333 		auio.uio_iov = &aiov;
2334 		auio.uio_iovcnt = 1;
2335 		auio.uio_offset = 0;
2336 		auio.uio_rw = UIO_READ;
2337 		auio.uio_segflg = bufseg;
2338 		auio.uio_td = td;
2339 		auio.uio_resid = count;
2340 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2341 	}
2342 	vput(vp);
2343 	VFS_UNLOCK_GIANT(vfslocked);
2344 	td->td_retval[0] = count - auio.uio_resid;
2345 	return (error);
2346 }
2347 
2348 /*
2349  * Common implementation code for chflags() and fchflags().
2350  */
2351 static int
2352 setfflags(td, vp, flags)
2353 	struct thread *td;
2354 	struct vnode *vp;
2355 	int flags;
2356 {
2357 	int error;
2358 	struct mount *mp;
2359 	struct vattr vattr;
2360 
2361 	/*
2362 	 * Prevent non-root users from setting flags on devices.  When
2363 	 * a device is reused, users can retain ownership of the device
2364 	 * if they are allowed to set flags and programs assume that
2365 	 * chown can't fail when done as root.
2366 	 */
2367 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2368 		error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV,
2369 		    SUSER_ALLOWJAIL);
2370 		if (error)
2371 			return (error);
2372 	}
2373 
2374 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2375 		return (error);
2376 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2377 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2378 	VATTR_NULL(&vattr);
2379 	vattr.va_flags = flags;
2380 #ifdef MAC
2381 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2382 	if (error == 0)
2383 #endif
2384 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2385 	VOP_UNLOCK(vp, 0, td);
2386 	vn_finished_write(mp);
2387 	return (error);
2388 }
2389 
2390 /*
2391  * Change flags of a file given a path name.
2392  */
2393 #ifndef _SYS_SYSPROTO_H_
2394 struct chflags_args {
2395 	char	*path;
2396 	int	flags;
2397 };
2398 #endif
2399 int
2400 chflags(td, uap)
2401 	struct thread *td;
2402 	register struct chflags_args /* {
2403 		char *path;
2404 		int flags;
2405 	} */ *uap;
2406 {
2407 	int error;
2408 	struct nameidata nd;
2409 	int vfslocked;
2410 
2411 	AUDIT_ARG(fflags, uap->flags);
2412 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2413 	    uap->path, td);
2414 	if ((error = namei(&nd)) != 0)
2415 		return (error);
2416 	NDFREE(&nd, NDF_ONLY_PNBUF);
2417 	vfslocked = NDHASGIANT(&nd);
2418 	error = setfflags(td, nd.ni_vp, uap->flags);
2419 	vrele(nd.ni_vp);
2420 	VFS_UNLOCK_GIANT(vfslocked);
2421 	return (error);
2422 }
2423 
2424 /*
2425  * Same as chflags() but doesn't follow symlinks.
2426  */
2427 int
2428 lchflags(td, uap)
2429 	struct thread *td;
2430 	register struct lchflags_args /* {
2431 		char *path;
2432 		int flags;
2433 	} */ *uap;
2434 {
2435 	int error;
2436 	struct nameidata nd;
2437 	int vfslocked;
2438 
2439 	AUDIT_ARG(fflags, uap->flags);
2440 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2441 	    uap->path, td);
2442 	if ((error = namei(&nd)) != 0)
2443 		return (error);
2444 	vfslocked = NDHASGIANT(&nd);
2445 	NDFREE(&nd, NDF_ONLY_PNBUF);
2446 	error = setfflags(td, nd.ni_vp, uap->flags);
2447 	vrele(nd.ni_vp);
2448 	VFS_UNLOCK_GIANT(vfslocked);
2449 	return (error);
2450 }
2451 
2452 /*
2453  * Change flags of a file given a file descriptor.
2454  */
2455 #ifndef _SYS_SYSPROTO_H_
2456 struct fchflags_args {
2457 	int	fd;
2458 	int	flags;
2459 };
2460 #endif
2461 int
2462 fchflags(td, uap)
2463 	struct thread *td;
2464 	register struct fchflags_args /* {
2465 		int fd;
2466 		int flags;
2467 	} */ *uap;
2468 {
2469 	struct file *fp;
2470 	int vfslocked;
2471 	int error;
2472 
2473 	AUDIT_ARG(fd, uap->fd);
2474 	AUDIT_ARG(fflags, uap->flags);
2475 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2476 		return (error);
2477 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2478 #ifdef AUDIT
2479 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2480 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2481 	VOP_UNLOCK(fp->f_vnode, 0, td);
2482 #endif
2483 	error = setfflags(td, fp->f_vnode, uap->flags);
2484 	VFS_UNLOCK_GIANT(vfslocked);
2485 	fdrop(fp, td);
2486 	return (error);
2487 }
2488 
2489 /*
2490  * Common implementation code for chmod(), lchmod() and fchmod().
2491  */
2492 static int
2493 setfmode(td, vp, mode)
2494 	struct thread *td;
2495 	struct vnode *vp;
2496 	int mode;
2497 {
2498 	int error;
2499 	struct mount *mp;
2500 	struct vattr vattr;
2501 
2502 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2503 		return (error);
2504 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2505 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2506 	VATTR_NULL(&vattr);
2507 	vattr.va_mode = mode & ALLPERMS;
2508 #ifdef MAC
2509 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2510 	if (error == 0)
2511 #endif
2512 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2513 	VOP_UNLOCK(vp, 0, td);
2514 	vn_finished_write(mp);
2515 	return (error);
2516 }
2517 
2518 /*
2519  * Change mode of a file given path name.
2520  */
2521 #ifndef _SYS_SYSPROTO_H_
2522 struct chmod_args {
2523 	char	*path;
2524 	int	mode;
2525 };
2526 #endif
2527 int
2528 chmod(td, uap)
2529 	struct thread *td;
2530 	register struct chmod_args /* {
2531 		char *path;
2532 		int mode;
2533 	} */ *uap;
2534 {
2535 
2536 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2537 }
2538 
2539 int
2540 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2541 {
2542 	int error;
2543 	struct nameidata nd;
2544 	int vfslocked;
2545 
2546 	AUDIT_ARG(mode, mode);
2547 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2548 	if ((error = namei(&nd)) != 0)
2549 		return (error);
2550 	vfslocked = NDHASGIANT(&nd);
2551 	NDFREE(&nd, NDF_ONLY_PNBUF);
2552 	error = setfmode(td, nd.ni_vp, mode);
2553 	vrele(nd.ni_vp);
2554 	VFS_UNLOCK_GIANT(vfslocked);
2555 	return (error);
2556 }
2557 
2558 /*
2559  * Change mode of a file given path name (don't follow links.)
2560  */
2561 #ifndef _SYS_SYSPROTO_H_
2562 struct lchmod_args {
2563 	char	*path;
2564 	int	mode;
2565 };
2566 #endif
2567 int
2568 lchmod(td, uap)
2569 	struct thread *td;
2570 	register struct lchmod_args /* {
2571 		char *path;
2572 		int mode;
2573 	} */ *uap;
2574 {
2575 	int error;
2576 	struct nameidata nd;
2577 	int vfslocked;
2578 
2579 	AUDIT_ARG(mode, (mode_t)uap->mode);
2580 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2581 	    uap->path, td);
2582 	if ((error = namei(&nd)) != 0)
2583 		return (error);
2584 	vfslocked = NDHASGIANT(&nd);
2585 	NDFREE(&nd, NDF_ONLY_PNBUF);
2586 	error = setfmode(td, nd.ni_vp, uap->mode);
2587 	vrele(nd.ni_vp);
2588 	VFS_UNLOCK_GIANT(vfslocked);
2589 	return (error);
2590 }
2591 
2592 /*
2593  * Change mode of a file given a file descriptor.
2594  */
2595 #ifndef _SYS_SYSPROTO_H_
2596 struct fchmod_args {
2597 	int	fd;
2598 	int	mode;
2599 };
2600 #endif
2601 int
2602 fchmod(td, uap)
2603 	struct thread *td;
2604 	register struct fchmod_args /* {
2605 		int fd;
2606 		int mode;
2607 	} */ *uap;
2608 {
2609 	struct file *fp;
2610 	int vfslocked;
2611 	int error;
2612 
2613 	AUDIT_ARG(fd, uap->fd);
2614 	AUDIT_ARG(mode, uap->mode);
2615 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2616 		return (error);
2617 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2618 #ifdef AUDIT
2619 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2620 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2621 	VOP_UNLOCK(fp->f_vnode, 0, td);
2622 #endif
2623 	error = setfmode(td, fp->f_vnode, uap->mode);
2624 	VFS_UNLOCK_GIANT(vfslocked);
2625 	fdrop(fp, td);
2626 	return (error);
2627 }
2628 
2629 /*
2630  * Common implementation for chown(), lchown(), and fchown()
2631  */
2632 static int
2633 setfown(td, vp, uid, gid)
2634 	struct thread *td;
2635 	struct vnode *vp;
2636 	uid_t uid;
2637 	gid_t gid;
2638 {
2639 	int error;
2640 	struct mount *mp;
2641 	struct vattr vattr;
2642 
2643 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2644 		return (error);
2645 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2646 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2647 	VATTR_NULL(&vattr);
2648 	vattr.va_uid = uid;
2649 	vattr.va_gid = gid;
2650 #ifdef MAC
2651 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2652 	    vattr.va_gid);
2653 	if (error == 0)
2654 #endif
2655 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2656 	VOP_UNLOCK(vp, 0, td);
2657 	vn_finished_write(mp);
2658 	return (error);
2659 }
2660 
2661 /*
2662  * Set ownership given a path name.
2663  */
2664 #ifndef _SYS_SYSPROTO_H_
2665 struct chown_args {
2666 	char	*path;
2667 	int	uid;
2668 	int	gid;
2669 };
2670 #endif
2671 int
2672 chown(td, uap)
2673 	struct thread *td;
2674 	register struct chown_args /* {
2675 		char *path;
2676 		int uid;
2677 		int gid;
2678 	} */ *uap;
2679 {
2680 
2681 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2682 }
2683 
2684 int
2685 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2686     int gid)
2687 {
2688 	int error;
2689 	struct nameidata nd;
2690 	int vfslocked;
2691 
2692 	AUDIT_ARG(owner, uid, gid);
2693 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2694 	if ((error = namei(&nd)) != 0)
2695 		return (error);
2696 	vfslocked = NDHASGIANT(&nd);
2697 	NDFREE(&nd, NDF_ONLY_PNBUF);
2698 	error = setfown(td, nd.ni_vp, uid, gid);
2699 	vrele(nd.ni_vp);
2700 	VFS_UNLOCK_GIANT(vfslocked);
2701 	return (error);
2702 }
2703 
2704 /*
2705  * Set ownership given a path name, do not cross symlinks.
2706  */
2707 #ifndef _SYS_SYSPROTO_H_
2708 struct lchown_args {
2709 	char	*path;
2710 	int	uid;
2711 	int	gid;
2712 };
2713 #endif
2714 int
2715 lchown(td, uap)
2716 	struct thread *td;
2717 	register struct lchown_args /* {
2718 		char *path;
2719 		int uid;
2720 		int gid;
2721 	} */ *uap;
2722 {
2723 
2724 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2725 }
2726 
2727 int
2728 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2729     int gid)
2730 {
2731 	int error;
2732 	struct nameidata nd;
2733 	int vfslocked;
2734 
2735 	AUDIT_ARG(owner, uid, gid);
2736 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2737 	if ((error = namei(&nd)) != 0)
2738 		return (error);
2739 	vfslocked = NDHASGIANT(&nd);
2740 	NDFREE(&nd, NDF_ONLY_PNBUF);
2741 	error = setfown(td, nd.ni_vp, uid, gid);
2742 	vrele(nd.ni_vp);
2743 	VFS_UNLOCK_GIANT(vfslocked);
2744 	return (error);
2745 }
2746 
2747 /*
2748  * Set ownership given a file descriptor.
2749  */
2750 #ifndef _SYS_SYSPROTO_H_
2751 struct fchown_args {
2752 	int	fd;
2753 	int	uid;
2754 	int	gid;
2755 };
2756 #endif
2757 int
2758 fchown(td, uap)
2759 	struct thread *td;
2760 	register struct fchown_args /* {
2761 		int fd;
2762 		int uid;
2763 		int gid;
2764 	} */ *uap;
2765 {
2766 	struct file *fp;
2767 	int vfslocked;
2768 	int error;
2769 
2770 	AUDIT_ARG(fd, uap->fd);
2771 	AUDIT_ARG(owner, uap->uid, uap->gid);
2772 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2773 		return (error);
2774 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2775 #ifdef AUDIT
2776 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2777 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2778 	VOP_UNLOCK(fp->f_vnode, 0, td);
2779 #endif
2780 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2781 	VFS_UNLOCK_GIANT(vfslocked);
2782 	fdrop(fp, td);
2783 	return (error);
2784 }
2785 
2786 /*
2787  * Common implementation code for utimes(), lutimes(), and futimes().
2788  */
2789 static int
2790 getutimes(usrtvp, tvpseg, tsp)
2791 	const struct timeval *usrtvp;
2792 	enum uio_seg tvpseg;
2793 	struct timespec *tsp;
2794 {
2795 	struct timeval tv[2];
2796 	const struct timeval *tvp;
2797 	int error;
2798 
2799 	if (usrtvp == NULL) {
2800 		microtime(&tv[0]);
2801 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2802 		tsp[1] = tsp[0];
2803 	} else {
2804 		if (tvpseg == UIO_SYSSPACE) {
2805 			tvp = usrtvp;
2806 		} else {
2807 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2808 				return (error);
2809 			tvp = tv;
2810 		}
2811 
2812 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2813 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2814 			return (EINVAL);
2815 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2816 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2817 	}
2818 	return (0);
2819 }
2820 
2821 /*
2822  * Common implementation code for utimes(), lutimes(), and futimes().
2823  */
2824 static int
2825 setutimes(td, vp, ts, numtimes, nullflag)
2826 	struct thread *td;
2827 	struct vnode *vp;
2828 	const struct timespec *ts;
2829 	int numtimes;
2830 	int nullflag;
2831 {
2832 	int error, setbirthtime;
2833 	struct mount *mp;
2834 	struct vattr vattr;
2835 
2836 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2837 		return (error);
2838 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2839 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2840 	setbirthtime = 0;
2841 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2842 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2843 		setbirthtime = 1;
2844 	VATTR_NULL(&vattr);
2845 	vattr.va_atime = ts[0];
2846 	vattr.va_mtime = ts[1];
2847 	if (setbirthtime)
2848 		vattr.va_birthtime = ts[1];
2849 	if (numtimes > 2)
2850 		vattr.va_birthtime = ts[2];
2851 	if (nullflag)
2852 		vattr.va_vaflags |= VA_UTIMES_NULL;
2853 #ifdef MAC
2854 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2855 	    vattr.va_mtime);
2856 #endif
2857 	if (error == 0)
2858 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2859 	VOP_UNLOCK(vp, 0, td);
2860 	vn_finished_write(mp);
2861 	return (error);
2862 }
2863 
2864 /*
2865  * Set the access and modification times of a file.
2866  */
2867 #ifndef _SYS_SYSPROTO_H_
2868 struct utimes_args {
2869 	char	*path;
2870 	struct	timeval *tptr;
2871 };
2872 #endif
2873 int
2874 utimes(td, uap)
2875 	struct thread *td;
2876 	register struct utimes_args /* {
2877 		char *path;
2878 		struct timeval *tptr;
2879 	} */ *uap;
2880 {
2881 
2882 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2883 	    UIO_USERSPACE));
2884 }
2885 
2886 int
2887 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2888     struct timeval *tptr, enum uio_seg tptrseg)
2889 {
2890 	struct timespec ts[2];
2891 	int error;
2892 	struct nameidata nd;
2893 	int vfslocked;
2894 
2895 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2896 		return (error);
2897 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2898 	if ((error = namei(&nd)) != 0)
2899 		return (error);
2900 	vfslocked = NDHASGIANT(&nd);
2901 	NDFREE(&nd, NDF_ONLY_PNBUF);
2902 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2903 	vrele(nd.ni_vp);
2904 	VFS_UNLOCK_GIANT(vfslocked);
2905 	return (error);
2906 }
2907 
2908 /*
2909  * Set the access and modification times of a file.
2910  */
2911 #ifndef _SYS_SYSPROTO_H_
2912 struct lutimes_args {
2913 	char	*path;
2914 	struct	timeval *tptr;
2915 };
2916 #endif
2917 int
2918 lutimes(td, uap)
2919 	struct thread *td;
2920 	register struct lutimes_args /* {
2921 		char *path;
2922 		struct timeval *tptr;
2923 	} */ *uap;
2924 {
2925 
2926 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2927 	    UIO_USERSPACE));
2928 }
2929 
2930 int
2931 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2932     struct timeval *tptr, enum uio_seg tptrseg)
2933 {
2934 	struct timespec ts[2];
2935 	int error;
2936 	struct nameidata nd;
2937 	int vfslocked;
2938 
2939 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2940 		return (error);
2941 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2942 	if ((error = namei(&nd)) != 0)
2943 		return (error);
2944 	vfslocked = NDHASGIANT(&nd);
2945 	NDFREE(&nd, NDF_ONLY_PNBUF);
2946 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2947 	vrele(nd.ni_vp);
2948 	VFS_UNLOCK_GIANT(vfslocked);
2949 	return (error);
2950 }
2951 
2952 /*
2953  * Set the access and modification times of a file.
2954  */
2955 #ifndef _SYS_SYSPROTO_H_
2956 struct futimes_args {
2957 	int	fd;
2958 	struct	timeval *tptr;
2959 };
2960 #endif
2961 int
2962 futimes(td, uap)
2963 	struct thread *td;
2964 	register struct futimes_args /* {
2965 		int  fd;
2966 		struct timeval *tptr;
2967 	} */ *uap;
2968 {
2969 
2970 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2971 }
2972 
2973 int
2974 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2975     enum uio_seg tptrseg)
2976 {
2977 	struct timespec ts[2];
2978 	struct file *fp;
2979 	int vfslocked;
2980 	int error;
2981 
2982 	AUDIT_ARG(fd, fd);
2983 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2984 		return (error);
2985 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2986 		return (error);
2987 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2988 #ifdef AUDIT
2989 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2990 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2991 	VOP_UNLOCK(fp->f_vnode, 0, td);
2992 #endif
2993 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2994 	VFS_UNLOCK_GIANT(vfslocked);
2995 	fdrop(fp, td);
2996 	return (error);
2997 }
2998 
2999 /*
3000  * Truncate a file given its path name.
3001  */
3002 #ifndef _SYS_SYSPROTO_H_
3003 struct truncate_args {
3004 	char	*path;
3005 	int	pad;
3006 	off_t	length;
3007 };
3008 #endif
3009 int
3010 truncate(td, uap)
3011 	struct thread *td;
3012 	register struct truncate_args /* {
3013 		char *path;
3014 		int pad;
3015 		off_t length;
3016 	} */ *uap;
3017 {
3018 
3019 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3020 }
3021 
3022 int
3023 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3024 {
3025 	struct mount *mp;
3026 	struct vnode *vp;
3027 	struct vattr vattr;
3028 	int error;
3029 	struct nameidata nd;
3030 	int vfslocked;
3031 
3032 	if (length < 0)
3033 		return(EINVAL);
3034 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3035 	if ((error = namei(&nd)) != 0)
3036 		return (error);
3037 	vfslocked = NDHASGIANT(&nd);
3038 	vp = nd.ni_vp;
3039 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3040 		vrele(vp);
3041 		VFS_UNLOCK_GIANT(vfslocked);
3042 		return (error);
3043 	}
3044 	NDFREE(&nd, NDF_ONLY_PNBUF);
3045 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3046 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3047 	if (vp->v_type == VDIR)
3048 		error = EISDIR;
3049 #ifdef MAC
3050 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
3051 	}
3052 #endif
3053 	else if ((error = vn_writechk(vp)) == 0 &&
3054 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3055 		VATTR_NULL(&vattr);
3056 		vattr.va_size = length;
3057 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3058 	}
3059 	vput(vp);
3060 	vn_finished_write(mp);
3061 	VFS_UNLOCK_GIANT(vfslocked);
3062 	return (error);
3063 }
3064 
3065 /*
3066  * Truncate a file given a file descriptor.
3067  */
3068 #ifndef _SYS_SYSPROTO_H_
3069 struct ftruncate_args {
3070 	int	fd;
3071 	int	pad;
3072 	off_t	length;
3073 };
3074 #endif
3075 int
3076 ftruncate(td, uap)
3077 	struct thread *td;
3078 	register struct ftruncate_args /* {
3079 		int fd;
3080 		int pad;
3081 		off_t length;
3082 	} */ *uap;
3083 {
3084 	struct mount *mp;
3085 	struct vattr vattr;
3086 	struct vnode *vp;
3087 	struct file *fp;
3088 	int vfslocked;
3089 	int error;
3090 
3091 	AUDIT_ARG(fd, uap->fd);
3092 	if (uap->length < 0)
3093 		return(EINVAL);
3094 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3095 		return (error);
3096 	if ((fp->f_flag & FWRITE) == 0) {
3097 		fdrop(fp, td);
3098 		return (EINVAL);
3099 	}
3100 	vp = fp->f_vnode;
3101 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3102 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3103 		goto drop;
3104 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3105 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3106 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3107 	if (vp->v_type == VDIR)
3108 		error = EISDIR;
3109 #ifdef MAC
3110 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3111 	    vp))) {
3112 	}
3113 #endif
3114 	else if ((error = vn_writechk(vp)) == 0) {
3115 		VATTR_NULL(&vattr);
3116 		vattr.va_size = uap->length;
3117 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3118 	}
3119 	VOP_UNLOCK(vp, 0, td);
3120 	vn_finished_write(mp);
3121 drop:
3122 	VFS_UNLOCK_GIANT(vfslocked);
3123 	fdrop(fp, td);
3124 	return (error);
3125 }
3126 
3127 #if defined(COMPAT_43)
3128 /*
3129  * Truncate a file given its path name.
3130  */
3131 #ifndef _SYS_SYSPROTO_H_
3132 struct otruncate_args {
3133 	char	*path;
3134 	long	length;
3135 };
3136 #endif
3137 int
3138 otruncate(td, uap)
3139 	struct thread *td;
3140 	register struct otruncate_args /* {
3141 		char *path;
3142 		long length;
3143 	} */ *uap;
3144 {
3145 	struct truncate_args /* {
3146 		char *path;
3147 		int pad;
3148 		off_t length;
3149 	} */ nuap;
3150 
3151 	nuap.path = uap->path;
3152 	nuap.length = uap->length;
3153 	return (truncate(td, &nuap));
3154 }
3155 
3156 /*
3157  * Truncate a file given a file descriptor.
3158  */
3159 #ifndef _SYS_SYSPROTO_H_
3160 struct oftruncate_args {
3161 	int	fd;
3162 	long	length;
3163 };
3164 #endif
3165 int
3166 oftruncate(td, uap)
3167 	struct thread *td;
3168 	register struct oftruncate_args /* {
3169 		int fd;
3170 		long length;
3171 	} */ *uap;
3172 {
3173 	struct ftruncate_args /* {
3174 		int fd;
3175 		int pad;
3176 		off_t length;
3177 	} */ nuap;
3178 
3179 	nuap.fd = uap->fd;
3180 	nuap.length = uap->length;
3181 	return (ftruncate(td, &nuap));
3182 }
3183 #endif /* COMPAT_43 */
3184 
3185 /*
3186  * Sync an open file.
3187  */
3188 #ifndef _SYS_SYSPROTO_H_
3189 struct fsync_args {
3190 	int	fd;
3191 };
3192 #endif
3193 int
3194 fsync(td, uap)
3195 	struct thread *td;
3196 	struct fsync_args /* {
3197 		int fd;
3198 	} */ *uap;
3199 {
3200 	struct vnode *vp;
3201 	struct mount *mp;
3202 	struct file *fp;
3203 	int vfslocked;
3204 	int error;
3205 
3206 	AUDIT_ARG(fd, uap->fd);
3207 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3208 		return (error);
3209 	vp = fp->f_vnode;
3210 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3211 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3212 		goto drop;
3213 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3214 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3215 	if (vp->v_object != NULL) {
3216 		VM_OBJECT_LOCK(vp->v_object);
3217 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3218 		VM_OBJECT_UNLOCK(vp->v_object);
3219 	}
3220 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3221 
3222 	VOP_UNLOCK(vp, 0, td);
3223 	vn_finished_write(mp);
3224 drop:
3225 	VFS_UNLOCK_GIANT(vfslocked);
3226 	fdrop(fp, td);
3227 	return (error);
3228 }
3229 
3230 /*
3231  * Rename files.  Source and destination must either both be directories,
3232  * or both not be directories.  If target is a directory, it must be empty.
3233  */
3234 #ifndef _SYS_SYSPROTO_H_
3235 struct rename_args {
3236 	char	*from;
3237 	char	*to;
3238 };
3239 #endif
3240 int
3241 rename(td, uap)
3242 	struct thread *td;
3243 	register struct rename_args /* {
3244 		char *from;
3245 		char *to;
3246 	} */ *uap;
3247 {
3248 
3249 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3250 }
3251 
3252 int
3253 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3254 {
3255 	struct mount *mp = NULL;
3256 	struct vnode *tvp, *fvp, *tdvp;
3257 	struct nameidata fromnd, tond;
3258 	int tvfslocked;
3259 	int fvfslocked;
3260 	int error;
3261 
3262 	bwillwrite();
3263 #ifdef MAC
3264 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3265 	    AUDITVNODE1, pathseg, from, td);
3266 #else
3267 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3268 	    AUDITVNODE1, pathseg, from, td);
3269 #endif
3270 	if ((error = namei(&fromnd)) != 0)
3271 		return (error);
3272 	fvfslocked = NDHASGIANT(&fromnd);
3273 	tvfslocked = 0;
3274 #ifdef MAC
3275 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3276 	    fromnd.ni_vp, &fromnd.ni_cnd);
3277 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3278 	if (fromnd.ni_dvp != fromnd.ni_vp)
3279 		VOP_UNLOCK(fromnd.ni_vp, 0, td);
3280 #endif
3281 	fvp = fromnd.ni_vp;
3282 	if (error == 0)
3283 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3284 	if (error != 0) {
3285 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3286 		vrele(fromnd.ni_dvp);
3287 		vrele(fvp);
3288 		goto out1;
3289 	}
3290 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3291 	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3292 	if (fromnd.ni_vp->v_type == VDIR)
3293 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3294 	if ((error = namei(&tond)) != 0) {
3295 		/* Translate error code for rename("dir1", "dir2/."). */
3296 		if (error == EISDIR && fvp->v_type == VDIR)
3297 			error = EINVAL;
3298 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3299 		vrele(fromnd.ni_dvp);
3300 		vrele(fvp);
3301 		vn_finished_write(mp);
3302 		goto out1;
3303 	}
3304 	tvfslocked = NDHASGIANT(&tond);
3305 	tdvp = tond.ni_dvp;
3306 	tvp = tond.ni_vp;
3307 	if (tvp != NULL) {
3308 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3309 			error = ENOTDIR;
3310 			goto out;
3311 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3312 			error = EISDIR;
3313 			goto out;
3314 		}
3315 	}
3316 	if (fvp == tdvp)
3317 		error = EINVAL;
3318 	/*
3319 	 * If the source is the same as the destination (that is, if they
3320 	 * are links to the same vnode), then there is nothing to do.
3321 	 */
3322 	if (fvp == tvp)
3323 		error = -1;
3324 #ifdef MAC
3325 	else
3326 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3327 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3328 #endif
3329 out:
3330 	if (!error) {
3331 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3332 		if (fromnd.ni_dvp != tdvp) {
3333 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3334 		}
3335 		if (tvp) {
3336 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3337 		}
3338 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3339 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3340 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3341 		NDFREE(&tond, NDF_ONLY_PNBUF);
3342 	} else {
3343 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3344 		NDFREE(&tond, NDF_ONLY_PNBUF);
3345 		if (tvp)
3346 			vput(tvp);
3347 		if (tdvp == tvp)
3348 			vrele(tdvp);
3349 		else
3350 			vput(tdvp);
3351 		vrele(fromnd.ni_dvp);
3352 		vrele(fvp);
3353 	}
3354 	vrele(tond.ni_startdir);
3355 	vn_finished_write(mp);
3356 out1:
3357 	if (fromnd.ni_startdir)
3358 		vrele(fromnd.ni_startdir);
3359 	VFS_UNLOCK_GIANT(fvfslocked);
3360 	VFS_UNLOCK_GIANT(tvfslocked);
3361 	if (error == -1)
3362 		return (0);
3363 	return (error);
3364 }
3365 
3366 /*
3367  * Make a directory file.
3368  */
3369 #ifndef _SYS_SYSPROTO_H_
3370 struct mkdir_args {
3371 	char	*path;
3372 	int	mode;
3373 };
3374 #endif
3375 int
3376 mkdir(td, uap)
3377 	struct thread *td;
3378 	register struct mkdir_args /* {
3379 		char *path;
3380 		int mode;
3381 	} */ *uap;
3382 {
3383 
3384 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3385 }
3386 
3387 int
3388 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3389 {
3390 	struct mount *mp;
3391 	struct vnode *vp;
3392 	struct vattr vattr;
3393 	int error;
3394 	struct nameidata nd;
3395 	int vfslocked;
3396 
3397 	AUDIT_ARG(mode, mode);
3398 restart:
3399 	bwillwrite();
3400 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3401 	    segflg, path, td);
3402 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3403 	if ((error = namei(&nd)) != 0)
3404 		return (error);
3405 	vfslocked = NDHASGIANT(&nd);
3406 	vp = nd.ni_vp;
3407 	if (vp != NULL) {
3408 		NDFREE(&nd, NDF_ONLY_PNBUF);
3409 		/*
3410 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3411 		 * the strange behaviour of leaving the vnode unlocked
3412 		 * if the target is the same vnode as the parent.
3413 		 */
3414 		if (vp == nd.ni_dvp)
3415 			vrele(nd.ni_dvp);
3416 		else
3417 			vput(nd.ni_dvp);
3418 		vrele(vp);
3419 		VFS_UNLOCK_GIANT(vfslocked);
3420 		return (EEXIST);
3421 	}
3422 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3423 		NDFREE(&nd, NDF_ONLY_PNBUF);
3424 		vput(nd.ni_dvp);
3425 		VFS_UNLOCK_GIANT(vfslocked);
3426 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3427 			return (error);
3428 		goto restart;
3429 	}
3430 	VATTR_NULL(&vattr);
3431 	vattr.va_type = VDIR;
3432 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3433 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3434 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3435 #ifdef MAC
3436 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3437 	    &vattr);
3438 	if (error)
3439 		goto out;
3440 #endif
3441 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3442 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3443 #ifdef MAC
3444 out:
3445 #endif
3446 	NDFREE(&nd, NDF_ONLY_PNBUF);
3447 	vput(nd.ni_dvp);
3448 	if (!error)
3449 		vput(nd.ni_vp);
3450 	vn_finished_write(mp);
3451 	VFS_UNLOCK_GIANT(vfslocked);
3452 	return (error);
3453 }
3454 
3455 /*
3456  * Remove a directory file.
3457  */
3458 #ifndef _SYS_SYSPROTO_H_
3459 struct rmdir_args {
3460 	char	*path;
3461 };
3462 #endif
3463 int
3464 rmdir(td, uap)
3465 	struct thread *td;
3466 	struct rmdir_args /* {
3467 		char *path;
3468 	} */ *uap;
3469 {
3470 
3471 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3472 }
3473 
3474 int
3475 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3476 {
3477 	struct mount *mp;
3478 	struct vnode *vp;
3479 	int error;
3480 	struct nameidata nd;
3481 	int vfslocked;
3482 
3483 restart:
3484 	bwillwrite();
3485 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3486 	    pathseg, path, td);
3487 	if ((error = namei(&nd)) != 0)
3488 		return (error);
3489 	vfslocked = NDHASGIANT(&nd);
3490 	vp = nd.ni_vp;
3491 	if (vp->v_type != VDIR) {
3492 		error = ENOTDIR;
3493 		goto out;
3494 	}
3495 	/*
3496 	 * No rmdir "." please.
3497 	 */
3498 	if (nd.ni_dvp == vp) {
3499 		error = EINVAL;
3500 		goto out;
3501 	}
3502 	/*
3503 	 * The root of a mounted filesystem cannot be deleted.
3504 	 */
3505 	if (vp->v_vflag & VV_ROOT) {
3506 		error = EBUSY;
3507 		goto out;
3508 	}
3509 #ifdef MAC
3510 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3511 	    &nd.ni_cnd);
3512 	if (error)
3513 		goto out;
3514 #endif
3515 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3516 		NDFREE(&nd, NDF_ONLY_PNBUF);
3517 		vput(vp);
3518 		if (nd.ni_dvp == vp)
3519 			vrele(nd.ni_dvp);
3520 		else
3521 			vput(nd.ni_dvp);
3522 		VFS_UNLOCK_GIANT(vfslocked);
3523 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3524 			return (error);
3525 		goto restart;
3526 	}
3527 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3528 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3529 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3530 	vn_finished_write(mp);
3531 out:
3532 	NDFREE(&nd, NDF_ONLY_PNBUF);
3533 	vput(vp);
3534 	if (nd.ni_dvp == vp)
3535 		vrele(nd.ni_dvp);
3536 	else
3537 		vput(nd.ni_dvp);
3538 	VFS_UNLOCK_GIANT(vfslocked);
3539 	return (error);
3540 }
3541 
3542 #ifdef COMPAT_43
/*
 * Read a block of directory entries in a filesystem independent format.
 *
 * COMPAT_43 variant.  Up to uap->count bytes of entries are read from
 * the directory open on uap->fd into uap->buf, starting at the file's
 * current offset.  Unless the direct big-endian path below is taken,
 * the entries are first read into a kernel buffer, their d_type and
 * d_namlen fields are rewritten, and the result is copied out.
 *
 * On return td->td_retval[0] holds the number of bytes transferred and
 * the offset at which the read started is copied out to uap->basep.
 *
 * Errors produced directly here:
 *   EINVAL  count exceeds 64KB, or the descriptor is not a directory
 *   EBADF   the descriptor is not open for reading
 *   EIO     an entry with a zero d_reclen was encountered
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(td, uap)
	struct thread *td;
	register struct ogetdirentries_args /* {
		int fd;
		char *buf;
		u_int count;
		long *basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt, vfslocked;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
	/*
	 * Restart point: re-entered with Giant released whenever the
	 * union handling below switches to a different vnode.
	 */
unionread:
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	if (vp->v_type != VDIR) {
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		return (EINVAL);
	}
	/* Describe the caller's buffer as the user-space destination. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember the starting offset; it is reported through basep. */
	loff = auio.uio_offset = fp->f_offset;
#ifdef MAC
	error = mac_check_vnode_readdir(td->td_ucred, vp);
	if (error) {
		VOP_UNLOCK(vp, 0, td);
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		return (error);
	}
#endif
	/*
	 * On non-little-endian machines a mount with mnt_maxsymlinklen <= 0
	 * is read straight into the user buffer with no conversion.
	 */
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/*
		 * Read into a temporary kernel buffer of the same size so
		 * the entries can be patched before being copied out.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				/* A zero record length would never advance. */
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					error = EIO;
					break;
				}
			}
			/* Copy out only if the whole buffer was walked. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, td);
	if (error) {
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		return (error);
	}
	/* Nothing was transferred: check for a union layer to descend to. */
	if (uap->count == auio.uio_resid) {
		if (union_dircheckp) {
			error = union_dircheckp(td, &vp, fp);
			/* -1 requests a retry of the read. */
			if (error == -1) {
				VFS_UNLOCK_GIANT(vfslocked);
				goto unionread;
			}
			if (error) {
				VFS_UNLOCK_GIANT(vfslocked);
				fdrop(fp, td);
				return (error);
			}
		}
		/*
		 * XXX We could delay dropping the lock above but
		 * union_dircheckp complicates things.
		 */
		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
		/*
		 * At the root of a MNT_UNION mount, redirect the open file
		 * to the covered vnode, rewind it and read again.
		 */
		if ((vp->v_vflag & VV_ROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->f_vnode = vp;
			fp->f_data = vp;
			fp->f_offset = 0;
			vput(tvp);
			VFS_UNLOCK_GIANT(vfslocked);
			goto unionread;
		}
		VOP_UNLOCK(vp, 0, td);
	}
	VFS_UNLOCK_GIANT(vfslocked);
	/* Report the offset the read started from. */
	error = copyout(&loff, uap->basep, sizeof(long));
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	return (error);
}
3704 #endif /* COMPAT_43 */
3705 
3706 /*
3707  * Read a block of directory entries in a filesystem independent format.
3708  */
3709 #ifndef _SYS_SYSPROTO_H_
3710 struct getdirentries_args {
3711 	int	fd;
3712 	char	*buf;
3713 	u_int	count;
3714 	long	*basep;
3715 };
3716 #endif
3717 int
3718 getdirentries(td, uap)
3719 	struct thread *td;
3720 	register struct getdirentries_args /* {
3721 		int fd;
3722 		char *buf;
3723 		u_int count;
3724 		long *basep;
3725 	} */ *uap;
3726 {
3727 	struct vnode *vp;
3728 	struct file *fp;
3729 	struct uio auio;
3730 	struct iovec aiov;
3731 	int vfslocked;
3732 	long loff;
3733 	int error, eofflag;
3734 
3735 	AUDIT_ARG(fd, uap->fd);
3736 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3737 		return (error);
3738 	if ((fp->f_flag & FREAD) == 0) {
3739 		fdrop(fp, td);
3740 		return (EBADF);
3741 	}
3742 	vp = fp->f_vnode;
3743 unionread:
3744 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3745 	if (vp->v_type != VDIR) {
3746 		error = EINVAL;
3747 		goto fail;
3748 	}
3749 	aiov.iov_base = uap->buf;
3750 	aiov.iov_len = uap->count;
3751 	auio.uio_iov = &aiov;
3752 	auio.uio_iovcnt = 1;
3753 	auio.uio_rw = UIO_READ;
3754 	auio.uio_segflg = UIO_USERSPACE;
3755 	auio.uio_td = td;
3756 	auio.uio_resid = uap->count;
3757 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3758 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3759 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3760 	loff = auio.uio_offset = fp->f_offset;
3761 #ifdef MAC
3762 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3763 	if (error == 0)
3764 #endif
3765 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3766 		    NULL);
3767 	fp->f_offset = auio.uio_offset;
3768 	VOP_UNLOCK(vp, 0, td);
3769 	if (error)
3770 		goto fail;
3771 	if (uap->count == auio.uio_resid) {
3772 		if (union_dircheckp) {
3773 			error = union_dircheckp(td, &vp, fp);
3774 			if (error == -1) {
3775 				VFS_UNLOCK_GIANT(vfslocked);
3776 				goto unionread;
3777 			}
3778 			if (error)
3779 				goto fail;
3780 		}
3781 		/*
3782 		 * XXX We could delay dropping the lock above but
3783 		 * union_dircheckp complicates things.
3784 		 */
3785 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3786 		if ((vp->v_vflag & VV_ROOT) &&
3787 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3788 			struct vnode *tvp = vp;
3789 			vp = vp->v_mount->mnt_vnodecovered;
3790 			VREF(vp);
3791 			fp->f_vnode = vp;
3792 			fp->f_data = vp;
3793 			fp->f_offset = 0;
3794 			vput(tvp);
3795 			VFS_UNLOCK_GIANT(vfslocked);
3796 			goto unionread;
3797 		}
3798 		VOP_UNLOCK(vp, 0, td);
3799 	}
3800 	if (uap->basep != NULL) {
3801 		error = copyout(&loff, uap->basep, sizeof(long));
3802 	}
3803 	td->td_retval[0] = uap->count - auio.uio_resid;
3804 fail:
3805 	VFS_UNLOCK_GIANT(vfslocked);
3806 	fdrop(fp, td);
3807 	return (error);
3808 }
3809 #ifndef _SYS_SYSPROTO_H_
3810 struct getdents_args {
3811 	int fd;
3812 	char *buf;
3813 	size_t count;
3814 };
3815 #endif
3816 int
3817 getdents(td, uap)
3818 	struct thread *td;
3819 	register struct getdents_args /* {
3820 		int fd;
3821 		char *buf;
3822 		u_int count;
3823 	} */ *uap;
3824 {
3825 	struct getdirentries_args ap;
3826 	ap.fd = uap->fd;
3827 	ap.buf = uap->buf;
3828 	ap.count = uap->count;
3829 	ap.basep = NULL;
3830 	return (getdirentries(td, &ap));
3831 }
3832 
3833 /*
3834  * Set the mode mask for creation of filesystem nodes.
3835  *
3836  * MP SAFE
3837  */
3838 #ifndef _SYS_SYSPROTO_H_
3839 struct umask_args {
3840 	int	newmask;
3841 };
3842 #endif
3843 int
3844 umask(td, uap)
3845 	struct thread *td;
3846 	struct umask_args /* {
3847 		int newmask;
3848 	} */ *uap;
3849 {
3850 	register struct filedesc *fdp;
3851 
3852 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3853 	fdp = td->td_proc->p_fd;
3854 	td->td_retval[0] = fdp->fd_cmask;
3855 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3856 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3857 	return (0);
3858 }
3859 
3860 /*
3861  * Void all references to file by ripping underlying filesystem
3862  * away from vnode.
3863  */
3864 #ifndef _SYS_SYSPROTO_H_
3865 struct revoke_args {
3866 	char	*path;
3867 };
3868 #endif
3869 int
3870 revoke(td, uap)
3871 	struct thread *td;
3872 	register struct revoke_args /* {
3873 		char *path;
3874 	} */ *uap;
3875 {
3876 	struct vnode *vp;
3877 	struct vattr vattr;
3878 	int error;
3879 	struct nameidata nd;
3880 	int vfslocked;
3881 
3882 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3883 	    UIO_USERSPACE, uap->path, td);
3884 	if ((error = namei(&nd)) != 0)
3885 		return (error);
3886 	vfslocked = NDHASGIANT(&nd);
3887 	vp = nd.ni_vp;
3888 	NDFREE(&nd, NDF_ONLY_PNBUF);
3889 	if (vp->v_type != VCHR) {
3890 		error = EINVAL;
3891 		goto out;
3892 	}
3893 #ifdef MAC
3894 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3895 	if (error)
3896 		goto out;
3897 #endif
3898 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3899 	if (error)
3900 		goto out;
3901 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3902 		error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN,
3903 		    SUSER_ALLOWJAIL);
3904 		if (error)
3905 			goto out;
3906 	}
3907 	if (vcount(vp) > 1)
3908 		VOP_REVOKE(vp, REVOKEALL);
3909 out:
3910 	vput(vp);
3911 	VFS_UNLOCK_GIANT(vfslocked);
3912 	return (error);
3913 }
3914 
3915 /*
3916  * Convert a user file descriptor to a kernel file entry.
3917  * A reference on the file entry is held upon returning.
3918  */
3919 int
3920 getvnode(fdp, fd, fpp)
3921 	struct filedesc *fdp;
3922 	int fd;
3923 	struct file **fpp;
3924 {
3925 	int error;
3926 	struct file *fp;
3927 
3928 	fp = NULL;
3929 	if (fdp == NULL)
3930 		error = EBADF;
3931 	else {
3932 		FILEDESC_LOCK(fdp);
3933 		if ((u_int)fd >= fdp->fd_nfiles ||
3934 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3935 			error = EBADF;
3936 		else if (fp->f_vnode == NULL) {
3937 			fp = NULL;
3938 			error = EINVAL;
3939 		} else {
3940 			fhold(fp);
3941 			error = 0;
3942 		}
3943 		FILEDESC_UNLOCK(fdp);
3944 	}
3945 	*fpp = fp;
3946 	return (error);
3947 }
3948 
3949 /*
3950  * Get (NFS) file handle
3951  */
3952 #ifndef _SYS_SYSPROTO_H_
3953 struct lgetfh_args {
3954 	char	*fname;
3955 	fhandle_t *fhp;
3956 };
3957 #endif
3958 int
3959 lgetfh(td, uap)
3960 	struct thread *td;
3961 	register struct lgetfh_args *uap;
3962 {
3963 	struct nameidata nd;
3964 	fhandle_t fh;
3965 	register struct vnode *vp;
3966 	int vfslocked;
3967 	int error;
3968 
3969 	error = priv_check(td, PRIV_VFS_GETFH);
3970 	if (error)
3971 		return (error);
3972 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3973 	    UIO_USERSPACE, uap->fname, td);
3974 	error = namei(&nd);
3975 	if (error)
3976 		return (error);
3977 	vfslocked = NDHASGIANT(&nd);
3978 	NDFREE(&nd, NDF_ONLY_PNBUF);
3979 	vp = nd.ni_vp;
3980 	bzero(&fh, sizeof(fh));
3981 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3982 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3983 	vput(vp);
3984 	VFS_UNLOCK_GIANT(vfslocked);
3985 	if (error)
3986 		return (error);
3987 	error = copyout(&fh, uap->fhp, sizeof (fh));
3988 	return (error);
3989 }
3990 
3991 #ifndef _SYS_SYSPROTO_H_
3992 struct getfh_args {
3993 	char	*fname;
3994 	fhandle_t *fhp;
3995 };
3996 #endif
3997 int
3998 getfh(td, uap)
3999 	struct thread *td;
4000 	register struct getfh_args *uap;
4001 {
4002 	struct nameidata nd;
4003 	fhandle_t fh;
4004 	register struct vnode *vp;
4005 	int vfslocked;
4006 	int error;
4007 
4008 	error = priv_check(td, PRIV_VFS_GETFH);
4009 	if (error)
4010 		return (error);
4011 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4012 	    UIO_USERSPACE, uap->fname, td);
4013 	error = namei(&nd);
4014 	if (error)
4015 		return (error);
4016 	vfslocked = NDHASGIANT(&nd);
4017 	NDFREE(&nd, NDF_ONLY_PNBUF);
4018 	vp = nd.ni_vp;
4019 	bzero(&fh, sizeof(fh));
4020 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4021 	error = VFS_VPTOFH(vp, &fh.fh_fid);
4022 	vput(vp);
4023 	VFS_UNLOCK_GIANT(vfslocked);
4024 	if (error)
4025 		return (error);
4026 	error = copyout(&fh, uap->fhp, sizeof (fh));
4027 	return (error);
4028 }
4029 
4030 /*
4031  * syscall for the rpc.lockd to use to translate a NFS file handle into an
4032  * open descriptor.
4033  *
4034  * warning: do not remove the priv_check() call or this becomes one giant
4035  * security hole.
4036  *
4037  * MP SAFE
4038  */
4039 #ifndef _SYS_SYSPROTO_H_
4040 struct fhopen_args {
4041 	const struct fhandle *u_fhp;
4042 	int flags;
4043 };
4044 #endif
4045 int
4046 fhopen(td, uap)
4047 	struct thread *td;
4048 	struct fhopen_args /* {
4049 		const struct fhandle *u_fhp;
4050 		int flags;
4051 	} */ *uap;
4052 {
4053 	struct proc *p = td->td_proc;
4054 	struct mount *mp;
4055 	struct vnode *vp;
4056 	struct fhandle fhp;
4057 	struct vattr vat;
4058 	struct vattr *vap = &vat;
4059 	struct flock lf;
4060 	struct file *fp;
4061 	register struct filedesc *fdp = p->p_fd;
4062 	int fmode, mode, error, type;
4063 	struct file *nfp;
4064 	int vfslocked;
4065 	int indx;
4066 
4067 	error = priv_check(td, PRIV_VFS_FHOPEN);
4068 	if (error)
4069 		return (error);
4070 	fmode = FFLAGS(uap->flags);
4071 	/* why not allow a non-read/write open for our lockd? */
4072 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4073 		return (EINVAL);
4074 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4075 	if (error)
4076 		return(error);
4077 	/* find the mount point */
4078 	mp = vfs_getvfs(&fhp.fh_fsid);
4079 	if (mp == NULL)
4080 		return (ESTALE);
4081 	vfslocked = VFS_LOCK_GIANT(mp);
4082 	/* now give me my vnode, it gets returned to me locked */
4083 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4084 	if (error)
4085 		goto out;
4086 	/*
4087 	 * from now on we have to make sure not
4088 	 * to forget about the vnode
4089 	 * any error that causes an abort must vput(vp)
4090 	 * just set error = err and 'goto bad;'.
4091 	 */
4092 
4093 	/*
4094 	 * from vn_open
4095 	 */
4096 	if (vp->v_type == VLNK) {
4097 		error = EMLINK;
4098 		goto bad;
4099 	}
4100 	if (vp->v_type == VSOCK) {
4101 		error = EOPNOTSUPP;
4102 		goto bad;
4103 	}
4104 	mode = 0;
4105 	if (fmode & (FWRITE | O_TRUNC)) {
4106 		if (vp->v_type == VDIR) {
4107 			error = EISDIR;
4108 			goto bad;
4109 		}
4110 		error = vn_writechk(vp);
4111 		if (error)
4112 			goto bad;
4113 		mode |= VWRITE;
4114 	}
4115 	if (fmode & FREAD)
4116 		mode |= VREAD;
4117 	if (fmode & O_APPEND)
4118 		mode |= VAPPEND;
4119 #ifdef MAC
4120 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4121 	if (error)
4122 		goto bad;
4123 #endif
4124 	if (mode) {
4125 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4126 		if (error)
4127 			goto bad;
4128 	}
4129 	if (fmode & O_TRUNC) {
4130 		VOP_UNLOCK(vp, 0, td);				/* XXX */
4131 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4132 			vrele(vp);
4133 			goto out;
4134 		}
4135 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4136 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4137 #ifdef MAC
4138 		/*
4139 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4140 		 * should be right.
4141 		 */
4142 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4143 		if (error == 0) {
4144 #endif
4145 			VATTR_NULL(vap);
4146 			vap->va_size = 0;
4147 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4148 #ifdef MAC
4149 		}
4150 #endif
4151 		vn_finished_write(mp);
4152 		if (error)
4153 			goto bad;
4154 	}
4155 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
4156 	if (error)
4157 		goto bad;
4158 
4159 	if (fmode & FWRITE)
4160 		vp->v_writecount++;
4161 
4162 	/*
4163 	 * end of vn_open code
4164 	 */
4165 
4166 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4167 		if (fmode & FWRITE)
4168 			vp->v_writecount--;
4169 		goto bad;
4170 	}
4171 	/* An extra reference on `nfp' has been held for us by falloc(). */
4172 	fp = nfp;
4173 
4174 	nfp->f_vnode = vp;
4175 	nfp->f_data = vp;
4176 	nfp->f_flag = fmode & FMASK;
4177 	nfp->f_ops = &vnops;
4178 	nfp->f_type = DTYPE_VNODE;
4179 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4180 		lf.l_whence = SEEK_SET;
4181 		lf.l_start = 0;
4182 		lf.l_len = 0;
4183 		if (fmode & O_EXLOCK)
4184 			lf.l_type = F_WRLCK;
4185 		else
4186 			lf.l_type = F_RDLCK;
4187 		type = F_FLOCK;
4188 		if ((fmode & FNONBLOCK) == 0)
4189 			type |= F_WAIT;
4190 		VOP_UNLOCK(vp, 0, td);
4191 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4192 			    type)) != 0) {
4193 			/*
4194 			 * The lock request failed.  Normally close the
4195 			 * descriptor but handle the case where someone might
4196 			 * have dup()d or close()d it when we weren't looking.
4197 			 */
4198 			fdclose(fdp, fp, indx, td);
4199 
4200 			/*
4201 			 * release our private reference
4202 			 */
4203 			fdrop(fp, td);
4204 			goto out;
4205 		}
4206 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4207 		fp->f_flag |= FHASLOCK;
4208 	}
4209 
4210 	VOP_UNLOCK(vp, 0, td);
4211 	fdrop(fp, td);
4212 	vfs_rel(mp);
4213 	VFS_UNLOCK_GIANT(vfslocked);
4214 	td->td_retval[0] = indx;
4215 	return (0);
4216 
4217 bad:
4218 	vput(vp);
4219 out:
4220 	vfs_rel(mp);
4221 	VFS_UNLOCK_GIANT(vfslocked);
4222 	return (error);
4223 }
4224 
4225 /*
4226  * Stat an (NFS) file handle.
4227  *
4228  * MP SAFE
4229  */
4230 #ifndef _SYS_SYSPROTO_H_
4231 struct fhstat_args {
4232 	struct fhandle *u_fhp;
4233 	struct stat *sb;
4234 };
4235 #endif
4236 int
4237 fhstat(td, uap)
4238 	struct thread *td;
4239 	register struct fhstat_args /* {
4240 		struct fhandle *u_fhp;
4241 		struct stat *sb;
4242 	} */ *uap;
4243 {
4244 	struct stat sb;
4245 	fhandle_t fh;
4246 	struct mount *mp;
4247 	struct vnode *vp;
4248 	int vfslocked;
4249 	int error;
4250 
4251 	error = priv_check(td, PRIV_VFS_FHSTAT);
4252 	if (error)
4253 		return (error);
4254 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4255 	if (error)
4256 		return (error);
4257 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4258 		return (ESTALE);
4259 	vfslocked = VFS_LOCK_GIANT(mp);
4260 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4261 		vfs_rel(mp);
4262 		VFS_UNLOCK_GIANT(vfslocked);
4263 		return (error);
4264 	}
4265 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4266 	vput(vp);
4267 	vfs_rel(mp);
4268 	VFS_UNLOCK_GIANT(vfslocked);
4269 	if (error)
4270 		return (error);
4271 	error = copyout(&sb, uap->sb, sizeof(sb));
4272 	return (error);
4273 }
4274 
4275 /*
4276  * Implement fstatfs() for (NFS) file handles.
4277  *
4278  * MP SAFE
4279  */
4280 #ifndef _SYS_SYSPROTO_H_
4281 struct fhstatfs_args {
4282 	struct fhandle *u_fhp;
4283 	struct statfs *buf;
4284 };
4285 #endif
4286 int
4287 fhstatfs(td, uap)
4288 	struct thread *td;
4289 	struct fhstatfs_args /* {
4290 		struct fhandle *u_fhp;
4291 		struct statfs *buf;
4292 	} */ *uap;
4293 {
4294 	struct statfs sf;
4295 	fhandle_t fh;
4296 	int error;
4297 
4298 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4299 	if (error)
4300 		return (error);
4301 	error = kern_fhstatfs(td, fh, &sf);
4302 	if (error)
4303 		return (error);
4304 	return (copyout(&sf, uap->buf, sizeof(sf)));
4305 }
4306 
4307 int
4308 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4309 {
4310 	struct statfs *sp;
4311 	struct mount *mp;
4312 	struct vnode *vp;
4313 	int vfslocked;
4314 	int error;
4315 
4316 	error = priv_check(td, PRIV_VFS_FHSTATFS);
4317 	if (error)
4318 		return (error);
4319 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4320 		return (ESTALE);
4321 	vfslocked = VFS_LOCK_GIANT(mp);
4322 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4323 	if (error) {
4324 		VFS_UNLOCK_GIANT(vfslocked);
4325 		vfs_rel(mp);
4326 		return (error);
4327 	}
4328 	vput(vp);
4329 	error = prison_canseemount(td->td_ucred, mp);
4330 	if (error)
4331 		goto out;
4332 #ifdef MAC
4333 	error = mac_check_mount_stat(td->td_ucred, mp);
4334 	if (error)
4335 		goto out;
4336 #endif
4337 	/*
4338 	 * Set these in case the underlying filesystem fails to do so.
4339 	 */
4340 	sp = &mp->mnt_stat;
4341 	sp->f_version = STATFS_VERSION;
4342 	sp->f_namemax = NAME_MAX;
4343 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4344 	error = VFS_STATFS(mp, sp, td);
4345 	if (error == 0)
4346 		*buf = *sp;
4347 out:
4348 	vfs_rel(mp);
4349 	VFS_UNLOCK_GIANT(vfslocked);
4350 	return (error);
4351 }
4352