xref: /freebsd/sys/kern/vfs_syscalls.c (revision 282a3889ebf826db9839be296ff1dd903f6d6d6e)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/sysent.h>
48 #include <sys/malloc.h>
49 #include <sys/mount.h>
50 #include <sys/mutex.h>
51 #include <sys/sysproto.h>
52 #include <sys/namei.h>
53 #include <sys/filedesc.h>
54 #include <sys/kernel.h>
55 #include <sys/fcntl.h>
56 #include <sys/file.h>
57 #include <sys/filio.h>
58 #include <sys/limits.h>
59 #include <sys/linker.h>
60 #include <sys/stat.h>
61 #include <sys/sx.h>
62 #include <sys/unistd.h>
63 #include <sys/vnode.h>
64 #include <sys/priv.h>
65 #include <sys/proc.h>
66 #include <sys/dirent.h>
67 #include <sys/jail.h>
68 #include <sys/syscallsubr.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/stdarg.h>
72 
73 #include <security/audit/audit.h>
74 #include <security/mac/mac_framework.h>
75 
76 #include <vm/vm.h>
77 #include <vm/vm_object.h>
78 #include <vm/vm_page.h>
79 #include <vm/uma.h>
80 
81 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
82 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
83 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
84 static int setfmode(struct thread *td, struct vnode *, int);
85 static int setfflags(struct thread *td, struct vnode *, int);
86 static int setutimes(struct thread *td, struct vnode *,
87     const struct timespec *, int, int);
88 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
89     struct thread *td);
90 
/*
 * Version of POSIX asynchronous I/O that is implemented; zero means AIO
 * is not implemented.  The AIO module initialization routine stores the
 * value here, and it is consulted by pathconf() in this file and by
 * fpathconf() in kern_descrip.c.
 */
int async_io_version;
98 
#ifdef DEBUG
/* Read/write integer exported as the debug.syncprt sysctl (DEBUG kernels). */
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif
103 
104 /*
105  * Sync each mounted filesystem.
106  */
107 #ifndef _SYS_SYSPROTO_H_
108 struct sync_args {
109 	int     dummy;
110 };
111 #endif
112 /* ARGSUSED */
113 int
114 sync(td, uap)
115 	struct thread *td;
116 	struct sync_args *uap;
117 {
118 	struct mount *mp, *nmp;
119 	int vfslocked;
120 
121 	mtx_lock(&mountlist_mtx);
122 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
123 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
124 			nmp = TAILQ_NEXT(mp, mnt_list);
125 			continue;
126 		}
127 		vfslocked = VFS_LOCK_GIANT(mp);
128 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
129 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
130 			MNT_ILOCK(mp);
131 			mp->mnt_noasync++;
132 			mp->mnt_kern_flag &= ~MNTK_ASYNC;
133 			MNT_IUNLOCK(mp);
134 			vfs_msync(mp, MNT_NOWAIT);
135 			VFS_SYNC(mp, MNT_NOWAIT, td);
136 			MNT_ILOCK(mp);
137 			mp->mnt_noasync--;
138 			if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
139 			    mp->mnt_noasync == 0)
140 				mp->mnt_kern_flag |= MNTK_ASYNC;
141 			MNT_IUNLOCK(mp);
142 			vn_finished_write(mp);
143 		}
144 		VFS_UNLOCK_GIANT(vfslocked);
145 		mtx_lock(&mountlist_mtx);
146 		nmp = TAILQ_NEXT(mp, mnt_list);
147 		vfs_unbusy(mp, td);
148 	}
149 	mtx_unlock(&mountlist_mtx);
150 	return (0);
151 }
152 
/*
 * When zero, quotactl() refuses jailed callers with EPERM.  The sysctl
 * that would expose this knob is compiled out.
 * XXX PRISON: could be per prison flag
 */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif
158 
159 /*
160  * Change filesystem quotas.
161  */
162 #ifndef _SYS_SYSPROTO_H_
163 struct quotactl_args {
164 	char *path;
165 	int cmd;
166 	int uid;
167 	caddr_t arg;
168 };
169 #endif
170 int
171 quotactl(td, uap)
172 	struct thread *td;
173 	register struct quotactl_args /* {
174 		char *path;
175 		int cmd;
176 		int uid;
177 		caddr_t arg;
178 	} */ *uap;
179 {
180 	struct mount *mp;
181 	int vfslocked;
182 	int error;
183 	struct nameidata nd;
184 
185 	AUDIT_ARG(cmd, uap->cmd);
186 	AUDIT_ARG(uid, uap->uid);
187 	if (jailed(td->td_ucred) && !prison_quotas)
188 		return (EPERM);
189 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1,
190 	   UIO_USERSPACE, uap->path, td);
191 	if ((error = namei(&nd)) != 0)
192 		return (error);
193 	vfslocked = NDHASGIANT(&nd);
194 	NDFREE(&nd, NDF_ONLY_PNBUF);
195 	mp = nd.ni_vp->v_mount;
196 	if ((error = vfs_busy(mp, 0, NULL, td))) {
197 		vrele(nd.ni_vp);
198 		VFS_UNLOCK_GIANT(vfslocked);
199 		return (error);
200 	}
201 	vrele(nd.ni_vp);
202 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
203 	vfs_unbusy(mp, td);
204 	VFS_UNLOCK_GIANT(vfslocked);
205 	return (error);
206 }
207 
208 /*
209  * Get filesystem statistics.
210  */
211 #ifndef _SYS_SYSPROTO_H_
212 struct statfs_args {
213 	char *path;
214 	struct statfs *buf;
215 };
216 #endif
217 int
218 statfs(td, uap)
219 	struct thread *td;
220 	register struct statfs_args /* {
221 		char *path;
222 		struct statfs *buf;
223 	} */ *uap;
224 {
225 	struct statfs sf;
226 	int error;
227 
228 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
229 	if (error == 0)
230 		error = copyout(&sf, uap->buf, sizeof(sf));
231 	return (error);
232 }
233 
234 int
235 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
236     struct statfs *buf)
237 {
238 	struct mount *mp;
239 	struct statfs *sp, sb;
240 	int vfslocked;
241 	int error;
242 	struct nameidata nd;
243 
244 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
245 	    pathseg, path, td);
246 	error = namei(&nd);
247 	if (error)
248 		return (error);
249 	vfslocked = NDHASGIANT(&nd);
250 	mp = nd.ni_vp->v_mount;
251 	vfs_ref(mp);
252 	NDFREE(&nd, NDF_ONLY_PNBUF);
253 	vput(nd.ni_vp);
254 #ifdef MAC
255 	error = mac_check_mount_stat(td->td_ucred, mp);
256 	if (error)
257 		goto out;
258 #endif
259 	/*
260 	 * Set these in case the underlying filesystem fails to do so.
261 	 */
262 	sp = &mp->mnt_stat;
263 	sp->f_version = STATFS_VERSION;
264 	sp->f_namemax = NAME_MAX;
265 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
266 	error = VFS_STATFS(mp, sp, td);
267 	if (error)
268 		goto out;
269 	if (priv_check(td, PRIV_VFS_GENERATION)) {
270 		bcopy(sp, &sb, sizeof(sb));
271 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
272 		prison_enforce_statfs(td->td_ucred, mp, &sb);
273 		sp = &sb;
274 	}
275 	*buf = *sp;
276 out:
277 	vfs_rel(mp);
278 	VFS_UNLOCK_GIANT(vfslocked);
279 	if (mtx_owned(&Giant))
280 		printf("statfs(%d): %s: %d\n", vfslocked, path, error);
281 	return (error);
282 }
283 
284 /*
285  * Get filesystem statistics.
286  */
287 #ifndef _SYS_SYSPROTO_H_
288 struct fstatfs_args {
289 	int fd;
290 	struct statfs *buf;
291 };
292 #endif
293 int
294 fstatfs(td, uap)
295 	struct thread *td;
296 	register struct fstatfs_args /* {
297 		int fd;
298 		struct statfs *buf;
299 	} */ *uap;
300 {
301 	struct statfs sf;
302 	int error;
303 
304 	error = kern_fstatfs(td, uap->fd, &sf);
305 	if (error == 0)
306 		error = copyout(&sf, uap->buf, sizeof(sf));
307 	return (error);
308 }
309 
310 int
311 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
312 {
313 	struct file *fp;
314 	struct mount *mp;
315 	struct statfs *sp, sb;
316 	int vfslocked;
317 	struct vnode *vp;
318 	int error;
319 
320 	AUDIT_ARG(fd, fd);
321 	error = getvnode(td->td_proc->p_fd, fd, &fp);
322 	if (error)
323 		return (error);
324 	vp = fp->f_vnode;
325 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
326 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
327 #ifdef AUDIT
328 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
329 #endif
330 	mp = vp->v_mount;
331 	if (mp)
332 		vfs_ref(mp);
333 	VOP_UNLOCK(vp, 0, td);
334 	fdrop(fp, td);
335 	if (vp->v_iflag & VI_DOOMED) {
336 		error = EBADF;
337 		goto out;
338 	}
339 #ifdef MAC
340 	error = mac_check_mount_stat(td->td_ucred, mp);
341 	if (error)
342 		goto out;
343 #endif
344 	/*
345 	 * Set these in case the underlying filesystem fails to do so.
346 	 */
347 	sp = &mp->mnt_stat;
348 	sp->f_version = STATFS_VERSION;
349 	sp->f_namemax = NAME_MAX;
350 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
351 	error = VFS_STATFS(mp, sp, td);
352 	if (error)
353 		goto out;
354 	if (priv_check(td, PRIV_VFS_GENERATION)) {
355 		bcopy(sp, &sb, sizeof(sb));
356 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
357 		prison_enforce_statfs(td->td_ucred, mp, &sb);
358 		sp = &sb;
359 	}
360 	*buf = *sp;
361 out:
362 	if (mp)
363 		vfs_rel(mp);
364 	VFS_UNLOCK_GIANT(vfslocked);
365 	return (error);
366 }
367 
368 /*
369  * Get statistics on all filesystems.
370  */
371 #ifndef _SYS_SYSPROTO_H_
372 struct getfsstat_args {
373 	struct statfs *buf;
374 	long bufsize;
375 	int flags;
376 };
377 #endif
378 int
379 getfsstat(td, uap)
380 	struct thread *td;
381 	register struct getfsstat_args /* {
382 		struct statfs *buf;
383 		long bufsize;
384 		int flags;
385 	} */ *uap;
386 {
387 
388 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
389 	    uap->flags));
390 }
391 
392 /*
393  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
394  * 	The caller is responsible for freeing memory which will be allocated
395  *	in '*buf'.
396  */
397 int
398 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
399     enum uio_seg bufseg, int flags)
400 {
401 	struct mount *mp, *nmp;
402 	struct statfs *sfsp, *sp, sb;
403 	size_t count, maxcount;
404 	int vfslocked;
405 	int error;
406 
407 	maxcount = bufsize / sizeof(struct statfs);
408 	if (bufsize == 0)
409 		sfsp = NULL;
410 	else if (bufseg == UIO_USERSPACE)
411 		sfsp = *buf;
412 	else /* if (bufseg == UIO_SYSSPACE) */ {
413 		count = 0;
414 		mtx_lock(&mountlist_mtx);
415 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
416 			count++;
417 		}
418 		mtx_unlock(&mountlist_mtx);
419 		if (maxcount > count)
420 			maxcount = count;
421 		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
422 		    M_WAITOK);
423 	}
424 	count = 0;
425 	mtx_lock(&mountlist_mtx);
426 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
427 		if (prison_canseemount(td->td_ucred, mp) != 0) {
428 			nmp = TAILQ_NEXT(mp, mnt_list);
429 			continue;
430 		}
431 #ifdef MAC
432 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
433 			nmp = TAILQ_NEXT(mp, mnt_list);
434 			continue;
435 		}
436 #endif
437 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
438 			nmp = TAILQ_NEXT(mp, mnt_list);
439 			continue;
440 		}
441 		vfslocked = VFS_LOCK_GIANT(mp);
442 		if (sfsp && count < maxcount) {
443 			sp = &mp->mnt_stat;
444 			/*
445 			 * Set these in case the underlying filesystem
446 			 * fails to do so.
447 			 */
448 			sp->f_version = STATFS_VERSION;
449 			sp->f_namemax = NAME_MAX;
450 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
451 			/*
452 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
453 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
454 			 * overrides MNT_WAIT.
455 			 */
456 			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
457 			    (flags & MNT_WAIT)) &&
458 			    (error = VFS_STATFS(mp, sp, td))) {
459 				VFS_UNLOCK_GIANT(vfslocked);
460 				mtx_lock(&mountlist_mtx);
461 				nmp = TAILQ_NEXT(mp, mnt_list);
462 				vfs_unbusy(mp, td);
463 				continue;
464 			}
465 			if (priv_check(td, PRIV_VFS_GENERATION)) {
466 				bcopy(sp, &sb, sizeof(sb));
467 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
468 				prison_enforce_statfs(td->td_ucred, mp, &sb);
469 				sp = &sb;
470 			}
471 			if (bufseg == UIO_SYSSPACE)
472 				bcopy(sp, sfsp, sizeof(*sp));
473 			else /* if (bufseg == UIO_USERSPACE) */ {
474 				error = copyout(sp, sfsp, sizeof(*sp));
475 				if (error) {
476 					vfs_unbusy(mp, td);
477 					VFS_UNLOCK_GIANT(vfslocked);
478 					return (error);
479 				}
480 			}
481 			sfsp++;
482 		}
483 		VFS_UNLOCK_GIANT(vfslocked);
484 		count++;
485 		mtx_lock(&mountlist_mtx);
486 		nmp = TAILQ_NEXT(mp, mnt_list);
487 		vfs_unbusy(mp, td);
488 	}
489 	mtx_unlock(&mountlist_mtx);
490 	if (sfsp && count > maxcount)
491 		td->td_retval[0] = maxcount;
492 	else
493 		td->td_retval[0] = count;
494 	return (0);
495 }
496 
497 #ifdef COMPAT_FREEBSD4
498 /*
499  * Get old format filesystem statistics.
500  */
501 static void cvtstatfs(struct statfs *, struct ostatfs *);
502 
503 #ifndef _SYS_SYSPROTO_H_
504 struct freebsd4_statfs_args {
505 	char *path;
506 	struct ostatfs *buf;
507 };
508 #endif
509 int
510 freebsd4_statfs(td, uap)
511 	struct thread *td;
512 	struct freebsd4_statfs_args /* {
513 		char *path;
514 		struct ostatfs *buf;
515 	} */ *uap;
516 {
517 	struct ostatfs osb;
518 	struct statfs sf;
519 	int error;
520 
521 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
522 	if (error)
523 		return (error);
524 	cvtstatfs(&sf, &osb);
525 	return (copyout(&osb, uap->buf, sizeof(osb)));
526 }
527 
528 /*
529  * Get filesystem statistics.
530  */
531 #ifndef _SYS_SYSPROTO_H_
532 struct freebsd4_fstatfs_args {
533 	int fd;
534 	struct ostatfs *buf;
535 };
536 #endif
537 int
538 freebsd4_fstatfs(td, uap)
539 	struct thread *td;
540 	struct freebsd4_fstatfs_args /* {
541 		int fd;
542 		struct ostatfs *buf;
543 	} */ *uap;
544 {
545 	struct ostatfs osb;
546 	struct statfs sf;
547 	int error;
548 
549 	error = kern_fstatfs(td, uap->fd, &sf);
550 	if (error)
551 		return (error);
552 	cvtstatfs(&sf, &osb);
553 	return (copyout(&osb, uap->buf, sizeof(osb)));
554 }
555 
556 /*
557  * Get statistics on all filesystems.
558  */
559 #ifndef _SYS_SYSPROTO_H_
560 struct freebsd4_getfsstat_args {
561 	struct ostatfs *buf;
562 	long bufsize;
563 	int flags;
564 };
565 #endif
566 int
567 freebsd4_getfsstat(td, uap)
568 	struct thread *td;
569 	register struct freebsd4_getfsstat_args /* {
570 		struct ostatfs *buf;
571 		long bufsize;
572 		int flags;
573 	} */ *uap;
574 {
575 	struct statfs *buf, *sp;
576 	struct ostatfs osb;
577 	size_t count, size;
578 	int error;
579 
580 	count = uap->bufsize / sizeof(struct ostatfs);
581 	size = count * sizeof(struct statfs);
582 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
583 	if (size > 0) {
584 		count = td->td_retval[0];
585 		sp = buf;
586 		while (count > 0 && error == 0) {
587 			cvtstatfs(sp, &osb);
588 			error = copyout(&osb, uap->buf, sizeof(osb));
589 			sp++;
590 			uap->buf++;
591 			count--;
592 		}
593 		free(buf, M_TEMP);
594 	}
595 	return (error);
596 }
597 
598 /*
599  * Implement fstatfs() for (NFS) file handles.
600  */
601 #ifndef _SYS_SYSPROTO_H_
602 struct freebsd4_fhstatfs_args {
603 	struct fhandle *u_fhp;
604 	struct ostatfs *buf;
605 };
606 #endif
607 int
608 freebsd4_fhstatfs(td, uap)
609 	struct thread *td;
610 	struct freebsd4_fhstatfs_args /* {
611 		struct fhandle *u_fhp;
612 		struct ostatfs *buf;
613 	} */ *uap;
614 {
615 	struct ostatfs osb;
616 	struct statfs sf;
617 	fhandle_t fh;
618 	int error;
619 
620 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
621 	if (error)
622 		return (error);
623 	error = kern_fhstatfs(td, fh, &sf);
624 	if (error)
625 		return (error);
626 	cvtstatfs(&sf, &osb);
627 	return (copyout(&osb, uap->buf, sizeof(osb)));
628 }
629 
630 /*
631  * Convert a new format statfs structure to an old format statfs structure.
632  */
633 static void
634 cvtstatfs(nsp, osp)
635 	struct statfs *nsp;
636 	struct ostatfs *osp;
637 {
638 
639 	bzero(osp, sizeof(*osp));
640 	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
641 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
642 	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
643 	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
644 	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
645 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
646 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
647 	osp->f_owner = nsp->f_owner;
648 	osp->f_type = nsp->f_type;
649 	osp->f_flags = nsp->f_flags;
650 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
651 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
652 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
653 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
654 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
655 	    MIN(MFSNAMELEN, OMFSNAMELEN));
656 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
657 	    MIN(MNAMELEN, OMNAMELEN));
658 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
659 	    MIN(MNAMELEN, OMNAMELEN));
660 	osp->f_fsid = nsp->f_fsid;
661 }
662 #endif /* COMPAT_FREEBSD4 */
663 
664 /*
665  * Change current working directory to a given file descriptor.
666  */
667 #ifndef _SYS_SYSPROTO_H_
668 struct fchdir_args {
669 	int	fd;
670 };
671 #endif
672 int
673 fchdir(td, uap)
674 	struct thread *td;
675 	struct fchdir_args /* {
676 		int fd;
677 	} */ *uap;
678 {
679 	register struct filedesc *fdp = td->td_proc->p_fd;
680 	struct vnode *vp, *tdp, *vpold;
681 	struct mount *mp;
682 	struct file *fp;
683 	int vfslocked;
684 	int error;
685 
686 	AUDIT_ARG(fd, uap->fd);
687 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
688 		return (error);
689 	vp = fp->f_vnode;
690 	VREF(vp);
691 	fdrop(fp, td);
692 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
693 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
694 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
695 	error = change_dir(vp, td);
696 	while (!error && (mp = vp->v_mountedhere) != NULL) {
697 		int tvfslocked;
698 		if (vfs_busy(mp, 0, 0, td))
699 			continue;
700 		tvfslocked = VFS_LOCK_GIANT(mp);
701 		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
702 		vfs_unbusy(mp, td);
703 		if (error) {
704 			VFS_UNLOCK_GIANT(tvfslocked);
705 			break;
706 		}
707 		vput(vp);
708 		VFS_UNLOCK_GIANT(vfslocked);
709 		vp = tdp;
710 		vfslocked = tvfslocked;
711 	}
712 	if (error) {
713 		vput(vp);
714 		VFS_UNLOCK_GIANT(vfslocked);
715 		return (error);
716 	}
717 	VOP_UNLOCK(vp, 0, td);
718 	VFS_UNLOCK_GIANT(vfslocked);
719 	FILEDESC_XLOCK(fdp);
720 	vpold = fdp->fd_cdir;
721 	fdp->fd_cdir = vp;
722 	FILEDESC_XUNLOCK(fdp);
723 	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
724 	vrele(vpold);
725 	VFS_UNLOCK_GIANT(vfslocked);
726 	return (0);
727 }
728 
729 /*
730  * Change current working directory (``.'').
731  */
732 #ifndef _SYS_SYSPROTO_H_
733 struct chdir_args {
734 	char	*path;
735 };
736 #endif
737 int
738 chdir(td, uap)
739 	struct thread *td;
740 	struct chdir_args /* {
741 		char *path;
742 	} */ *uap;
743 {
744 
745 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
746 }
747 
748 int
749 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
750 {
751 	register struct filedesc *fdp = td->td_proc->p_fd;
752 	int error;
753 	struct nameidata nd;
754 	struct vnode *vp;
755 	int vfslocked;
756 
757 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
758 	    pathseg, path, td);
759 	if ((error = namei(&nd)) != 0)
760 		return (error);
761 	vfslocked = NDHASGIANT(&nd);
762 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
763 		vput(nd.ni_vp);
764 		VFS_UNLOCK_GIANT(vfslocked);
765 		NDFREE(&nd, NDF_ONLY_PNBUF);
766 		return (error);
767 	}
768 	VOP_UNLOCK(nd.ni_vp, 0, td);
769 	VFS_UNLOCK_GIANT(vfslocked);
770 	NDFREE(&nd, NDF_ONLY_PNBUF);
771 	FILEDESC_XLOCK(fdp);
772 	vp = fdp->fd_cdir;
773 	fdp->fd_cdir = nd.ni_vp;
774 	FILEDESC_XUNLOCK(fdp);
775 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
776 	vrele(vp);
777 	VFS_UNLOCK_GIANT(vfslocked);
778 	return (0);
779 }
780 
781 /*
782  * Helper function for raised chroot(2) security function:  Refuse if
783  * any filedescriptors are open directories.
784  */
785 static int
786 chroot_refuse_vdir_fds(fdp)
787 	struct filedesc *fdp;
788 {
789 	struct vnode *vp;
790 	struct file *fp;
791 	int fd;
792 
793 	FILEDESC_LOCK_ASSERT(fdp);
794 
795 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
796 		fp = fget_locked(fdp, fd);
797 		if (fp == NULL)
798 			continue;
799 		if (fp->f_type == DTYPE_VNODE) {
800 			vp = fp->f_vnode;
801 			if (vp->v_type == VDIR)
802 				return (EPERM);
803 		}
804 	}
805 	return (0);
806 }
807 
808 /*
809  * This sysctl determines if we will allow a process to chroot(2) if it
810  * has a directory open:
811  *	0: disallowed for all processes.
812  *	1: allowed for processes that were not already chroot(2)'ed.
813  *	2: allowed for all processes.
814  */
815 
816 static int chroot_allow_open_directories = 1;
817 
818 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
819      &chroot_allow_open_directories, 0, "");
820 
821 /*
822  * Change notion of root (``/'') directory.
823  */
824 #ifndef _SYS_SYSPROTO_H_
825 struct chroot_args {
826 	char	*path;
827 };
828 #endif
829 int
830 chroot(td, uap)
831 	struct thread *td;
832 	struct chroot_args /* {
833 		char *path;
834 	} */ *uap;
835 {
836 	int error;
837 	struct nameidata nd;
838 	int vfslocked;
839 
840 	error = priv_check(td, PRIV_VFS_CHROOT);
841 	if (error)
842 		return (error);
843 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
844 	    UIO_USERSPACE, uap->path, td);
845 	error = namei(&nd);
846 	if (error)
847 		goto error;
848 	vfslocked = NDHASGIANT(&nd);
849 	if ((error = change_dir(nd.ni_vp, td)) != 0)
850 		goto e_vunlock;
851 #ifdef MAC
852 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
853 		goto e_vunlock;
854 #endif
855 	VOP_UNLOCK(nd.ni_vp, 0, td);
856 	error = change_root(nd.ni_vp, td);
857 	vrele(nd.ni_vp);
858 	VFS_UNLOCK_GIANT(vfslocked);
859 	NDFREE(&nd, NDF_ONLY_PNBUF);
860 	return (error);
861 e_vunlock:
862 	vput(nd.ni_vp);
863 	VFS_UNLOCK_GIANT(vfslocked);
864 error:
865 	NDFREE(&nd, NDF_ONLY_PNBUF);
866 	return (error);
867 }
868 
869 /*
870  * Common routine for chroot and chdir.  Callers must provide a locked vnode
871  * instance.
872  */
873 int
874 change_dir(vp, td)
875 	struct vnode *vp;
876 	struct thread *td;
877 {
878 	int error;
879 
880 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
881 	if (vp->v_type != VDIR)
882 		return (ENOTDIR);
883 #ifdef MAC
884 	error = mac_check_vnode_chdir(td->td_ucred, vp);
885 	if (error)
886 		return (error);
887 #endif
888 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
889 	return (error);
890 }
891 
892 /*
893  * Common routine for kern_chroot() and jail_attach().  The caller is
894  * responsible for invoking priv_check() and mac_check_chroot() to authorize
895  * this operation.
896  */
897 int
898 change_root(vp, td)
899 	struct vnode *vp;
900 	struct thread *td;
901 {
902 	struct filedesc *fdp;
903 	struct vnode *oldvp;
904 	int vfslocked;
905 	int error;
906 
907 	VFS_ASSERT_GIANT(vp->v_mount);
908 	fdp = td->td_proc->p_fd;
909 	FILEDESC_XLOCK(fdp);
910 	if (chroot_allow_open_directories == 0 ||
911 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
912 		error = chroot_refuse_vdir_fds(fdp);
913 		if (error) {
914 			FILEDESC_XUNLOCK(fdp);
915 			return (error);
916 		}
917 	}
918 	oldvp = fdp->fd_rdir;
919 	fdp->fd_rdir = vp;
920 	VREF(fdp->fd_rdir);
921 	if (!fdp->fd_jdir) {
922 		fdp->fd_jdir = vp;
923 		VREF(fdp->fd_jdir);
924 	}
925 	FILEDESC_XUNLOCK(fdp);
926 	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
927 	vrele(oldvp);
928 	VFS_UNLOCK_GIANT(vfslocked);
929 	return (0);
930 }
931 
932 /*
933  * Check permissions, allocate an open file structure, and call the device
934  * open routine if any.
935  */
936 #ifndef _SYS_SYSPROTO_H_
937 struct open_args {
938 	char	*path;
939 	int	flags;
940 	int	mode;
941 };
942 #endif
943 int
944 open(td, uap)
945 	struct thread *td;
946 	register struct open_args /* {
947 		char *path;
948 		int flags;
949 		int mode;
950 	} */ *uap;
951 {
952 
953 	return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
954 }
955 
956 int
957 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
958     int mode)
959 {
960 	struct proc *p = td->td_proc;
961 	struct filedesc *fdp = p->p_fd;
962 	struct file *fp;
963 	struct vnode *vp;
964 	struct vattr vat;
965 	struct mount *mp;
966 	int cmode;
967 	struct file *nfp;
968 	int type, indx, error;
969 	struct flock lf;
970 	struct nameidata nd;
971 	int vfslocked;
972 
973 	AUDIT_ARG(fflags, flags);
974 	AUDIT_ARG(mode, mode);
975 	if ((flags & O_ACCMODE) == O_ACCMODE)
976 		return (EINVAL);
977 	flags = FFLAGS(flags);
978 	error = falloc(td, &nfp, &indx);
979 	if (error)
980 		return (error);
981 	/* An extra reference on `nfp' has been held for us by falloc(). */
982 	fp = nfp;
983 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
984 	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
985 	td->td_dupfd = -1;		/* XXX check for fdopen */
986  	error = vn_open(&nd, &flags, cmode, fp);
987 	if (error) {
988 		/*
989 		 * If the vn_open replaced the method vector, something
990 		 * wonderous happened deep below and we just pass it up
991 		 * pretending we know what we do.
992 		 */
993 		if (error == ENXIO && fp->f_ops != &badfileops) {
994 			fdrop(fp, td);
995 			td->td_retval[0] = indx;
996 			return (0);
997 		}
998 
999 		/*
1000 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1001 		 * responsible for dropping the old contents of ofiles[indx]
1002 		 * if it succeeds.
1003 		 */
1004 		if ((error == ENODEV || error == ENXIO) &&
1005 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1006 		    (error =
1007 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1008 			td->td_retval[0] = indx;
1009 			fdrop(fp, td);
1010 			return (0);
1011 		}
1012 		/*
1013 		 * Clean up the descriptor, but only if another thread hadn't
1014 		 * replaced or closed it.
1015 		 */
1016 		fdclose(fdp, fp, indx, td);
1017 		fdrop(fp, td);
1018 
1019 		if (error == ERESTART)
1020 			error = EINTR;
1021 		return (error);
1022 	}
1023 	td->td_dupfd = 0;
1024 	vfslocked = NDHASGIANT(&nd);
1025 	NDFREE(&nd, NDF_ONLY_PNBUF);
1026 	vp = nd.ni_vp;
1027 
1028 	FILE_LOCK(fp);
1029 	fp->f_vnode = vp;
1030 	if (fp->f_data == NULL)
1031 		fp->f_data = vp;
1032 	fp->f_flag = flags & FMASK;
1033 	fp->f_seqcount = 1;
1034 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1035 	if (fp->f_ops == &badfileops)
1036 		fp->f_ops = &vnops;
1037 	FILE_UNLOCK(fp);
1038 
1039 	VOP_UNLOCK(vp, 0, td);
1040 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1041 		lf.l_whence = SEEK_SET;
1042 		lf.l_start = 0;
1043 		lf.l_len = 0;
1044 		if (flags & O_EXLOCK)
1045 			lf.l_type = F_WRLCK;
1046 		else
1047 			lf.l_type = F_RDLCK;
1048 		type = F_FLOCK;
1049 		if ((flags & FNONBLOCK) == 0)
1050 			type |= F_WAIT;
1051 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1052 			    type)) != 0)
1053 			goto bad;
1054 		fp->f_flag |= FHASLOCK;
1055 	}
1056 	if (flags & O_TRUNC) {
1057 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1058 			goto bad;
1059 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1060 		VATTR_NULL(&vat);
1061 		vat.va_size = 0;
1062 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1063 #ifdef MAC
1064 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1065 		if (error == 0)
1066 #endif
1067 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1068 		VOP_UNLOCK(vp, 0, td);
1069 		vn_finished_write(mp);
1070 		if (error)
1071 			goto bad;
1072 	}
1073 	VFS_UNLOCK_GIANT(vfslocked);
1074 	/*
1075 	 * Release our private reference, leaving the one associated with
1076 	 * the descriptor table intact.
1077 	 */
1078 	fdrop(fp, td);
1079 	td->td_retval[0] = indx;
1080 	return (0);
1081 bad:
1082 	VFS_UNLOCK_GIANT(vfslocked);
1083 	fdclose(fdp, fp, indx, td);
1084 	fdrop(fp, td);
1085 	return (error);
1086 }
1087 
#ifdef COMPAT_43
/*
 * Create a file.
 *
 * Compatibility entry point: equivalent to kern_open() on a user-space
 * path with O_WRONLY | O_CREAT | O_TRUNC and the supplied mode.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	int error;

	error = kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode);
	return (error);
}
#endif /* COMPAT_43 */
1111 
1112 /*
1113  * Create a special file.
1114  */
1115 #ifndef _SYS_SYSPROTO_H_
1116 struct mknod_args {
1117 	char	*path;
1118 	int	mode;
1119 	int	dev;
1120 };
1121 #endif
1122 int
1123 mknod(td, uap)
1124 	struct thread *td;
1125 	register struct mknod_args /* {
1126 		char *path;
1127 		int mode;
1128 		int dev;
1129 	} */ *uap;
1130 {
1131 
1132 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1133 }
1134 
1135 int
1136 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1137     int dev)
1138 {
1139 	struct vnode *vp;
1140 	struct mount *mp;
1141 	struct vattr vattr;
1142 	int error;
1143 	int whiteout = 0;
1144 	struct nameidata nd;
1145 	int vfslocked;
1146 
1147 	AUDIT_ARG(mode, mode);
1148 	AUDIT_ARG(dev, dev);
1149 	switch (mode & S_IFMT) {
1150 	case S_IFCHR:
1151 	case S_IFBLK:
1152 		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
1153 		break;
1154 	case S_IFMT:
1155 		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
1156 		break;
1157 	case S_IFWHT:
1158 		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
1159 		break;
1160 	default:
1161 		error = EINVAL;
1162 		break;
1163 	}
1164 	if (error)
1165 		return (error);
1166 restart:
1167 	bwillwrite();
1168 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1169 	    pathseg, path, td);
1170 	if ((error = namei(&nd)) != 0)
1171 		return (error);
1172 	vfslocked = NDHASGIANT(&nd);
1173 	vp = nd.ni_vp;
1174 	if (vp != NULL) {
1175 		NDFREE(&nd, NDF_ONLY_PNBUF);
1176 		if (vp == nd.ni_dvp)
1177 			vrele(nd.ni_dvp);
1178 		else
1179 			vput(nd.ni_dvp);
1180 		vrele(vp);
1181 		VFS_UNLOCK_GIANT(vfslocked);
1182 		return (EEXIST);
1183 	} else {
1184 		VATTR_NULL(&vattr);
1185 		FILEDESC_SLOCK(td->td_proc->p_fd);
1186 		vattr.va_mode = (mode & ALLPERMS) &
1187 		    ~td->td_proc->p_fd->fd_cmask;
1188 		FILEDESC_SUNLOCK(td->td_proc->p_fd);
1189 		vattr.va_rdev = dev;
1190 		whiteout = 0;
1191 
1192 		switch (mode & S_IFMT) {
1193 		case S_IFMT:	/* used by badsect to flag bad sectors */
1194 			vattr.va_type = VBAD;
1195 			break;
1196 		case S_IFCHR:
1197 			vattr.va_type = VCHR;
1198 			break;
1199 		case S_IFBLK:
1200 			vattr.va_type = VBLK;
1201 			break;
1202 		case S_IFWHT:
1203 			whiteout = 1;
1204 			break;
1205 		default:
1206 			panic("kern_mknod: invalid mode");
1207 		}
1208 	}
1209 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1210 		NDFREE(&nd, NDF_ONLY_PNBUF);
1211 		vput(nd.ni_dvp);
1212 		VFS_UNLOCK_GIANT(vfslocked);
1213 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1214 			return (error);
1215 		goto restart;
1216 	}
1217 #ifdef MAC
1218 	if (error == 0 && !whiteout)
1219 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1220 		    &nd.ni_cnd, &vattr);
1221 #endif
1222 	if (!error) {
1223 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1224 		if (whiteout)
1225 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1226 		else {
1227 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1228 						&nd.ni_cnd, &vattr);
1229 			if (error == 0)
1230 				vput(nd.ni_vp);
1231 		}
1232 	}
1233 	NDFREE(&nd, NDF_ONLY_PNBUF);
1234 	vput(nd.ni_dvp);
1235 	vn_finished_write(mp);
1236 	VFS_UNLOCK_GIANT(vfslocked);
1237 	return (error);
1238 }
1239 
1240 /*
1241  * Create a named pipe.
1242  */
1243 #ifndef _SYS_SYSPROTO_H_
1244 struct mkfifo_args {
1245 	char	*path;
1246 	int	mode;
1247 };
1248 #endif
1249 int
1250 mkfifo(td, uap)
1251 	struct thread *td;
1252 	register struct mkfifo_args /* {
1253 		char *path;
1254 		int mode;
1255 	} */ *uap;
1256 {
1257 
1258 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1259 }
1260 
1261 int
1262 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1263 {
1264 	struct mount *mp;
1265 	struct vattr vattr;
1266 	int error;
1267 	struct nameidata nd;
1268 	int vfslocked;
1269 
1270 	AUDIT_ARG(mode, mode);
1271 restart:
1272 	bwillwrite();
1273 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1274 	    pathseg, path, td);
1275 	if ((error = namei(&nd)) != 0)
1276 		return (error);
1277 	vfslocked = NDHASGIANT(&nd);
1278 	if (nd.ni_vp != NULL) {
1279 		NDFREE(&nd, NDF_ONLY_PNBUF);
1280 		if (nd.ni_vp == nd.ni_dvp)
1281 			vrele(nd.ni_dvp);
1282 		else
1283 			vput(nd.ni_dvp);
1284 		vrele(nd.ni_vp);
1285 		VFS_UNLOCK_GIANT(vfslocked);
1286 		return (EEXIST);
1287 	}
1288 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1289 		NDFREE(&nd, NDF_ONLY_PNBUF);
1290 		vput(nd.ni_dvp);
1291 		VFS_UNLOCK_GIANT(vfslocked);
1292 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1293 			return (error);
1294 		goto restart;
1295 	}
1296 	VATTR_NULL(&vattr);
1297 	vattr.va_type = VFIFO;
1298 	FILEDESC_SLOCK(td->td_proc->p_fd);
1299 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1300 	FILEDESC_SUNLOCK(td->td_proc->p_fd);
1301 #ifdef MAC
1302 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1303 	    &vattr);
1304 	if (error)
1305 		goto out;
1306 #endif
1307 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1308 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1309 	if (error == 0)
1310 		vput(nd.ni_vp);
1311 #ifdef MAC
1312 out:
1313 #endif
1314 	vput(nd.ni_dvp);
1315 	vn_finished_write(mp);
1316 	VFS_UNLOCK_GIANT(vfslocked);
1317 	NDFREE(&nd, NDF_ONLY_PNBUF);
1318 	return (error);
1319 }
1320 
1321 /*
1322  * Make a hard file link.
1323  */
1324 #ifndef _SYS_SYSPROTO_H_
1325 struct link_args {
1326 	char	*path;
1327 	char	*link;
1328 };
1329 #endif
1330 int
1331 link(td, uap)
1332 	struct thread *td;
1333 	register struct link_args /* {
1334 		char *path;
1335 		char *link;
1336 	} */ *uap;
1337 {
1338 	int error;
1339 
1340 	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1341 	return (error);
1342 }
1343 
1344 static int hardlink_check_uid = 0;
1345 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1346     &hardlink_check_uid, 0,
1347     "Unprivileged processes cannot create hard links to files owned by other "
1348     "users");
1349 static int hardlink_check_gid = 0;
1350 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1351     &hardlink_check_gid, 0,
1352     "Unprivileged processes cannot create hard links to files owned by other "
1353     "groups");
1354 
1355 static int
1356 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1357 {
1358 	struct vattr va;
1359 	int error;
1360 
1361 	if (!hardlink_check_uid && !hardlink_check_gid)
1362 		return (0);
1363 
1364 	error = VOP_GETATTR(vp, &va, cred, td);
1365 	if (error != 0)
1366 		return (error);
1367 
1368 	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
1369 		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
1370 		if (error)
1371 			return (error);
1372 	}
1373 
1374 	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
1375 		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
1376 		if (error)
1377 			return (error);
1378 	}
1379 
1380 	return (0);
1381 }
1382 
1383 int
1384 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1385 {
1386 	struct vnode *vp;
1387 	struct mount *mp;
1388 	struct nameidata nd;
1389 	int vfslocked;
1390 	int lvfslocked;
1391 	int error;
1392 
1393 	bwillwrite();
1394 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
1395 	if ((error = namei(&nd)) != 0)
1396 		return (error);
1397 	vfslocked = NDHASGIANT(&nd);
1398 	NDFREE(&nd, NDF_ONLY_PNBUF);
1399 	vp = nd.ni_vp;
1400 	if (vp->v_type == VDIR) {
1401 		vrele(vp);
1402 		VFS_UNLOCK_GIANT(vfslocked);
1403 		return (EPERM);		/* POSIX */
1404 	}
1405 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1406 		vrele(vp);
1407 		VFS_UNLOCK_GIANT(vfslocked);
1408 		return (error);
1409 	}
1410 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1411 	    segflg, link, td);
1412 	if ((error = namei(&nd)) == 0) {
1413 		lvfslocked = NDHASGIANT(&nd);
1414 		if (nd.ni_vp != NULL) {
1415 			if (nd.ni_dvp == nd.ni_vp)
1416 				vrele(nd.ni_dvp);
1417 			else
1418 				vput(nd.ni_dvp);
1419 			vrele(nd.ni_vp);
1420 			error = EEXIST;
1421 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1422 		    == 0) {
1423 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1424 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1425 			error = can_hardlink(vp, td, td->td_ucred);
1426 			if (error == 0)
1427 #ifdef MAC
1428 				error = mac_check_vnode_link(td->td_ucred,
1429 				    nd.ni_dvp, vp, &nd.ni_cnd);
1430 			if (error == 0)
1431 #endif
1432 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1433 			VOP_UNLOCK(vp, 0, td);
1434 			vput(nd.ni_dvp);
1435 		}
1436 		NDFREE(&nd, NDF_ONLY_PNBUF);
1437 		VFS_UNLOCK_GIANT(lvfslocked);
1438 	}
1439 	vrele(vp);
1440 	vn_finished_write(mp);
1441 	VFS_UNLOCK_GIANT(vfslocked);
1442 	return (error);
1443 }
1444 
1445 /*
1446  * Make a symbolic link.
1447  */
1448 #ifndef _SYS_SYSPROTO_H_
1449 struct symlink_args {
1450 	char	*path;
1451 	char	*link;
1452 };
1453 #endif
1454 int
1455 symlink(td, uap)
1456 	struct thread *td;
1457 	register struct symlink_args /* {
1458 		char *path;
1459 		char *link;
1460 	} */ *uap;
1461 {
1462 
1463 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1464 }
1465 
1466 int
1467 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1468 {
1469 	struct mount *mp;
1470 	struct vattr vattr;
1471 	char *syspath;
1472 	int error;
1473 	struct nameidata nd;
1474 	int vfslocked;
1475 
1476 	if (segflg == UIO_SYSSPACE) {
1477 		syspath = path;
1478 	} else {
1479 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1480 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1481 			goto out;
1482 	}
1483 	AUDIT_ARG(text, syspath);
1484 restart:
1485 	bwillwrite();
1486 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1487 	    segflg, link, td);
1488 	if ((error = namei(&nd)) != 0)
1489 		goto out;
1490 	vfslocked = NDHASGIANT(&nd);
1491 	if (nd.ni_vp) {
1492 		NDFREE(&nd, NDF_ONLY_PNBUF);
1493 		if (nd.ni_vp == nd.ni_dvp)
1494 			vrele(nd.ni_dvp);
1495 		else
1496 			vput(nd.ni_dvp);
1497 		vrele(nd.ni_vp);
1498 		VFS_UNLOCK_GIANT(vfslocked);
1499 		error = EEXIST;
1500 		goto out;
1501 	}
1502 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1503 		NDFREE(&nd, NDF_ONLY_PNBUF);
1504 		vput(nd.ni_dvp);
1505 		VFS_UNLOCK_GIANT(vfslocked);
1506 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1507 			goto out;
1508 		goto restart;
1509 	}
1510 	VATTR_NULL(&vattr);
1511 	FILEDESC_SLOCK(td->td_proc->p_fd);
1512 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1513 	FILEDESC_SUNLOCK(td->td_proc->p_fd);
1514 #ifdef MAC
1515 	vattr.va_type = VLNK;
1516 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1517 	    &vattr);
1518 	if (error)
1519 		goto out2;
1520 #endif
1521 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1522 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1523 	if (error == 0)
1524 		vput(nd.ni_vp);
1525 #ifdef MAC
1526 out2:
1527 #endif
1528 	NDFREE(&nd, NDF_ONLY_PNBUF);
1529 	vput(nd.ni_dvp);
1530 	vn_finished_write(mp);
1531 	VFS_UNLOCK_GIANT(vfslocked);
1532 out:
1533 	if (segflg != UIO_SYSSPACE)
1534 		uma_zfree(namei_zone, syspath);
1535 	return (error);
1536 }
1537 
1538 /*
1539  * Delete a whiteout from the filesystem.
1540  */
1541 int
1542 undelete(td, uap)
1543 	struct thread *td;
1544 	register struct undelete_args /* {
1545 		char *path;
1546 	} */ *uap;
1547 {
1548 	int error;
1549 	struct mount *mp;
1550 	struct nameidata nd;
1551 	int vfslocked;
1552 
1553 restart:
1554 	bwillwrite();
1555 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1556 	    UIO_USERSPACE, uap->path, td);
1557 	error = namei(&nd);
1558 	if (error)
1559 		return (error);
1560 	vfslocked = NDHASGIANT(&nd);
1561 
1562 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1563 		NDFREE(&nd, NDF_ONLY_PNBUF);
1564 		if (nd.ni_vp == nd.ni_dvp)
1565 			vrele(nd.ni_dvp);
1566 		else
1567 			vput(nd.ni_dvp);
1568 		if (nd.ni_vp)
1569 			vrele(nd.ni_vp);
1570 		VFS_UNLOCK_GIANT(vfslocked);
1571 		return (EEXIST);
1572 	}
1573 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1574 		NDFREE(&nd, NDF_ONLY_PNBUF);
1575 		vput(nd.ni_dvp);
1576 		VFS_UNLOCK_GIANT(vfslocked);
1577 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1578 			return (error);
1579 		goto restart;
1580 	}
1581 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1582 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1583 	NDFREE(&nd, NDF_ONLY_PNBUF);
1584 	vput(nd.ni_dvp);
1585 	vn_finished_write(mp);
1586 	VFS_UNLOCK_GIANT(vfslocked);
1587 	return (error);
1588 }
1589 
1590 /*
1591  * Delete a name from the filesystem.
1592  */
1593 #ifndef _SYS_SYSPROTO_H_
1594 struct unlink_args {
1595 	char	*path;
1596 };
1597 #endif
1598 int
1599 unlink(td, uap)
1600 	struct thread *td;
1601 	struct unlink_args /* {
1602 		char *path;
1603 	} */ *uap;
1604 {
1605 	int error;
1606 
1607 	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1608 	return (error);
1609 }
1610 
1611 int
1612 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1613 {
1614 	struct mount *mp;
1615 	struct vnode *vp;
1616 	int error;
1617 	struct nameidata nd;
1618 	int vfslocked;
1619 
1620 restart:
1621 	bwillwrite();
1622 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1623 	    pathseg, path, td);
1624 	if ((error = namei(&nd)) != 0)
1625 		return (error == EINVAL ? EPERM : error);
1626 	vfslocked = NDHASGIANT(&nd);
1627 	vp = nd.ni_vp;
1628 	if (vp->v_type == VDIR)
1629 		error = EPERM;		/* POSIX */
1630 	else {
1631 		/*
1632 		 * The root of a mounted filesystem cannot be deleted.
1633 		 *
1634 		 * XXX: can this only be a VDIR case?
1635 		 */
1636 		if (vp->v_vflag & VV_ROOT)
1637 			error = EBUSY;
1638 	}
1639 	if (error == 0) {
1640 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1641 			NDFREE(&nd, NDF_ONLY_PNBUF);
1642 			vput(nd.ni_dvp);
1643 			if (vp == nd.ni_dvp)
1644 				vrele(vp);
1645 			else
1646 				vput(vp);
1647 			VFS_UNLOCK_GIANT(vfslocked);
1648 			if ((error = vn_start_write(NULL, &mp,
1649 			    V_XSLEEP | PCATCH)) != 0)
1650 				return (error);
1651 			goto restart;
1652 		}
1653 #ifdef MAC
1654 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1655 		    &nd.ni_cnd);
1656 		if (error)
1657 			goto out;
1658 #endif
1659 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1660 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1661 #ifdef MAC
1662 out:
1663 #endif
1664 		vn_finished_write(mp);
1665 	}
1666 	NDFREE(&nd, NDF_ONLY_PNBUF);
1667 	vput(nd.ni_dvp);
1668 	if (vp == nd.ni_dvp)
1669 		vrele(vp);
1670 	else
1671 		vput(vp);
1672 	VFS_UNLOCK_GIANT(vfslocked);
1673 	return (error);
1674 }
1675 
1676 /*
1677  * Reposition read/write file offset.
1678  */
1679 #ifndef _SYS_SYSPROTO_H_
1680 struct lseek_args {
1681 	int	fd;
1682 	int	pad;
1683 	off_t	offset;
1684 	int	whence;
1685 };
1686 #endif
1687 int
1688 lseek(td, uap)
1689 	struct thread *td;
1690 	register struct lseek_args /* {
1691 		int fd;
1692 		int pad;
1693 		off_t offset;
1694 		int whence;
1695 	} */ *uap;
1696 {
1697 	struct ucred *cred = td->td_ucred;
1698 	struct file *fp;
1699 	struct vnode *vp;
1700 	struct vattr vattr;
1701 	off_t offset;
1702 	int error, noneg;
1703 	int vfslocked;
1704 
1705 	if ((error = fget(td, uap->fd, &fp)) != 0)
1706 		return (error);
1707 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1708 		fdrop(fp, td);
1709 		return (ESPIPE);
1710 	}
1711 	vp = fp->f_vnode;
1712 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1713 	noneg = (vp->v_type != VCHR);
1714 	offset = uap->offset;
1715 	switch (uap->whence) {
1716 	case L_INCR:
1717 		if (noneg &&
1718 		    (fp->f_offset < 0 ||
1719 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1720 			error = EOVERFLOW;
1721 			break;
1722 		}
1723 		offset += fp->f_offset;
1724 		break;
1725 	case L_XTND:
1726 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1727 		error = VOP_GETATTR(vp, &vattr, cred, td);
1728 		VOP_UNLOCK(vp, 0, td);
1729 		if (error)
1730 			break;
1731 		if (noneg &&
1732 		    (vattr.va_size > OFF_MAX ||
1733 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1734 			error = EOVERFLOW;
1735 			break;
1736 		}
1737 		offset += vattr.va_size;
1738 		break;
1739 	case L_SET:
1740 		break;
1741 	case SEEK_DATA:
1742 		error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td);
1743 		break;
1744 	case SEEK_HOLE:
1745 		error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td);
1746 		break;
1747 	default:
1748 		error = EINVAL;
1749 	}
1750 	if (error == 0 && noneg && offset < 0)
1751 		error = EINVAL;
1752 	if (error != 0)
1753 		goto drop;
1754 	fp->f_offset = offset;
1755 	*(off_t *)(td->td_retval) = fp->f_offset;
1756 drop:
1757 	fdrop(fp, td);
1758 	VFS_UNLOCK_GIANT(vfslocked);
1759 	return (error);
1760 }
1761 
#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
/* Argument layout of the old lseek when sysproto.h has not supplied it. */
struct olseek_args {
	int	fd;
	long	offset;
	int	whence;
};
#endif
/*
 * Old (COMPAT_43) lseek entry point, whose offset argument is a long.
 * The arguments are repacked into a struct lseek_args (the 'pad' member
 * is left unset; lseek() does not read it) and handed to lseek().
 */
int
olseek(struct thread *td, struct olseek_args *uap)
{
	struct lseek_args largs;

	largs.fd = uap->fd;
	largs.offset = uap->offset;
	largs.whence = uap->whence;
	return (lseek(td, &largs));
}
#endif /* COMPAT_43 */
1795 
1796 /* Version with the 'pad' argument */
1797 int
1798 freebsd6_lseek(td, uap)
1799 	struct thread *td;
1800 	register struct freebsd6_lseek_args *uap;
1801 {
1802 	struct lseek_args ouap;
1803 
1804 	ouap.fd = uap->fd;
1805 	ouap.offset = uap->offset;
1806 	ouap.whence = uap->whence;
1807 	return (lseek(td, &ouap));
1808 }
1809 
1810 /*
1811  * Check access permissions using passed credentials.
1812  */
1813 static int
1814 vn_access(vp, user_flags, cred, td)
1815 	struct vnode	*vp;
1816 	int		user_flags;
1817 	struct ucred	*cred;
1818 	struct thread	*td;
1819 {
1820 	int error, flags;
1821 
1822 	/* Flags == 0 means only check for existence. */
1823 	error = 0;
1824 	if (user_flags) {
1825 		flags = 0;
1826 		if (user_flags & R_OK)
1827 			flags |= VREAD;
1828 		if (user_flags & W_OK)
1829 			flags |= VWRITE;
1830 		if (user_flags & X_OK)
1831 			flags |= VEXEC;
1832 #ifdef MAC
1833 		error = mac_check_vnode_access(cred, vp, flags);
1834 		if (error)
1835 			return (error);
1836 #endif
1837 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1838 			error = VOP_ACCESS(vp, flags, cred, td);
1839 	}
1840 	return (error);
1841 }
1842 
1843 /*
1844  * Check access permissions using "real" credentials.
1845  */
1846 #ifndef _SYS_SYSPROTO_H_
1847 struct access_args {
1848 	char	*path;
1849 	int	flags;
1850 };
1851 #endif
1852 int
1853 access(td, uap)
1854 	struct thread *td;
1855 	register struct access_args /* {
1856 		char *path;
1857 		int flags;
1858 	} */ *uap;
1859 {
1860 
1861 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1862 }
1863 
1864 int
1865 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1866 {
1867 	struct ucred *cred, *tmpcred;
1868 	register struct vnode *vp;
1869 	struct nameidata nd;
1870 	int vfslocked;
1871 	int error;
1872 
1873 	/*
1874 	 * Create and modify a temporary credential instead of one that
1875 	 * is potentially shared.  This could also mess up socket
1876 	 * buffer accounting which can run in an interrupt context.
1877 	 */
1878 	cred = td->td_ucred;
1879 	tmpcred = crdup(cred);
1880 	tmpcred->cr_uid = cred->cr_ruid;
1881 	tmpcred->cr_groups[0] = cred->cr_rgid;
1882 	td->td_ucred = tmpcred;
1883 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1884 	    pathseg, path, td);
1885 	if ((error = namei(&nd)) != 0)
1886 		goto out1;
1887 	vfslocked = NDHASGIANT(&nd);
1888 	vp = nd.ni_vp;
1889 
1890 	error = vn_access(vp, flags, tmpcred, td);
1891 	NDFREE(&nd, NDF_ONLY_PNBUF);
1892 	vput(vp);
1893 	VFS_UNLOCK_GIANT(vfslocked);
1894 out1:
1895 	td->td_ucred = cred;
1896 	crfree(tmpcred);
1897 	return (error);
1898 }
1899 
1900 /*
1901  * Check access permissions using "effective" credentials.
1902  */
1903 #ifndef _SYS_SYSPROTO_H_
1904 struct eaccess_args {
1905 	char	*path;
1906 	int	flags;
1907 };
1908 #endif
1909 int
1910 eaccess(td, uap)
1911 	struct thread *td;
1912 	register struct eaccess_args /* {
1913 		char *path;
1914 		int flags;
1915 	} */ *uap;
1916 {
1917 
1918 	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1919 }
1920 
1921 int
1922 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1923 {
1924 	struct nameidata nd;
1925 	struct vnode *vp;
1926 	int vfslocked;
1927 	int error;
1928 
1929 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1930 	    pathseg, path, td);
1931 	if ((error = namei(&nd)) != 0)
1932 		return (error);
1933 	vp = nd.ni_vp;
1934 	vfslocked = NDHASGIANT(&nd);
1935 	error = vn_access(vp, flags, td->td_ucred, td);
1936 	NDFREE(&nd, NDF_ONLY_PNBUF);
1937 	vput(vp);
1938 	VFS_UNLOCK_GIANT(vfslocked);
1939 	return (error);
1940 }
1941 
1942 #if defined(COMPAT_43)
1943 /*
1944  * Get file status; this version follows links.
1945  */
1946 #ifndef _SYS_SYSPROTO_H_
1947 struct ostat_args {
1948 	char	*path;
1949 	struct ostat *ub;
1950 };
1951 #endif
1952 int
1953 ostat(td, uap)
1954 	struct thread *td;
1955 	register struct ostat_args /* {
1956 		char *path;
1957 		struct ostat *ub;
1958 	} */ *uap;
1959 {
1960 	struct stat sb;
1961 	struct ostat osb;
1962 	int error;
1963 
1964 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1965 	if (error)
1966 		return (error);
1967 	cvtstat(&sb, &osb);
1968 	error = copyout(&osb, uap->ub, sizeof (osb));
1969 	return (error);
1970 }
1971 
1972 /*
1973  * Get file status; this version does not follow links.
1974  */
1975 #ifndef _SYS_SYSPROTO_H_
1976 struct olstat_args {
1977 	char	*path;
1978 	struct ostat *ub;
1979 };
1980 #endif
1981 int
1982 olstat(td, uap)
1983 	struct thread *td;
1984 	register struct olstat_args /* {
1985 		char *path;
1986 		struct ostat *ub;
1987 	} */ *uap;
1988 {
1989 	struct stat sb;
1990 	struct ostat osb;
1991 	int error;
1992 
1993 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
1994 	if (error)
1995 		return (error);
1996 	cvtstat(&sb, &osb);
1997 	error = copyout(&osb, uap->ub, sizeof (osb));
1998 	return (error);
1999 }
2000 
/*
 * Convert from a new (struct stat) to an old (struct ostat) stat structure.
 */
2004 void
2005 cvtstat(st, ost)
2006 	struct stat *st;
2007 	struct ostat *ost;
2008 {
2009 
2010 	ost->st_dev = st->st_dev;
2011 	ost->st_ino = st->st_ino;
2012 	ost->st_mode = st->st_mode;
2013 	ost->st_nlink = st->st_nlink;
2014 	ost->st_uid = st->st_uid;
2015 	ost->st_gid = st->st_gid;
2016 	ost->st_rdev = st->st_rdev;
2017 	if (st->st_size < (quad_t)1 << 32)
2018 		ost->st_size = st->st_size;
2019 	else
2020 		ost->st_size = -2;
2021 	ost->st_atime = st->st_atime;
2022 	ost->st_mtime = st->st_mtime;
2023 	ost->st_ctime = st->st_ctime;
2024 	ost->st_blksize = st->st_blksize;
2025 	ost->st_blocks = st->st_blocks;
2026 	ost->st_flags = st->st_flags;
2027 	ost->st_gen = st->st_gen;
2028 }
2029 #endif /* COMPAT_43 */
2030 
2031 /*
2032  * Get file status; this version follows links.
2033  */
2034 #ifndef _SYS_SYSPROTO_H_
2035 struct stat_args {
2036 	char	*path;
2037 	struct stat *ub;
2038 };
2039 #endif
2040 int
2041 stat(td, uap)
2042 	struct thread *td;
2043 	register struct stat_args /* {
2044 		char *path;
2045 		struct stat *ub;
2046 	} */ *uap;
2047 {
2048 	struct stat sb;
2049 	int error;
2050 
2051 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2052 	if (error == 0)
2053 		error = copyout(&sb, uap->ub, sizeof (sb));
2054 	return (error);
2055 }
2056 
2057 int
2058 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2059 {
2060 	struct nameidata nd;
2061 	struct stat sb;
2062 	int error, vfslocked;
2063 
2064 	NDINIT(&nd, LOOKUP,
2065 	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2066 	    pathseg, path, td);
2067 	if ((error = namei(&nd)) != 0)
2068 		return (error);
2069 	vfslocked = NDHASGIANT(&nd);
2070 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2071 	NDFREE(&nd, NDF_ONLY_PNBUF);
2072 	vput(nd.ni_vp);
2073 	VFS_UNLOCK_GIANT(vfslocked);
2074 	if (mtx_owned(&Giant))
2075 		printf("stat(%d): %s\n", vfslocked, path);
2076 	if (error)
2077 		return (error);
2078 	*sbp = sb;
2079 	return (0);
2080 }
2081 
2082 /*
2083  * Get file status; this version does not follow links.
2084  */
2085 #ifndef _SYS_SYSPROTO_H_
2086 struct lstat_args {
2087 	char	*path;
2088 	struct stat *ub;
2089 };
2090 #endif
2091 int
2092 lstat(td, uap)
2093 	struct thread *td;
2094 	register struct lstat_args /* {
2095 		char *path;
2096 		struct stat *ub;
2097 	} */ *uap;
2098 {
2099 	struct stat sb;
2100 	int error;
2101 
2102 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2103 	if (error == 0)
2104 		error = copyout(&sb, uap->ub, sizeof (sb));
2105 	return (error);
2106 }
2107 
2108 int
2109 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2110 {
2111 	struct vnode *vp;
2112 	struct stat sb;
2113 	struct nameidata nd;
2114 	int error, vfslocked;
2115 
2116 	NDINIT(&nd, LOOKUP,
2117 	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2118 	    pathseg, path, td);
2119 	if ((error = namei(&nd)) != 0)
2120 		return (error);
2121 	vfslocked = NDHASGIANT(&nd);
2122 	vp = nd.ni_vp;
2123 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2124 	NDFREE(&nd, NDF_ONLY_PNBUF);
2125 	vput(vp);
2126 	VFS_UNLOCK_GIANT(vfslocked);
2127 	if (error)
2128 		return (error);
2129 	*sbp = sb;
2130 	return (0);
2131 }
2132 
2133 /*
2134  * Implementation of the NetBSD [l]stat() functions.
2135  */
2136 void
2137 cvtnstat(sb, nsb)
2138 	struct stat *sb;
2139 	struct nstat *nsb;
2140 {
2141 	bzero(nsb, sizeof *nsb);
2142 	nsb->st_dev = sb->st_dev;
2143 	nsb->st_ino = sb->st_ino;
2144 	nsb->st_mode = sb->st_mode;
2145 	nsb->st_nlink = sb->st_nlink;
2146 	nsb->st_uid = sb->st_uid;
2147 	nsb->st_gid = sb->st_gid;
2148 	nsb->st_rdev = sb->st_rdev;
2149 	nsb->st_atimespec = sb->st_atimespec;
2150 	nsb->st_mtimespec = sb->st_mtimespec;
2151 	nsb->st_ctimespec = sb->st_ctimespec;
2152 	nsb->st_size = sb->st_size;
2153 	nsb->st_blocks = sb->st_blocks;
2154 	nsb->st_blksize = sb->st_blksize;
2155 	nsb->st_flags = sb->st_flags;
2156 	nsb->st_gen = sb->st_gen;
2157 	nsb->st_birthtimespec = sb->st_birthtimespec;
2158 }
2159 
2160 #ifndef _SYS_SYSPROTO_H_
2161 struct nstat_args {
2162 	char	*path;
2163 	struct nstat *ub;
2164 };
2165 #endif
2166 int
2167 nstat(td, uap)
2168 	struct thread *td;
2169 	register struct nstat_args /* {
2170 		char *path;
2171 		struct nstat *ub;
2172 	} */ *uap;
2173 {
2174 	struct stat sb;
2175 	struct nstat nsb;
2176 	int error;
2177 
2178 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2179 	if (error)
2180 		return (error);
2181 	cvtnstat(&sb, &nsb);
2182 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2183 	return (error);
2184 }
2185 
2186 /*
2187  * NetBSD lstat.  Get file status; this version does not follow links.
2188  */
2189 #ifndef _SYS_SYSPROTO_H_
2190 struct lstat_args {
2191 	char	*path;
2192 	struct stat *ub;
2193 };
2194 #endif
2195 int
2196 nlstat(td, uap)
2197 	struct thread *td;
2198 	register struct nlstat_args /* {
2199 		char *path;
2200 		struct nstat *ub;
2201 	} */ *uap;
2202 {
2203 	struct stat sb;
2204 	struct nstat nsb;
2205 	int error;
2206 
2207 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2208 	if (error)
2209 		return (error);
2210 	cvtnstat(&sb, &nsb);
2211 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2212 	return (error);
2213 }
2214 
2215 /*
2216  * Get configurable pathname variables.
2217  */
2218 #ifndef _SYS_SYSPROTO_H_
2219 struct pathconf_args {
2220 	char	*path;
2221 	int	name;
2222 };
2223 #endif
2224 int
2225 pathconf(td, uap)
2226 	struct thread *td;
2227 	register struct pathconf_args /* {
2228 		char *path;
2229 		int name;
2230 	} */ *uap;
2231 {
2232 
2233 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2234 }
2235 
2236 int
2237 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2238 {
2239 	struct nameidata nd;
2240 	int error, vfslocked;
2241 
2242 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2243 	    pathseg, path, td);
2244 	if ((error = namei(&nd)) != 0)
2245 		return (error);
2246 	vfslocked = NDHASGIANT(&nd);
2247 	NDFREE(&nd, NDF_ONLY_PNBUF);
2248 
2249 	/* If asynchronous I/O is available, it works for all files. */
2250 	if (name == _PC_ASYNC_IO)
2251 		td->td_retval[0] = async_io_version;
2252 	else
2253 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2254 	vput(nd.ni_vp);
2255 	VFS_UNLOCK_GIANT(vfslocked);
2256 	return (error);
2257 }
2258 
2259 /*
2260  * Return target name of a symbolic link.
2261  */
2262 #ifndef _SYS_SYSPROTO_H_
2263 struct readlink_args {
2264 	char	*path;
2265 	char	*buf;
2266 	int	count;
2267 };
2268 #endif
2269 int
2270 readlink(td, uap)
2271 	struct thread *td;
2272 	register struct readlink_args /* {
2273 		char *path;
2274 		char *buf;
2275 		int count;
2276 	} */ *uap;
2277 {
2278 
2279 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2280 	    UIO_USERSPACE, uap->count));
2281 }
2282 
2283 int
2284 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2285     enum uio_seg bufseg, int count)
2286 {
2287 	register struct vnode *vp;
2288 	struct iovec aiov;
2289 	struct uio auio;
2290 	int error;
2291 	struct nameidata nd;
2292 	int vfslocked;
2293 
2294 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2295 	    pathseg, path, td);
2296 	if ((error = namei(&nd)) != 0)
2297 		return (error);
2298 	NDFREE(&nd, NDF_ONLY_PNBUF);
2299 	vfslocked = NDHASGIANT(&nd);
2300 	vp = nd.ni_vp;
2301 #ifdef MAC
2302 	error = mac_check_vnode_readlink(td->td_ucred, vp);
2303 	if (error) {
2304 		vput(vp);
2305 		VFS_UNLOCK_GIANT(vfslocked);
2306 		return (error);
2307 	}
2308 #endif
2309 	if (vp->v_type != VLNK)
2310 		error = EINVAL;
2311 	else {
2312 		aiov.iov_base = buf;
2313 		aiov.iov_len = count;
2314 		auio.uio_iov = &aiov;
2315 		auio.uio_iovcnt = 1;
2316 		auio.uio_offset = 0;
2317 		auio.uio_rw = UIO_READ;
2318 		auio.uio_segflg = bufseg;
2319 		auio.uio_td = td;
2320 		auio.uio_resid = count;
2321 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2322 	}
2323 	vput(vp);
2324 	VFS_UNLOCK_GIANT(vfslocked);
2325 	td->td_retval[0] = count - auio.uio_resid;
2326 	return (error);
2327 }
2328 
2329 /*
2330  * Common implementation code for chflags() and fchflags().
2331  */
2332 static int
2333 setfflags(td, vp, flags)
2334 	struct thread *td;
2335 	struct vnode *vp;
2336 	int flags;
2337 {
2338 	int error;
2339 	struct mount *mp;
2340 	struct vattr vattr;
2341 
2342 	/*
2343 	 * Prevent non-root users from setting flags on devices.  When
2344 	 * a device is reused, users can retain ownership of the device
2345 	 * if they are allowed to set flags and programs assume that
2346 	 * chown can't fail when done as root.
2347 	 */
2348 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2349 		error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
2350 		if (error)
2351 			return (error);
2352 	}
2353 
2354 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2355 		return (error);
2356 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2357 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2358 	VATTR_NULL(&vattr);
2359 	vattr.va_flags = flags;
2360 #ifdef MAC
2361 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2362 	if (error == 0)
2363 #endif
2364 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2365 	VOP_UNLOCK(vp, 0, td);
2366 	vn_finished_write(mp);
2367 	return (error);
2368 }
2369 
2370 /*
2371  * Change flags of a file given a path name.
2372  */
2373 #ifndef _SYS_SYSPROTO_H_
2374 struct chflags_args {
2375 	char	*path;
2376 	int	flags;
2377 };
2378 #endif
2379 int
2380 chflags(td, uap)
2381 	struct thread *td;
2382 	register struct chflags_args /* {
2383 		char *path;
2384 		int flags;
2385 	} */ *uap;
2386 {
2387 	int error;
2388 	struct nameidata nd;
2389 	int vfslocked;
2390 
2391 	AUDIT_ARG(fflags, uap->flags);
2392 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2393 	    uap->path, td);
2394 	if ((error = namei(&nd)) != 0)
2395 		return (error);
2396 	NDFREE(&nd, NDF_ONLY_PNBUF);
2397 	vfslocked = NDHASGIANT(&nd);
2398 	error = setfflags(td, nd.ni_vp, uap->flags);
2399 	vrele(nd.ni_vp);
2400 	VFS_UNLOCK_GIANT(vfslocked);
2401 	return (error);
2402 }
2403 
2404 /*
2405  * Same as chflags() but doesn't follow symlinks.
2406  */
2407 int
2408 lchflags(td, uap)
2409 	struct thread *td;
2410 	register struct lchflags_args /* {
2411 		char *path;
2412 		int flags;
2413 	} */ *uap;
2414 {
2415 	int error;
2416 	struct nameidata nd;
2417 	int vfslocked;
2418 
2419 	AUDIT_ARG(fflags, uap->flags);
2420 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2421 	    uap->path, td);
2422 	if ((error = namei(&nd)) != 0)
2423 		return (error);
2424 	vfslocked = NDHASGIANT(&nd);
2425 	NDFREE(&nd, NDF_ONLY_PNBUF);
2426 	error = setfflags(td, nd.ni_vp, uap->flags);
2427 	vrele(nd.ni_vp);
2428 	VFS_UNLOCK_GIANT(vfslocked);
2429 	return (error);
2430 }
2431 
2432 /*
2433  * Change flags of a file given a file descriptor.
2434  */
2435 #ifndef _SYS_SYSPROTO_H_
2436 struct fchflags_args {
2437 	int	fd;
2438 	int	flags;
2439 };
2440 #endif
2441 int
2442 fchflags(td, uap)
2443 	struct thread *td;
2444 	register struct fchflags_args /* {
2445 		int fd;
2446 		int flags;
2447 	} */ *uap;
2448 {
2449 	struct file *fp;
2450 	int vfslocked;
2451 	int error;
2452 
2453 	AUDIT_ARG(fd, uap->fd);
2454 	AUDIT_ARG(fflags, uap->flags);
2455 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2456 		return (error);
2457 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2458 #ifdef AUDIT
2459 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2460 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2461 	VOP_UNLOCK(fp->f_vnode, 0, td);
2462 #endif
2463 	error = setfflags(td, fp->f_vnode, uap->flags);
2464 	VFS_UNLOCK_GIANT(vfslocked);
2465 	fdrop(fp, td);
2466 	return (error);
2467 }
2468 
2469 /*
2470  * Common implementation code for chmod(), lchmod() and fchmod().
2471  */
2472 static int
2473 setfmode(td, vp, mode)
2474 	struct thread *td;
2475 	struct vnode *vp;
2476 	int mode;
2477 {
2478 	int error;
2479 	struct mount *mp;
2480 	struct vattr vattr;
2481 
2482 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2483 		return (error);
2484 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2485 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2486 	VATTR_NULL(&vattr);
2487 	vattr.va_mode = mode & ALLPERMS;
2488 #ifdef MAC
2489 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2490 	if (error == 0)
2491 #endif
2492 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2493 	VOP_UNLOCK(vp, 0, td);
2494 	vn_finished_write(mp);
2495 	return (error);
2496 }
2497 
2498 /*
2499  * Change mode of a file given path name.
2500  */
2501 #ifndef _SYS_SYSPROTO_H_
2502 struct chmod_args {
2503 	char	*path;
2504 	int	mode;
2505 };
2506 #endif
2507 int
2508 chmod(td, uap)
2509 	struct thread *td;
2510 	register struct chmod_args /* {
2511 		char *path;
2512 		int mode;
2513 	} */ *uap;
2514 {
2515 
2516 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2517 }
2518 
2519 int
2520 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2521 {
2522 	int error;
2523 	struct nameidata nd;
2524 	int vfslocked;
2525 
2526 	AUDIT_ARG(mode, mode);
2527 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2528 	if ((error = namei(&nd)) != 0)
2529 		return (error);
2530 	vfslocked = NDHASGIANT(&nd);
2531 	NDFREE(&nd, NDF_ONLY_PNBUF);
2532 	error = setfmode(td, nd.ni_vp, mode);
2533 	vrele(nd.ni_vp);
2534 	VFS_UNLOCK_GIANT(vfslocked);
2535 	return (error);
2536 }
2537 
2538 /*
2539  * Change mode of a file given path name (don't follow links.)
2540  */
2541 #ifndef _SYS_SYSPROTO_H_
2542 struct lchmod_args {
2543 	char	*path;
2544 	int	mode;
2545 };
2546 #endif
2547 int
2548 lchmod(td, uap)
2549 	struct thread *td;
2550 	register struct lchmod_args /* {
2551 		char *path;
2552 		int mode;
2553 	} */ *uap;
2554 {
2555 	int error;
2556 	struct nameidata nd;
2557 	int vfslocked;
2558 
2559 	AUDIT_ARG(mode, (mode_t)uap->mode);
2560 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2561 	    uap->path, td);
2562 	if ((error = namei(&nd)) != 0)
2563 		return (error);
2564 	vfslocked = NDHASGIANT(&nd);
2565 	NDFREE(&nd, NDF_ONLY_PNBUF);
2566 	error = setfmode(td, nd.ni_vp, uap->mode);
2567 	vrele(nd.ni_vp);
2568 	VFS_UNLOCK_GIANT(vfslocked);
2569 	return (error);
2570 }
2571 
2572 /*
2573  * Change mode of a file given a file descriptor.
2574  */
2575 #ifndef _SYS_SYSPROTO_H_
2576 struct fchmod_args {
2577 	int	fd;
2578 	int	mode;
2579 };
2580 #endif
2581 int
2582 fchmod(td, uap)
2583 	struct thread *td;
2584 	register struct fchmod_args /* {
2585 		int fd;
2586 		int mode;
2587 	} */ *uap;
2588 {
2589 	struct file *fp;
2590 	int vfslocked;
2591 	int error;
2592 
2593 	AUDIT_ARG(fd, uap->fd);
2594 	AUDIT_ARG(mode, uap->mode);
2595 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2596 		return (error);
2597 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2598 #ifdef AUDIT
2599 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2600 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2601 	VOP_UNLOCK(fp->f_vnode, 0, td);
2602 #endif
2603 	error = setfmode(td, fp->f_vnode, uap->mode);
2604 	VFS_UNLOCK_GIANT(vfslocked);
2605 	fdrop(fp, td);
2606 	return (error);
2607 }
2608 
2609 /*
2610  * Common implementation for chown(), lchown(), and fchown()
2611  */
2612 static int
2613 setfown(td, vp, uid, gid)
2614 	struct thread *td;
2615 	struct vnode *vp;
2616 	uid_t uid;
2617 	gid_t gid;
2618 {
2619 	int error;
2620 	struct mount *mp;
2621 	struct vattr vattr;
2622 
2623 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2624 		return (error);
2625 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2626 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2627 	VATTR_NULL(&vattr);
2628 	vattr.va_uid = uid;
2629 	vattr.va_gid = gid;
2630 #ifdef MAC
2631 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2632 	    vattr.va_gid);
2633 	if (error == 0)
2634 #endif
2635 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2636 	VOP_UNLOCK(vp, 0, td);
2637 	vn_finished_write(mp);
2638 	return (error);
2639 }
2640 
2641 /*
2642  * Set ownership given a path name.
2643  */
2644 #ifndef _SYS_SYSPROTO_H_
2645 struct chown_args {
2646 	char	*path;
2647 	int	uid;
2648 	int	gid;
2649 };
2650 #endif
2651 int
2652 chown(td, uap)
2653 	struct thread *td;
2654 	register struct chown_args /* {
2655 		char *path;
2656 		int uid;
2657 		int gid;
2658 	} */ *uap;
2659 {
2660 
2661 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2662 }
2663 
2664 int
2665 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2666     int gid)
2667 {
2668 	int error;
2669 	struct nameidata nd;
2670 	int vfslocked;
2671 
2672 	AUDIT_ARG(owner, uid, gid);
2673 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2674 	if ((error = namei(&nd)) != 0)
2675 		return (error);
2676 	vfslocked = NDHASGIANT(&nd);
2677 	NDFREE(&nd, NDF_ONLY_PNBUF);
2678 	error = setfown(td, nd.ni_vp, uid, gid);
2679 	vrele(nd.ni_vp);
2680 	VFS_UNLOCK_GIANT(vfslocked);
2681 	return (error);
2682 }
2683 
2684 /*
2685  * Set ownership given a path name, do not cross symlinks.
2686  */
2687 #ifndef _SYS_SYSPROTO_H_
2688 struct lchown_args {
2689 	char	*path;
2690 	int	uid;
2691 	int	gid;
2692 };
2693 #endif
2694 int
2695 lchown(td, uap)
2696 	struct thread *td;
2697 	register struct lchown_args /* {
2698 		char *path;
2699 		int uid;
2700 		int gid;
2701 	} */ *uap;
2702 {
2703 
2704 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2705 }
2706 
2707 int
2708 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2709     int gid)
2710 {
2711 	int error;
2712 	struct nameidata nd;
2713 	int vfslocked;
2714 
2715 	AUDIT_ARG(owner, uid, gid);
2716 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2717 	if ((error = namei(&nd)) != 0)
2718 		return (error);
2719 	vfslocked = NDHASGIANT(&nd);
2720 	NDFREE(&nd, NDF_ONLY_PNBUF);
2721 	error = setfown(td, nd.ni_vp, uid, gid);
2722 	vrele(nd.ni_vp);
2723 	VFS_UNLOCK_GIANT(vfslocked);
2724 	return (error);
2725 }
2726 
2727 /*
2728  * Set ownership given a file descriptor.
2729  */
2730 #ifndef _SYS_SYSPROTO_H_
2731 struct fchown_args {
2732 	int	fd;
2733 	int	uid;
2734 	int	gid;
2735 };
2736 #endif
2737 int
2738 fchown(td, uap)
2739 	struct thread *td;
2740 	register struct fchown_args /* {
2741 		int fd;
2742 		int uid;
2743 		int gid;
2744 	} */ *uap;
2745 {
2746 	struct file *fp;
2747 	int vfslocked;
2748 	int error;
2749 
2750 	AUDIT_ARG(fd, uap->fd);
2751 	AUDIT_ARG(owner, uap->uid, uap->gid);
2752 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2753 		return (error);
2754 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2755 #ifdef AUDIT
2756 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2757 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2758 	VOP_UNLOCK(fp->f_vnode, 0, td);
2759 #endif
2760 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2761 	VFS_UNLOCK_GIANT(vfslocked);
2762 	fdrop(fp, td);
2763 	return (error);
2764 }
2765 
2766 /*
2767  * Common implementation code for utimes(), lutimes(), and futimes().
2768  */
2769 static int
2770 getutimes(usrtvp, tvpseg, tsp)
2771 	const struct timeval *usrtvp;
2772 	enum uio_seg tvpseg;
2773 	struct timespec *tsp;
2774 {
2775 	struct timeval tv[2];
2776 	const struct timeval *tvp;
2777 	int error;
2778 
2779 	if (usrtvp == NULL) {
2780 		microtime(&tv[0]);
2781 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2782 		tsp[1] = tsp[0];
2783 	} else {
2784 		if (tvpseg == UIO_SYSSPACE) {
2785 			tvp = usrtvp;
2786 		} else {
2787 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2788 				return (error);
2789 			tvp = tv;
2790 		}
2791 
2792 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2793 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2794 			return (EINVAL);
2795 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2796 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2797 	}
2798 	return (0);
2799 }
2800 
2801 /*
2802  * Common implementation code for utimes(), lutimes(), and futimes().
2803  */
2804 static int
2805 setutimes(td, vp, ts, numtimes, nullflag)
2806 	struct thread *td;
2807 	struct vnode *vp;
2808 	const struct timespec *ts;
2809 	int numtimes;
2810 	int nullflag;
2811 {
2812 	int error, setbirthtime;
2813 	struct mount *mp;
2814 	struct vattr vattr;
2815 
2816 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2817 		return (error);
2818 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2819 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2820 	setbirthtime = 0;
2821 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2822 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2823 		setbirthtime = 1;
2824 	VATTR_NULL(&vattr);
2825 	vattr.va_atime = ts[0];
2826 	vattr.va_mtime = ts[1];
2827 	if (setbirthtime)
2828 		vattr.va_birthtime = ts[1];
2829 	if (numtimes > 2)
2830 		vattr.va_birthtime = ts[2];
2831 	if (nullflag)
2832 		vattr.va_vaflags |= VA_UTIMES_NULL;
2833 #ifdef MAC
2834 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2835 	    vattr.va_mtime);
2836 #endif
2837 	if (error == 0)
2838 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2839 	VOP_UNLOCK(vp, 0, td);
2840 	vn_finished_write(mp);
2841 	return (error);
2842 }
2843 
2844 /*
2845  * Set the access and modification times of a file.
2846  */
2847 #ifndef _SYS_SYSPROTO_H_
2848 struct utimes_args {
2849 	char	*path;
2850 	struct	timeval *tptr;
2851 };
2852 #endif
2853 int
2854 utimes(td, uap)
2855 	struct thread *td;
2856 	register struct utimes_args /* {
2857 		char *path;
2858 		struct timeval *tptr;
2859 	} */ *uap;
2860 {
2861 
2862 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2863 	    UIO_USERSPACE));
2864 }
2865 
2866 int
2867 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2868     struct timeval *tptr, enum uio_seg tptrseg)
2869 {
2870 	struct timespec ts[2];
2871 	int error;
2872 	struct nameidata nd;
2873 	int vfslocked;
2874 
2875 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2876 		return (error);
2877 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2878 	if ((error = namei(&nd)) != 0)
2879 		return (error);
2880 	vfslocked = NDHASGIANT(&nd);
2881 	NDFREE(&nd, NDF_ONLY_PNBUF);
2882 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2883 	vrele(nd.ni_vp);
2884 	VFS_UNLOCK_GIANT(vfslocked);
2885 	return (error);
2886 }
2887 
2888 /*
2889  * Set the access and modification times of a file.
2890  */
2891 #ifndef _SYS_SYSPROTO_H_
2892 struct lutimes_args {
2893 	char	*path;
2894 	struct	timeval *tptr;
2895 };
2896 #endif
2897 int
2898 lutimes(td, uap)
2899 	struct thread *td;
2900 	register struct lutimes_args /* {
2901 		char *path;
2902 		struct timeval *tptr;
2903 	} */ *uap;
2904 {
2905 
2906 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2907 	    UIO_USERSPACE));
2908 }
2909 
2910 int
2911 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2912     struct timeval *tptr, enum uio_seg tptrseg)
2913 {
2914 	struct timespec ts[2];
2915 	int error;
2916 	struct nameidata nd;
2917 	int vfslocked;
2918 
2919 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2920 		return (error);
2921 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2922 	if ((error = namei(&nd)) != 0)
2923 		return (error);
2924 	vfslocked = NDHASGIANT(&nd);
2925 	NDFREE(&nd, NDF_ONLY_PNBUF);
2926 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2927 	vrele(nd.ni_vp);
2928 	VFS_UNLOCK_GIANT(vfslocked);
2929 	return (error);
2930 }
2931 
2932 /*
2933  * Set the access and modification times of a file.
2934  */
2935 #ifndef _SYS_SYSPROTO_H_
2936 struct futimes_args {
2937 	int	fd;
2938 	struct	timeval *tptr;
2939 };
2940 #endif
2941 int
2942 futimes(td, uap)
2943 	struct thread *td;
2944 	register struct futimes_args /* {
2945 		int  fd;
2946 		struct timeval *tptr;
2947 	} */ *uap;
2948 {
2949 
2950 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2951 }
2952 
2953 int
2954 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2955     enum uio_seg tptrseg)
2956 {
2957 	struct timespec ts[2];
2958 	struct file *fp;
2959 	int vfslocked;
2960 	int error;
2961 
2962 	AUDIT_ARG(fd, fd);
2963 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2964 		return (error);
2965 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2966 		return (error);
2967 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2968 #ifdef AUDIT
2969 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2970 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2971 	VOP_UNLOCK(fp->f_vnode, 0, td);
2972 #endif
2973 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2974 	VFS_UNLOCK_GIANT(vfslocked);
2975 	fdrop(fp, td);
2976 	return (error);
2977 }
2978 
2979 /*
2980  * Truncate a file given its path name.
2981  */
2982 #ifndef _SYS_SYSPROTO_H_
2983 struct truncate_args {
2984 	char	*path;
2985 	int	pad;
2986 	off_t	length;
2987 };
2988 #endif
2989 int
2990 truncate(td, uap)
2991 	struct thread *td;
2992 	register struct truncate_args /* {
2993 		char *path;
2994 		int pad;
2995 		off_t length;
2996 	} */ *uap;
2997 {
2998 
2999 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3000 }
3001 
3002 int
3003 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3004 {
3005 	struct mount *mp;
3006 	struct vnode *vp;
3007 	struct vattr vattr;
3008 	int error;
3009 	struct nameidata nd;
3010 	int vfslocked;
3011 
3012 	if (length < 0)
3013 		return(EINVAL);
3014 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3015 	if ((error = namei(&nd)) != 0)
3016 		return (error);
3017 	vfslocked = NDHASGIANT(&nd);
3018 	vp = nd.ni_vp;
3019 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3020 		vrele(vp);
3021 		VFS_UNLOCK_GIANT(vfslocked);
3022 		return (error);
3023 	}
3024 	NDFREE(&nd, NDF_ONLY_PNBUF);
3025 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3026 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3027 	if (vp->v_type == VDIR)
3028 		error = EISDIR;
3029 #ifdef MAC
3030 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
3031 	}
3032 #endif
3033 	else if ((error = vn_writechk(vp)) == 0 &&
3034 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3035 		VATTR_NULL(&vattr);
3036 		vattr.va_size = length;
3037 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3038 	}
3039 	vput(vp);
3040 	vn_finished_write(mp);
3041 	VFS_UNLOCK_GIANT(vfslocked);
3042 	return (error);
3043 }
3044 
3045 /*
3046  * Truncate a file given a file descriptor.
3047  */
3048 #ifndef _SYS_SYSPROTO_H_
3049 struct ftruncate_args {
3050 	int	fd;
3051 	int	pad;
3052 	off_t	length;
3053 };
3054 #endif
3055 int
3056 ftruncate(td, uap)
3057 	struct thread *td;
3058 	register struct ftruncate_args /* {
3059 		int fd;
3060 		int pad;
3061 		off_t length;
3062 	} */ *uap;
3063 {
3064 	struct mount *mp;
3065 	struct vattr vattr;
3066 	struct vnode *vp;
3067 	struct file *fp;
3068 	int vfslocked;
3069 	int error;
3070 
3071 	AUDIT_ARG(fd, uap->fd);
3072 	if (uap->length < 0)
3073 		return(EINVAL);
3074 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3075 		return (error);
3076 	if ((fp->f_flag & FWRITE) == 0) {
3077 		fdrop(fp, td);
3078 		return (EINVAL);
3079 	}
3080 	vp = fp->f_vnode;
3081 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3082 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3083 		goto drop;
3084 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3085 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3086 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3087 	if (vp->v_type == VDIR)
3088 		error = EISDIR;
3089 #ifdef MAC
3090 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3091 	    vp))) {
3092 	}
3093 #endif
3094 	else if ((error = vn_writechk(vp)) == 0) {
3095 		VATTR_NULL(&vattr);
3096 		vattr.va_size = uap->length;
3097 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3098 	}
3099 	VOP_UNLOCK(vp, 0, td);
3100 	vn_finished_write(mp);
3101 drop:
3102 	VFS_UNLOCK_GIANT(vfslocked);
3103 	fdrop(fp, td);
3104 	return (error);
3105 }
3106 
3107 #if defined(COMPAT_43)
3108 /*
3109  * Truncate a file given its path name.
3110  */
3111 #ifndef _SYS_SYSPROTO_H_
3112 struct otruncate_args {
3113 	char	*path;
3114 	long	length;
3115 };
3116 #endif
3117 int
3118 otruncate(td, uap)
3119 	struct thread *td;
3120 	register struct otruncate_args /* {
3121 		char *path;
3122 		long length;
3123 	} */ *uap;
3124 {
3125 	struct truncate_args /* {
3126 		char *path;
3127 		int pad;
3128 		off_t length;
3129 	} */ nuap;
3130 
3131 	nuap.path = uap->path;
3132 	nuap.length = uap->length;
3133 	return (truncate(td, &nuap));
3134 }
3135 
3136 /*
3137  * Truncate a file given a file descriptor.
3138  */
3139 #ifndef _SYS_SYSPROTO_H_
3140 struct oftruncate_args {
3141 	int	fd;
3142 	long	length;
3143 };
3144 #endif
3145 int
3146 oftruncate(td, uap)
3147 	struct thread *td;
3148 	register struct oftruncate_args /* {
3149 		int fd;
3150 		long length;
3151 	} */ *uap;
3152 {
3153 	struct ftruncate_args /* {
3154 		int fd;
3155 		int pad;
3156 		off_t length;
3157 	} */ nuap;
3158 
3159 	nuap.fd = uap->fd;
3160 	nuap.length = uap->length;
3161 	return (ftruncate(td, &nuap));
3162 }
3163 #endif /* COMPAT_43 */
3164 
3165 /* Versions with the pad argument */
3166 int
3167 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
3168 {
3169 	struct truncate_args ouap;
3170 
3171 	ouap.path = uap->path;
3172 	ouap.length = uap->length;
3173 	return (truncate(td, &ouap));
3174 }
3175 
3176 int
3177 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
3178 {
3179 	struct ftruncate_args ouap;
3180 
3181 	ouap.fd = uap->fd;
3182 	ouap.length = uap->length;
3183 	return (ftruncate(td, &ouap));
3184 }
3185 
3186 /*
3187  * Sync an open file.
3188  */
3189 #ifndef _SYS_SYSPROTO_H_
3190 struct fsync_args {
3191 	int	fd;
3192 };
3193 #endif
3194 int
3195 fsync(td, uap)
3196 	struct thread *td;
3197 	struct fsync_args /* {
3198 		int fd;
3199 	} */ *uap;
3200 {
3201 	struct vnode *vp;
3202 	struct mount *mp;
3203 	struct file *fp;
3204 	int vfslocked;
3205 	int error;
3206 
3207 	AUDIT_ARG(fd, uap->fd);
3208 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3209 		return (error);
3210 	vp = fp->f_vnode;
3211 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3212 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3213 		goto drop;
3214 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3215 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3216 	if (vp->v_object != NULL) {
3217 		VM_OBJECT_LOCK(vp->v_object);
3218 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3219 		VM_OBJECT_UNLOCK(vp->v_object);
3220 	}
3221 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3222 
3223 	VOP_UNLOCK(vp, 0, td);
3224 	vn_finished_write(mp);
3225 drop:
3226 	VFS_UNLOCK_GIANT(vfslocked);
3227 	fdrop(fp, td);
3228 	return (error);
3229 }
3230 
3231 /*
3232  * Rename files.  Source and destination must either both be directories, or
3233  * both not be directories.  If target is a directory, it must be empty.
3234  */
3235 #ifndef _SYS_SYSPROTO_H_
3236 struct rename_args {
3237 	char	*from;
3238 	char	*to;
3239 };
3240 #endif
3241 int
3242 rename(td, uap)
3243 	struct thread *td;
3244 	register struct rename_args /* {
3245 		char *from;
3246 		char *to;
3247 	} */ *uap;
3248 {
3249 
3250 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3251 }
3252 
3253 int
3254 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3255 {
3256 	struct mount *mp = NULL;
3257 	struct vnode *tvp, *fvp, *tdvp;
3258 	struct nameidata fromnd, tond;
3259 	int tvfslocked;
3260 	int fvfslocked;
3261 	int error;
3262 
3263 	bwillwrite();
3264 #ifdef MAC
3265 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3266 	    AUDITVNODE1, pathseg, from, td);
3267 #else
3268 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3269 	    AUDITVNODE1, pathseg, from, td);
3270 #endif
3271 	if ((error = namei(&fromnd)) != 0)
3272 		return (error);
3273 	fvfslocked = NDHASGIANT(&fromnd);
3274 	tvfslocked = 0;
3275 #ifdef MAC
3276 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3277 	    fromnd.ni_vp, &fromnd.ni_cnd);
3278 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3279 	if (fromnd.ni_dvp != fromnd.ni_vp)
3280 		VOP_UNLOCK(fromnd.ni_vp, 0, td);
3281 #endif
3282 	fvp = fromnd.ni_vp;
3283 	if (error == 0)
3284 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3285 	if (error != 0) {
3286 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3287 		vrele(fromnd.ni_dvp);
3288 		vrele(fvp);
3289 		goto out1;
3290 	}
3291 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3292 	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3293 	if (fromnd.ni_vp->v_type == VDIR)
3294 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3295 	if ((error = namei(&tond)) != 0) {
3296 		/* Translate error code for rename("dir1", "dir2/."). */
3297 		if (error == EISDIR && fvp->v_type == VDIR)
3298 			error = EINVAL;
3299 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3300 		vrele(fromnd.ni_dvp);
3301 		vrele(fvp);
3302 		vn_finished_write(mp);
3303 		goto out1;
3304 	}
3305 	tvfslocked = NDHASGIANT(&tond);
3306 	tdvp = tond.ni_dvp;
3307 	tvp = tond.ni_vp;
3308 	if (tvp != NULL) {
3309 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3310 			error = ENOTDIR;
3311 			goto out;
3312 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3313 			error = EISDIR;
3314 			goto out;
3315 		}
3316 	}
3317 	if (fvp == tdvp)
3318 		error = EINVAL;
3319 	/*
3320 	 * If the source is the same as the destination (that is, if they
3321 	 * are links to the same vnode), then there is nothing to do.
3322 	 */
3323 	if (fvp == tvp)
3324 		error = -1;
3325 #ifdef MAC
3326 	else
3327 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3328 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3329 #endif
3330 out:
3331 	if (!error) {
3332 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3333 		if (fromnd.ni_dvp != tdvp) {
3334 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3335 		}
3336 		if (tvp) {
3337 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3338 		}
3339 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3340 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3341 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3342 		NDFREE(&tond, NDF_ONLY_PNBUF);
3343 	} else {
3344 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3345 		NDFREE(&tond, NDF_ONLY_PNBUF);
3346 		if (tvp)
3347 			vput(tvp);
3348 		if (tdvp == tvp)
3349 			vrele(tdvp);
3350 		else
3351 			vput(tdvp);
3352 		vrele(fromnd.ni_dvp);
3353 		vrele(fvp);
3354 	}
3355 	vrele(tond.ni_startdir);
3356 	vn_finished_write(mp);
3357 out1:
3358 	if (fromnd.ni_startdir)
3359 		vrele(fromnd.ni_startdir);
3360 	VFS_UNLOCK_GIANT(fvfslocked);
3361 	VFS_UNLOCK_GIANT(tvfslocked);
3362 	if (error == -1)
3363 		return (0);
3364 	return (error);
3365 }
3366 
3367 /*
3368  * Make a directory file.
3369  */
3370 #ifndef _SYS_SYSPROTO_H_
3371 struct mkdir_args {
3372 	char	*path;
3373 	int	mode;
3374 };
3375 #endif
3376 int
3377 mkdir(td, uap)
3378 	struct thread *td;
3379 	register struct mkdir_args /* {
3380 		char *path;
3381 		int mode;
3382 	} */ *uap;
3383 {
3384 
3385 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3386 }
3387 
3388 int
3389 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3390 {
3391 	struct mount *mp;
3392 	struct vnode *vp;
3393 	struct vattr vattr;
3394 	int error;
3395 	struct nameidata nd;
3396 	int vfslocked;
3397 
3398 	AUDIT_ARG(mode, mode);
3399 restart:
3400 	bwillwrite();
3401 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3402 	    segflg, path, td);
3403 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3404 	if ((error = namei(&nd)) != 0)
3405 		return (error);
3406 	vfslocked = NDHASGIANT(&nd);
3407 	vp = nd.ni_vp;
3408 	if (vp != NULL) {
3409 		NDFREE(&nd, NDF_ONLY_PNBUF);
3410 		/*
3411 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3412 		 * the strange behaviour of leaving the vnode unlocked
3413 		 * if the target is the same vnode as the parent.
3414 		 */
3415 		if (vp == nd.ni_dvp)
3416 			vrele(nd.ni_dvp);
3417 		else
3418 			vput(nd.ni_dvp);
3419 		vrele(vp);
3420 		VFS_UNLOCK_GIANT(vfslocked);
3421 		return (EEXIST);
3422 	}
3423 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3424 		NDFREE(&nd, NDF_ONLY_PNBUF);
3425 		vput(nd.ni_dvp);
3426 		VFS_UNLOCK_GIANT(vfslocked);
3427 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3428 			return (error);
3429 		goto restart;
3430 	}
3431 	VATTR_NULL(&vattr);
3432 	vattr.va_type = VDIR;
3433 	FILEDESC_SLOCK(td->td_proc->p_fd);
3434 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3435 	FILEDESC_SUNLOCK(td->td_proc->p_fd);
3436 #ifdef MAC
3437 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3438 	    &vattr);
3439 	if (error)
3440 		goto out;
3441 #endif
3442 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3443 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3444 #ifdef MAC
3445 out:
3446 #endif
3447 	NDFREE(&nd, NDF_ONLY_PNBUF);
3448 	vput(nd.ni_dvp);
3449 	if (!error)
3450 		vput(nd.ni_vp);
3451 	vn_finished_write(mp);
3452 	VFS_UNLOCK_GIANT(vfslocked);
3453 	return (error);
3454 }
3455 
3456 /*
3457  * Remove a directory file.
3458  */
3459 #ifndef _SYS_SYSPROTO_H_
3460 struct rmdir_args {
3461 	char	*path;
3462 };
3463 #endif
3464 int
3465 rmdir(td, uap)
3466 	struct thread *td;
3467 	struct rmdir_args /* {
3468 		char *path;
3469 	} */ *uap;
3470 {
3471 
3472 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3473 }
3474 
3475 int
3476 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3477 {
3478 	struct mount *mp;
3479 	struct vnode *vp;
3480 	int error;
3481 	struct nameidata nd;
3482 	int vfslocked;
3483 
3484 restart:
3485 	bwillwrite();
3486 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3487 	    pathseg, path, td);
3488 	if ((error = namei(&nd)) != 0)
3489 		return (error);
3490 	vfslocked = NDHASGIANT(&nd);
3491 	vp = nd.ni_vp;
3492 	if (vp->v_type != VDIR) {
3493 		error = ENOTDIR;
3494 		goto out;
3495 	}
3496 	/*
3497 	 * No rmdir "." please.
3498 	 */
3499 	if (nd.ni_dvp == vp) {
3500 		error = EINVAL;
3501 		goto out;
3502 	}
3503 	/*
3504 	 * The root of a mounted filesystem cannot be deleted.
3505 	 */
3506 	if (vp->v_vflag & VV_ROOT) {
3507 		error = EBUSY;
3508 		goto out;
3509 	}
3510 #ifdef MAC
3511 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3512 	    &nd.ni_cnd);
3513 	if (error)
3514 		goto out;
3515 #endif
3516 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3517 		NDFREE(&nd, NDF_ONLY_PNBUF);
3518 		vput(vp);
3519 		if (nd.ni_dvp == vp)
3520 			vrele(nd.ni_dvp);
3521 		else
3522 			vput(nd.ni_dvp);
3523 		VFS_UNLOCK_GIANT(vfslocked);
3524 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3525 			return (error);
3526 		goto restart;
3527 	}
3528 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3529 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3530 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3531 	vn_finished_write(mp);
3532 out:
3533 	NDFREE(&nd, NDF_ONLY_PNBUF);
3534 	vput(vp);
3535 	if (nd.ni_dvp == vp)
3536 		vrele(nd.ni_dvp);
3537 	else
3538 		vput(nd.ni_dvp);
3539 	VFS_UNLOCK_GIANT(vfslocked);
3540 	return (error);
3541 }
3542 
3543 #ifdef COMPAT_43
/*
 * Read a block of directory entries in a filesystem independent format.
 *
 * Old (COMPAT_43) variant of getdirentries().  Reads up to uap->count
 * bytes of entries from the directory open on uap->fd into uap->buf,
 * stores the directory offset the read started at in *uap->basep, and
 * returns the number of bytes transferred in td->td_retval[0].
 *
 * Unless the fast path below applies, the entries are first read into a
 * temporary kernel buffer so that d_type and d_namlen of every entry can
 * be rewritten into the layout the old interface expects before the data
 * is copied out.
 *
 * Returns EINVAL if count exceeds 64KB or the descriptor is not a
 * directory, EBADF if it is not open for reading, EIO if an entry with a
 * zero record length is encountered, otherwise 0 or the error from the
 * MAC check, VOP_READDIR(), uiomove() or copyout().
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(td, uap)
	struct thread *td;
	register struct ogetdirentries_args /* {
		int fd;
		char *buf;
		u_int count;
		long *basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt, vfslocked;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
	/* Re-entered with the covered vnode when a union mount root is empty. */
unionread:
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	if (vp->v_type != VDIR) {
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		return (EINVAL);
	}
	/* Describe the caller's buffer as a single-segment user-space uio. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember the starting offset; it is reported through basep. */
	loff = auio.uio_offset = fp->f_offset;
#ifdef MAC
	error = mac_check_vnode_readdir(td->td_ucred, vp);
	if (error) {
		VOP_UNLOCK(vp, 0, td);
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		return (error);
	}
#endif
	/*
	 * On non-little-endian machines, a mount whose mnt_maxsymlinklen
	 * is <= 0 is read straight into the user buffer with no rewriting.
	 */
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/* Same request as auio, but aimed at a kernel bounce buffer. */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			/* Walk the entries that were read, fixing up each one. */
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					/* A zero-length record would loop forever. */
					error = EIO;
					break;
				}
			}
			/* Copy out only if the walk reached the end of the data. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	if (error) {
		VOP_UNLOCK(vp, 0, td);
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		return (error);
	}
	/*
	 * Nothing was transferred and this is the root of a union mount:
	 * switch the open file over to the covered vnode, rewind it, and
	 * read again from there.
	 */
	if (uap->count == auio.uio_resid &&
	    (vp->v_vflag & VV_ROOT) &&
	    (vp->v_mount->mnt_flag & MNT_UNION)) {
		struct vnode *tvp = vp;
		vp = vp->v_mount->mnt_vnodecovered;
		VREF(vp);
		fp->f_vnode = vp;
		fp->f_data = vp;
		fp->f_offset = 0;
		vput(tvp);
		VFS_UNLOCK_GIANT(vfslocked);
		goto unionread;
	}
	VOP_UNLOCK(vp, 0, td);
	VFS_UNLOCK_GIANT(vfslocked);
	error = copyout(&loff, uap->basep, sizeof(long));
	fdrop(fp, td);
	/* The byte count is set even when the copyout above failed. */
	td->td_retval[0] = uap->count - auio.uio_resid;
	return (error);
}
3687 #endif /* COMPAT_43 */
3688 
3689 /*
3690  * Read a block of directory entries in a filesystem independent format.
3691  */
3692 #ifndef _SYS_SYSPROTO_H_
3693 struct getdirentries_args {
3694 	int	fd;
3695 	char	*buf;
3696 	u_int	count;
3697 	long	*basep;
3698 };
3699 #endif
3700 int
3701 getdirentries(td, uap)
3702 	struct thread *td;
3703 	register struct getdirentries_args /* {
3704 		int fd;
3705 		char *buf;
3706 		u_int count;
3707 		long *basep;
3708 	} */ *uap;
3709 {
3710 	struct vnode *vp;
3711 	struct file *fp;
3712 	struct uio auio;
3713 	struct iovec aiov;
3714 	int vfslocked;
3715 	long loff;
3716 	int error, eofflag;
3717 
3718 	AUDIT_ARG(fd, uap->fd);
3719 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3720 		return (error);
3721 	if ((fp->f_flag & FREAD) == 0) {
3722 		fdrop(fp, td);
3723 		return (EBADF);
3724 	}
3725 	vp = fp->f_vnode;
3726 unionread:
3727 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3728 	if (vp->v_type != VDIR) {
3729 		VFS_UNLOCK_GIANT(vfslocked);
3730 		error = EINVAL;
3731 		goto fail;
3732 	}
3733 	aiov.iov_base = uap->buf;
3734 	aiov.iov_len = uap->count;
3735 	auio.uio_iov = &aiov;
3736 	auio.uio_iovcnt = 1;
3737 	auio.uio_rw = UIO_READ;
3738 	auio.uio_segflg = UIO_USERSPACE;
3739 	auio.uio_td = td;
3740 	auio.uio_resid = uap->count;
3741 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3742 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3743 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3744 	loff = auio.uio_offset = fp->f_offset;
3745 #ifdef MAC
3746 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3747 	if (error == 0)
3748 #endif
3749 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3750 		    NULL);
3751 	fp->f_offset = auio.uio_offset;
3752 	if (error) {
3753 		VOP_UNLOCK(vp, 0, td);
3754 		VFS_UNLOCK_GIANT(vfslocked);
3755 		goto fail;
3756 	}
3757 	if (uap->count == auio.uio_resid &&
3758 	    (vp->v_vflag & VV_ROOT) &&
3759 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
3760 		struct vnode *tvp = vp;
3761 		vp = vp->v_mount->mnt_vnodecovered;
3762 		VREF(vp);
3763 		fp->f_vnode = vp;
3764 		fp->f_data = vp;
3765 		fp->f_offset = 0;
3766 		vput(tvp);
3767 		VFS_UNLOCK_GIANT(vfslocked);
3768 		goto unionread;
3769 	}
3770 	VOP_UNLOCK(vp, 0, td);
3771 	VFS_UNLOCK_GIANT(vfslocked);
3772 	if (uap->basep != NULL) {
3773 		error = copyout(&loff, uap->basep, sizeof(long));
3774 	}
3775 	td->td_retval[0] = uap->count - auio.uio_resid;
3776 fail:
3777 	fdrop(fp, td);
3778 	return (error);
3779 }
3780 
3781 #ifndef _SYS_SYSPROTO_H_
3782 struct getdents_args {
3783 	int fd;
3784 	char *buf;
3785 	size_t count;
3786 };
3787 #endif
3788 int
3789 getdents(td, uap)
3790 	struct thread *td;
3791 	register struct getdents_args /* {
3792 		int fd;
3793 		char *buf;
3794 		u_int count;
3795 	} */ *uap;
3796 {
3797 	struct getdirentries_args ap;
3798 	ap.fd = uap->fd;
3799 	ap.buf = uap->buf;
3800 	ap.count = uap->count;
3801 	ap.basep = NULL;
3802 	return (getdirentries(td, &ap));
3803 }
3804 
3805 /*
3806  * Set the mode mask for creation of filesystem nodes.
3807  */
3808 #ifndef _SYS_SYSPROTO_H_
3809 struct umask_args {
3810 	int	newmask;
3811 };
3812 #endif
3813 int
3814 umask(td, uap)
3815 	struct thread *td;
3816 	struct umask_args /* {
3817 		int newmask;
3818 	} */ *uap;
3819 {
3820 	register struct filedesc *fdp;
3821 
3822 	FILEDESC_XLOCK(td->td_proc->p_fd);
3823 	fdp = td->td_proc->p_fd;
3824 	td->td_retval[0] = fdp->fd_cmask;
3825 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3826 	FILEDESC_XUNLOCK(td->td_proc->p_fd);
3827 	return (0);
3828 }
3829 
3830 /*
3831  * Void all references to file by ripping underlying filesystem away from
3832  * vnode.
3833  */
3834 #ifndef _SYS_SYSPROTO_H_
3835 struct revoke_args {
3836 	char	*path;
3837 };
3838 #endif
3839 int
3840 revoke(td, uap)
3841 	struct thread *td;
3842 	register struct revoke_args /* {
3843 		char *path;
3844 	} */ *uap;
3845 {
3846 	struct vnode *vp;
3847 	struct vattr vattr;
3848 	int error;
3849 	struct nameidata nd;
3850 	int vfslocked;
3851 
3852 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3853 	    UIO_USERSPACE, uap->path, td);
3854 	if ((error = namei(&nd)) != 0)
3855 		return (error);
3856 	vfslocked = NDHASGIANT(&nd);
3857 	vp = nd.ni_vp;
3858 	NDFREE(&nd, NDF_ONLY_PNBUF);
3859 	if (vp->v_type != VCHR) {
3860 		error = EINVAL;
3861 		goto out;
3862 	}
3863 #ifdef MAC
3864 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3865 	if (error)
3866 		goto out;
3867 #endif
3868 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3869 	if (error)
3870 		goto out;
3871 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3872 		error = priv_check(td, PRIV_VFS_ADMIN);
3873 		if (error)
3874 			goto out;
3875 	}
3876 	if (vcount(vp) > 1)
3877 		VOP_REVOKE(vp, REVOKEALL);
3878 out:
3879 	vput(vp);
3880 	VFS_UNLOCK_GIANT(vfslocked);
3881 	return (error);
3882 }
3883 
3884 /*
3885  * Convert a user file descriptor to a kernel file entry.
3886  * A reference on the file entry is held upon returning.
3887  */
3888 int
3889 getvnode(fdp, fd, fpp)
3890 	struct filedesc *fdp;
3891 	int fd;
3892 	struct file **fpp;
3893 {
3894 	int error;
3895 	struct file *fp;
3896 
3897 	fp = NULL;
3898 	if (fdp == NULL)
3899 		error = EBADF;
3900 	else {
3901 		FILEDESC_SLOCK(fdp);
3902 		if ((u_int)fd >= fdp->fd_nfiles ||
3903 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3904 			error = EBADF;
3905 		else if (fp->f_vnode == NULL) {
3906 			fp = NULL;
3907 			error = EINVAL;
3908 		} else {
3909 			fhold(fp);
3910 			error = 0;
3911 		}
3912 		FILEDESC_SUNLOCK(fdp);
3913 	}
3914 	*fpp = fp;
3915 	return (error);
3916 }
3917 
3918 /*
3919  * Get an (NFS) file handle.
3920  */
3921 #ifndef _SYS_SYSPROTO_H_
3922 struct lgetfh_args {
3923 	char	*fname;
3924 	fhandle_t *fhp;
3925 };
3926 #endif
3927 int
3928 lgetfh(td, uap)
3929 	struct thread *td;
3930 	register struct lgetfh_args *uap;
3931 {
3932 	struct nameidata nd;
3933 	fhandle_t fh;
3934 	register struct vnode *vp;
3935 	int vfslocked;
3936 	int error;
3937 
3938 	error = priv_check(td, PRIV_VFS_GETFH);
3939 	if (error)
3940 		return (error);
3941 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3942 	    UIO_USERSPACE, uap->fname, td);
3943 	error = namei(&nd);
3944 	if (error)
3945 		return (error);
3946 	vfslocked = NDHASGIANT(&nd);
3947 	NDFREE(&nd, NDF_ONLY_PNBUF);
3948 	vp = nd.ni_vp;
3949 	bzero(&fh, sizeof(fh));
3950 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3951 	error = VOP_VPTOFH(vp, &fh.fh_fid);
3952 	vput(vp);
3953 	VFS_UNLOCK_GIANT(vfslocked);
3954 	if (error)
3955 		return (error);
3956 	error = copyout(&fh, uap->fhp, sizeof (fh));
3957 	return (error);
3958 }
3959 
3960 #ifndef _SYS_SYSPROTO_H_
3961 struct getfh_args {
3962 	char	*fname;
3963 	fhandle_t *fhp;
3964 };
3965 #endif
3966 int
3967 getfh(td, uap)
3968 	struct thread *td;
3969 	register struct getfh_args *uap;
3970 {
3971 	struct nameidata nd;
3972 	fhandle_t fh;
3973 	register struct vnode *vp;
3974 	int vfslocked;
3975 	int error;
3976 
3977 	error = priv_check(td, PRIV_VFS_GETFH);
3978 	if (error)
3979 		return (error);
3980 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3981 	    UIO_USERSPACE, uap->fname, td);
3982 	error = namei(&nd);
3983 	if (error)
3984 		return (error);
3985 	vfslocked = NDHASGIANT(&nd);
3986 	NDFREE(&nd, NDF_ONLY_PNBUF);
3987 	vp = nd.ni_vp;
3988 	bzero(&fh, sizeof(fh));
3989 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3990 	error = VOP_VPTOFH(vp, &fh.fh_fid);
3991 	vput(vp);
3992 	VFS_UNLOCK_GIANT(vfslocked);
3993 	if (error)
3994 		return (error);
3995 	error = copyout(&fh, uap->fhp, sizeof (fh));
3996 	return (error);
3997 }
3998 
3999 /*
4000  * syscall for the rpc.lockd to use to translate a NFS file handle into an
4001  * open descriptor.
4002  *
4003  * warning: do not remove the priv_check() call or this becomes one giant
4004  * security hole.
4005  */
4006 #ifndef _SYS_SYSPROTO_H_
4007 struct fhopen_args {
4008 	const struct fhandle *u_fhp;
4009 	int flags;
4010 };
4011 #endif
4012 int
4013 fhopen(td, uap)
4014 	struct thread *td;
4015 	struct fhopen_args /* {
4016 		const struct fhandle *u_fhp;
4017 		int flags;
4018 	} */ *uap;
4019 {
4020 	struct proc *p = td->td_proc;
4021 	struct mount *mp;
4022 	struct vnode *vp;
4023 	struct fhandle fhp;
4024 	struct vattr vat;
4025 	struct vattr *vap = &vat;
4026 	struct flock lf;
4027 	struct file *fp;
4028 	register struct filedesc *fdp = p->p_fd;
4029 	int fmode, mode, error, type;
4030 	struct file *nfp;
4031 	int vfslocked;
4032 	int indx;
4033 
4034 	error = priv_check(td, PRIV_VFS_FHOPEN);
4035 	if (error)
4036 		return (error);
4037 	fmode = FFLAGS(uap->flags);
4038 	/* why not allow a non-read/write open for our lockd? */
4039 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4040 		return (EINVAL);
4041 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4042 	if (error)
4043 		return(error);
4044 	/* find the mount point */
4045 	mp = vfs_getvfs(&fhp.fh_fsid);
4046 	if (mp == NULL)
4047 		return (ESTALE);
4048 	vfslocked = VFS_LOCK_GIANT(mp);
4049 	/* now give me my vnode, it gets returned to me locked */
4050 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4051 	if (error)
4052 		goto out;
4053 	/*
4054 	 * from now on we have to make sure not
4055 	 * to forget about the vnode
4056 	 * any error that causes an abort must vput(vp)
4057 	 * just set error = err and 'goto bad;'.
4058 	 */
4059 
4060 	/*
4061 	 * from vn_open
4062 	 */
4063 	if (vp->v_type == VLNK) {
4064 		error = EMLINK;
4065 		goto bad;
4066 	}
4067 	if (vp->v_type == VSOCK) {
4068 		error = EOPNOTSUPP;
4069 		goto bad;
4070 	}
4071 	mode = 0;
4072 	if (fmode & (FWRITE | O_TRUNC)) {
4073 		if (vp->v_type == VDIR) {
4074 			error = EISDIR;
4075 			goto bad;
4076 		}
4077 		error = vn_writechk(vp);
4078 		if (error)
4079 			goto bad;
4080 		mode |= VWRITE;
4081 	}
4082 	if (fmode & FREAD)
4083 		mode |= VREAD;
4084 	if (fmode & O_APPEND)
4085 		mode |= VAPPEND;
4086 #ifdef MAC
4087 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4088 	if (error)
4089 		goto bad;
4090 #endif
4091 	if (mode) {
4092 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4093 		if (error)
4094 			goto bad;
4095 	}
4096 	if (fmode & O_TRUNC) {
4097 		VOP_UNLOCK(vp, 0, td);				/* XXX */
4098 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4099 			vrele(vp);
4100 			goto out;
4101 		}
4102 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4103 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4104 #ifdef MAC
4105 		/*
4106 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4107 		 * should be right.
4108 		 */
4109 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4110 		if (error == 0) {
4111 #endif
4112 			VATTR_NULL(vap);
4113 			vap->va_size = 0;
4114 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4115 #ifdef MAC
4116 		}
4117 #endif
4118 		vn_finished_write(mp);
4119 		if (error)
4120 			goto bad;
4121 	}
4122 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, NULL);
4123 	if (error)
4124 		goto bad;
4125 
4126 	if (fmode & FWRITE)
4127 		vp->v_writecount++;
4128 
4129 	/*
4130 	 * end of vn_open code
4131 	 */
4132 
4133 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4134 		if (fmode & FWRITE)
4135 			vp->v_writecount--;
4136 		goto bad;
4137 	}
4138 	/* An extra reference on `nfp' has been held for us by falloc(). */
4139 	fp = nfp;
4140 
4141 	FILE_LOCK(nfp);
4142 	nfp->f_vnode = vp;
4143 	nfp->f_data = vp;
4144 	nfp->f_flag = fmode & FMASK;
4145 	nfp->f_type = DTYPE_VNODE;
4146 	nfp->f_ops = &vnops;
4147 	FILE_UNLOCK(nfp);
4148 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4149 		lf.l_whence = SEEK_SET;
4150 		lf.l_start = 0;
4151 		lf.l_len = 0;
4152 		if (fmode & O_EXLOCK)
4153 			lf.l_type = F_WRLCK;
4154 		else
4155 			lf.l_type = F_RDLCK;
4156 		type = F_FLOCK;
4157 		if ((fmode & FNONBLOCK) == 0)
4158 			type |= F_WAIT;
4159 		VOP_UNLOCK(vp, 0, td);
4160 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4161 			    type)) != 0) {
4162 			/*
4163 			 * The lock request failed.  Normally close the
4164 			 * descriptor but handle the case where someone might
4165 			 * have dup()d or close()d it when we weren't looking.
4166 			 */
4167 			fdclose(fdp, fp, indx, td);
4168 
4169 			/*
4170 			 * release our private reference
4171 			 */
4172 			fdrop(fp, td);
4173 			goto out;
4174 		}
4175 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4176 		fp->f_flag |= FHASLOCK;
4177 	}
4178 
4179 	VOP_UNLOCK(vp, 0, td);
4180 	fdrop(fp, td);
4181 	vfs_rel(mp);
4182 	VFS_UNLOCK_GIANT(vfslocked);
4183 	td->td_retval[0] = indx;
4184 	return (0);
4185 
4186 bad:
4187 	vput(vp);
4188 out:
4189 	vfs_rel(mp);
4190 	VFS_UNLOCK_GIANT(vfslocked);
4191 	return (error);
4192 }
4193 
4194 /*
4195  * Stat an (NFS) file handle.
4196  */
4197 #ifndef _SYS_SYSPROTO_H_
4198 struct fhstat_args {
4199 	struct fhandle *u_fhp;
4200 	struct stat *sb;
4201 };
4202 #endif
4203 int
4204 fhstat(td, uap)
4205 	struct thread *td;
4206 	register struct fhstat_args /* {
4207 		struct fhandle *u_fhp;
4208 		struct stat *sb;
4209 	} */ *uap;
4210 {
4211 	struct stat sb;
4212 	fhandle_t fh;
4213 	struct mount *mp;
4214 	struct vnode *vp;
4215 	int vfslocked;
4216 	int error;
4217 
4218 	error = priv_check(td, PRIV_VFS_FHSTAT);
4219 	if (error)
4220 		return (error);
4221 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4222 	if (error)
4223 		return (error);
4224 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4225 		return (ESTALE);
4226 	vfslocked = VFS_LOCK_GIANT(mp);
4227 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4228 		vfs_rel(mp);
4229 		VFS_UNLOCK_GIANT(vfslocked);
4230 		return (error);
4231 	}
4232 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4233 	vput(vp);
4234 	vfs_rel(mp);
4235 	VFS_UNLOCK_GIANT(vfslocked);
4236 	if (error)
4237 		return (error);
4238 	error = copyout(&sb, uap->sb, sizeof(sb));
4239 	return (error);
4240 }
4241 
4242 /*
4243  * Implement fstatfs() for (NFS) file handles.
4244  */
4245 #ifndef _SYS_SYSPROTO_H_
4246 struct fhstatfs_args {
4247 	struct fhandle *u_fhp;
4248 	struct statfs *buf;
4249 };
4250 #endif
4251 int
4252 fhstatfs(td, uap)
4253 	struct thread *td;
4254 	struct fhstatfs_args /* {
4255 		struct fhandle *u_fhp;
4256 		struct statfs *buf;
4257 	} */ *uap;
4258 {
4259 	struct statfs sf;
4260 	fhandle_t fh;
4261 	int error;
4262 
4263 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4264 	if (error)
4265 		return (error);
4266 	error = kern_fhstatfs(td, fh, &sf);
4267 	if (error)
4268 		return (error);
4269 	return (copyout(&sf, uap->buf, sizeof(sf)));
4270 }
4271 
4272 int
4273 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4274 {
4275 	struct statfs *sp;
4276 	struct mount *mp;
4277 	struct vnode *vp;
4278 	int vfslocked;
4279 	int error;
4280 
4281 	error = priv_check(td, PRIV_VFS_FHSTATFS);
4282 	if (error)
4283 		return (error);
4284 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4285 		return (ESTALE);
4286 	vfslocked = VFS_LOCK_GIANT(mp);
4287 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4288 	if (error) {
4289 		VFS_UNLOCK_GIANT(vfslocked);
4290 		vfs_rel(mp);
4291 		return (error);
4292 	}
4293 	vput(vp);
4294 	error = prison_canseemount(td->td_ucred, mp);
4295 	if (error)
4296 		goto out;
4297 #ifdef MAC
4298 	error = mac_check_mount_stat(td->td_ucred, mp);
4299 	if (error)
4300 		goto out;
4301 #endif
4302 	/*
4303 	 * Set these in case the underlying filesystem fails to do so.
4304 	 */
4305 	sp = &mp->mnt_stat;
4306 	sp->f_version = STATFS_VERSION;
4307 	sp->f_namemax = NAME_MAX;
4308 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4309 	error = VFS_STATFS(mp, sp, td);
4310 	if (error == 0)
4311 		*buf = *sp;
4312 out:
4313 	vfs_rel(mp);
4314 	VFS_UNLOCK_GIANT(vfslocked);
4315 	return (error);
4316 }
4317