xref: /freebsd/sys/kern/vfs_syscalls.c (revision 1e413cf93298b5b97441a21d9a50fdcd0ee9945e)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/sysent.h>
48 #include <sys/malloc.h>
49 #include <sys/mount.h>
50 #include <sys/mutex.h>
51 #include <sys/sysproto.h>
52 #include <sys/namei.h>
53 #include <sys/filedesc.h>
54 #include <sys/kernel.h>
55 #include <sys/fcntl.h>
56 #include <sys/file.h>
57 #include <sys/filio.h>
58 #include <sys/limits.h>
59 #include <sys/linker.h>
60 #include <sys/stat.h>
61 #include <sys/sx.h>
62 #include <sys/unistd.h>
63 #include <sys/vnode.h>
64 #include <sys/priv.h>
65 #include <sys/proc.h>
66 #include <sys/dirent.h>
67 #include <sys/jail.h>
68 #include <sys/syscallsubr.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/stdarg.h>
72 
73 #include <security/audit/audit.h>
74 #include <security/mac/mac_framework.h>
75 
76 #include <vm/vm.h>
77 #include <vm/vm_object.h>
78 #include <vm/vm_page.h>
79 #include <vm/uma.h>
80 
81 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
82 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
83 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
84 static int setfmode(struct thread *td, struct vnode *, int);
85 static int setfflags(struct thread *td, struct vnode *, int);
86 static int setutimes(struct thread *td, struct vnode *,
87     const struct timespec *, int, int);
88 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
89     struct thread *td);
90 
91 /*
92  * The module initialization routine for POSIX asynchronous I/O will
93  * set this to the version of AIO that it implements.  (Zero means
94  * that it is not implemented.)  This value is used here by pathconf()
95  * and in kern_descrip.c by fpathconf().
96  */
97 int async_io_version;
98 
99 #ifdef DEBUG
100 static int syncprt = 0;
101 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
102 #endif
103 
104 /*
105  * Sync each mounted filesystem.
106  */
107 #ifndef _SYS_SYSPROTO_H_
108 struct sync_args {
109 	int     dummy;
110 };
111 #endif
112 /* ARGSUSED */
113 int
114 sync(td, uap)
115 	struct thread *td;
116 	struct sync_args *uap;
117 {
118 	struct mount *mp, *nmp;
119 	int vfslocked;
120 
121 	mtx_lock(&mountlist_mtx);
122 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
123 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
124 			nmp = TAILQ_NEXT(mp, mnt_list);
125 			continue;
126 		}
127 		vfslocked = VFS_LOCK_GIANT(mp);
128 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
129 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
130 			MNT_ILOCK(mp);
131 			mp->mnt_noasync++;
132 			mp->mnt_kern_flag &= ~MNTK_ASYNC;
133 			MNT_IUNLOCK(mp);
134 			vfs_msync(mp, MNT_NOWAIT);
135 			VFS_SYNC(mp, MNT_NOWAIT, td);
136 			MNT_ILOCK(mp);
137 			mp->mnt_noasync--;
138 			if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
139 			    mp->mnt_noasync == 0)
140 				mp->mnt_kern_flag |= MNTK_ASYNC;
141 			MNT_IUNLOCK(mp);
142 			vn_finished_write(mp);
143 		}
144 		VFS_UNLOCK_GIANT(vfslocked);
145 		mtx_lock(&mountlist_mtx);
146 		nmp = TAILQ_NEXT(mp, mnt_list);
147 		vfs_unbusy(mp, td);
148 	}
149 	mtx_unlock(&mountlist_mtx);
150 	return (0);
151 }
152 
153 /* XXX PRISON: could be per prison flag */
154 static int prison_quotas;
155 #if 0
156 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
157 #endif
158 
159 /*
160  * Change filesystem quotas.
161  */
162 #ifndef _SYS_SYSPROTO_H_
163 struct quotactl_args {
164 	char *path;
165 	int cmd;
166 	int uid;
167 	caddr_t arg;
168 };
169 #endif
170 int
171 quotactl(td, uap)
172 	struct thread *td;
173 	register struct quotactl_args /* {
174 		char *path;
175 		int cmd;
176 		int uid;
177 		caddr_t arg;
178 	} */ *uap;
179 {
180 	struct mount *mp;
181 	int vfslocked;
182 	int error;
183 	struct nameidata nd;
184 
185 	AUDIT_ARG(cmd, uap->cmd);
186 	AUDIT_ARG(uid, uap->uid);
187 	if (jailed(td->td_ucred) && !prison_quotas)
188 		return (EPERM);
189 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1,
190 	   UIO_USERSPACE, uap->path, td);
191 	if ((error = namei(&nd)) != 0)
192 		return (error);
193 	vfslocked = NDHASGIANT(&nd);
194 	NDFREE(&nd, NDF_ONLY_PNBUF);
195 	mp = nd.ni_vp->v_mount;
196 	if ((error = vfs_busy(mp, 0, NULL, td))) {
197 		vrele(nd.ni_vp);
198 		VFS_UNLOCK_GIANT(vfslocked);
199 		return (error);
200 	}
201 	vrele(nd.ni_vp);
202 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
203 	vfs_unbusy(mp, td);
204 	VFS_UNLOCK_GIANT(vfslocked);
205 	return (error);
206 }
207 
208 /*
209  * Used by statfs conversion routines to scale the block size up if
210  * necessary so that all of the block counts are <= 'max_size'.  Note
211  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
212  * value of 'n'.
213  */
214 void
215 statfs_scale_blocks(struct statfs *sf, long max_size)
216 {
217 	uint64_t count;
218 	int shift;
219 
220 	KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
221 
222 	/*
223 	 * Attempt to scale the block counts to give a more accurate
224 	 * overview to userland of the ratio of free space to used
225 	 * space.  To do this, find the largest block count and compute
226 	 * a divisor that lets it fit into a signed integer <= max_size.
227 	 */
228 	if (sf->f_bavail < 0)
229 		count = -sf->f_bavail;
230 	else
231 		count = sf->f_bavail;
232 	count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
233 	if (count <= max_size)
234 		return;
235 
236 	count >>= flsl(max_size);
237 	shift = 0;
238 	while (count > 0) {
239 		shift++;
240 		count >>=1;
241 	}
242 
243 	sf->f_bsize <<= shift;
244 	sf->f_blocks >>= shift;
245 	sf->f_bfree >>= shift;
246 	sf->f_bavail >>= shift;
247 }
248 
249 /*
250  * Get filesystem statistics.
251  */
252 #ifndef _SYS_SYSPROTO_H_
253 struct statfs_args {
254 	char *path;
255 	struct statfs *buf;
256 };
257 #endif
258 int
259 statfs(td, uap)
260 	struct thread *td;
261 	register struct statfs_args /* {
262 		char *path;
263 		struct statfs *buf;
264 	} */ *uap;
265 {
266 	struct statfs sf;
267 	int error;
268 
269 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
270 	if (error == 0)
271 		error = copyout(&sf, uap->buf, sizeof(sf));
272 	return (error);
273 }
274 
275 int
276 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
277     struct statfs *buf)
278 {
279 	struct mount *mp;
280 	struct statfs *sp, sb;
281 	int vfslocked;
282 	int error;
283 	struct nameidata nd;
284 
285 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
286 	    pathseg, path, td);
287 	error = namei(&nd);
288 	if (error)
289 		return (error);
290 	vfslocked = NDHASGIANT(&nd);
291 	mp = nd.ni_vp->v_mount;
292 	vfs_ref(mp);
293 	NDFREE(&nd, NDF_ONLY_PNBUF);
294 	vput(nd.ni_vp);
295 #ifdef MAC
296 	error = mac_mount_check_stat(td->td_ucred, mp);
297 	if (error)
298 		goto out;
299 #endif
300 	/*
301 	 * Set these in case the underlying filesystem fails to do so.
302 	 */
303 	sp = &mp->mnt_stat;
304 	sp->f_version = STATFS_VERSION;
305 	sp->f_namemax = NAME_MAX;
306 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
307 	error = VFS_STATFS(mp, sp, td);
308 	if (error)
309 		goto out;
310 	if (priv_check(td, PRIV_VFS_GENERATION)) {
311 		bcopy(sp, &sb, sizeof(sb));
312 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
313 		prison_enforce_statfs(td->td_ucred, mp, &sb);
314 		sp = &sb;
315 	}
316 	*buf = *sp;
317 out:
318 	vfs_rel(mp);
319 	VFS_UNLOCK_GIANT(vfslocked);
320 	if (mtx_owned(&Giant))
321 		printf("statfs(%d): %s: %d\n", vfslocked, path, error);
322 	return (error);
323 }
324 
325 /*
326  * Get filesystem statistics.
327  */
328 #ifndef _SYS_SYSPROTO_H_
329 struct fstatfs_args {
330 	int fd;
331 	struct statfs *buf;
332 };
333 #endif
334 int
335 fstatfs(td, uap)
336 	struct thread *td;
337 	register struct fstatfs_args /* {
338 		int fd;
339 		struct statfs *buf;
340 	} */ *uap;
341 {
342 	struct statfs sf;
343 	int error;
344 
345 	error = kern_fstatfs(td, uap->fd, &sf);
346 	if (error == 0)
347 		error = copyout(&sf, uap->buf, sizeof(sf));
348 	return (error);
349 }
350 
351 int
352 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
353 {
354 	struct file *fp;
355 	struct mount *mp;
356 	struct statfs *sp, sb;
357 	int vfslocked;
358 	struct vnode *vp;
359 	int error;
360 
361 	AUDIT_ARG(fd, fd);
362 	error = getvnode(td->td_proc->p_fd, fd, &fp);
363 	if (error)
364 		return (error);
365 	vp = fp->f_vnode;
366 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
367 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
368 #ifdef AUDIT
369 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
370 #endif
371 	mp = vp->v_mount;
372 	if (mp)
373 		vfs_ref(mp);
374 	VOP_UNLOCK(vp, 0);
375 	fdrop(fp, td);
376 	if (vp->v_iflag & VI_DOOMED) {
377 		error = EBADF;
378 		goto out;
379 	}
380 #ifdef MAC
381 	error = mac_mount_check_stat(td->td_ucred, mp);
382 	if (error)
383 		goto out;
384 #endif
385 	/*
386 	 * Set these in case the underlying filesystem fails to do so.
387 	 */
388 	sp = &mp->mnt_stat;
389 	sp->f_version = STATFS_VERSION;
390 	sp->f_namemax = NAME_MAX;
391 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
392 	error = VFS_STATFS(mp, sp, td);
393 	if (error)
394 		goto out;
395 	if (priv_check(td, PRIV_VFS_GENERATION)) {
396 		bcopy(sp, &sb, sizeof(sb));
397 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
398 		prison_enforce_statfs(td->td_ucred, mp, &sb);
399 		sp = &sb;
400 	}
401 	*buf = *sp;
402 out:
403 	if (mp)
404 		vfs_rel(mp);
405 	VFS_UNLOCK_GIANT(vfslocked);
406 	return (error);
407 }
408 
409 /*
410  * Get statistics on all filesystems.
411  */
412 #ifndef _SYS_SYSPROTO_H_
413 struct getfsstat_args {
414 	struct statfs *buf;
415 	long bufsize;
416 	int flags;
417 };
418 #endif
419 int
420 getfsstat(td, uap)
421 	struct thread *td;
422 	register struct getfsstat_args /* {
423 		struct statfs *buf;
424 		long bufsize;
425 		int flags;
426 	} */ *uap;
427 {
428 
429 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
430 	    uap->flags));
431 }
432 
433 /*
434  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
435  * 	The caller is responsible for freeing memory which will be allocated
436  *	in '*buf'.
437  */
438 int
439 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
440     enum uio_seg bufseg, int flags)
441 {
442 	struct mount *mp, *nmp;
443 	struct statfs *sfsp, *sp, sb;
444 	size_t count, maxcount;
445 	int vfslocked;
446 	int error;
447 
448 	maxcount = bufsize / sizeof(struct statfs);
449 	if (bufsize == 0)
450 		sfsp = NULL;
451 	else if (bufseg == UIO_USERSPACE)
452 		sfsp = *buf;
453 	else /* if (bufseg == UIO_SYSSPACE) */ {
454 		count = 0;
455 		mtx_lock(&mountlist_mtx);
456 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
457 			count++;
458 		}
459 		mtx_unlock(&mountlist_mtx);
460 		if (maxcount > count)
461 			maxcount = count;
462 		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
463 		    M_WAITOK);
464 	}
465 	count = 0;
466 	mtx_lock(&mountlist_mtx);
467 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
468 		if (prison_canseemount(td->td_ucred, mp) != 0) {
469 			nmp = TAILQ_NEXT(mp, mnt_list);
470 			continue;
471 		}
472 #ifdef MAC
473 		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
474 			nmp = TAILQ_NEXT(mp, mnt_list);
475 			continue;
476 		}
477 #endif
478 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
479 			nmp = TAILQ_NEXT(mp, mnt_list);
480 			continue;
481 		}
482 		vfslocked = VFS_LOCK_GIANT(mp);
483 		if (sfsp && count < maxcount) {
484 			sp = &mp->mnt_stat;
485 			/*
486 			 * Set these in case the underlying filesystem
487 			 * fails to do so.
488 			 */
489 			sp->f_version = STATFS_VERSION;
490 			sp->f_namemax = NAME_MAX;
491 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
492 			/*
493 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
494 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
495 			 * overrides MNT_WAIT.
496 			 */
497 			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
498 			    (flags & MNT_WAIT)) &&
499 			    (error = VFS_STATFS(mp, sp, td))) {
500 				VFS_UNLOCK_GIANT(vfslocked);
501 				mtx_lock(&mountlist_mtx);
502 				nmp = TAILQ_NEXT(mp, mnt_list);
503 				vfs_unbusy(mp, td);
504 				continue;
505 			}
506 			if (priv_check(td, PRIV_VFS_GENERATION)) {
507 				bcopy(sp, &sb, sizeof(sb));
508 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
509 				prison_enforce_statfs(td->td_ucred, mp, &sb);
510 				sp = &sb;
511 			}
512 			if (bufseg == UIO_SYSSPACE)
513 				bcopy(sp, sfsp, sizeof(*sp));
514 			else /* if (bufseg == UIO_USERSPACE) */ {
515 				error = copyout(sp, sfsp, sizeof(*sp));
516 				if (error) {
517 					vfs_unbusy(mp, td);
518 					VFS_UNLOCK_GIANT(vfslocked);
519 					return (error);
520 				}
521 			}
522 			sfsp++;
523 		}
524 		VFS_UNLOCK_GIANT(vfslocked);
525 		count++;
526 		mtx_lock(&mountlist_mtx);
527 		nmp = TAILQ_NEXT(mp, mnt_list);
528 		vfs_unbusy(mp, td);
529 	}
530 	mtx_unlock(&mountlist_mtx);
531 	if (sfsp && count > maxcount)
532 		td->td_retval[0] = maxcount;
533 	else
534 		td->td_retval[0] = count;
535 	return (0);
536 }
537 
538 #ifdef COMPAT_FREEBSD4
539 /*
540  * Get old format filesystem statistics.
541  */
542 static void cvtstatfs(struct statfs *, struct ostatfs *);
543 
544 #ifndef _SYS_SYSPROTO_H_
545 struct freebsd4_statfs_args {
546 	char *path;
547 	struct ostatfs *buf;
548 };
549 #endif
550 int
551 freebsd4_statfs(td, uap)
552 	struct thread *td;
553 	struct freebsd4_statfs_args /* {
554 		char *path;
555 		struct ostatfs *buf;
556 	} */ *uap;
557 {
558 	struct ostatfs osb;
559 	struct statfs sf;
560 	int error;
561 
562 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
563 	if (error)
564 		return (error);
565 	cvtstatfs(&sf, &osb);
566 	return (copyout(&osb, uap->buf, sizeof(osb)));
567 }
568 
569 /*
570  * Get filesystem statistics.
571  */
572 #ifndef _SYS_SYSPROTO_H_
573 struct freebsd4_fstatfs_args {
574 	int fd;
575 	struct ostatfs *buf;
576 };
577 #endif
578 int
579 freebsd4_fstatfs(td, uap)
580 	struct thread *td;
581 	struct freebsd4_fstatfs_args /* {
582 		int fd;
583 		struct ostatfs *buf;
584 	} */ *uap;
585 {
586 	struct ostatfs osb;
587 	struct statfs sf;
588 	int error;
589 
590 	error = kern_fstatfs(td, uap->fd, &sf);
591 	if (error)
592 		return (error);
593 	cvtstatfs(&sf, &osb);
594 	return (copyout(&osb, uap->buf, sizeof(osb)));
595 }
596 
597 /*
598  * Get statistics on all filesystems.
599  */
600 #ifndef _SYS_SYSPROTO_H_
601 struct freebsd4_getfsstat_args {
602 	struct ostatfs *buf;
603 	long bufsize;
604 	int flags;
605 };
606 #endif
607 int
608 freebsd4_getfsstat(td, uap)
609 	struct thread *td;
610 	register struct freebsd4_getfsstat_args /* {
611 		struct ostatfs *buf;
612 		long bufsize;
613 		int flags;
614 	} */ *uap;
615 {
616 	struct statfs *buf, *sp;
617 	struct ostatfs osb;
618 	size_t count, size;
619 	int error;
620 
621 	count = uap->bufsize / sizeof(struct ostatfs);
622 	size = count * sizeof(struct statfs);
623 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
624 	if (size > 0) {
625 		count = td->td_retval[0];
626 		sp = buf;
627 		while (count > 0 && error == 0) {
628 			cvtstatfs(sp, &osb);
629 			error = copyout(&osb, uap->buf, sizeof(osb));
630 			sp++;
631 			uap->buf++;
632 			count--;
633 		}
634 		free(buf, M_TEMP);
635 	}
636 	return (error);
637 }
638 
639 /*
640  * Implement fstatfs() for (NFS) file handles.
641  */
642 #ifndef _SYS_SYSPROTO_H_
643 struct freebsd4_fhstatfs_args {
644 	struct fhandle *u_fhp;
645 	struct ostatfs *buf;
646 };
647 #endif
648 int
649 freebsd4_fhstatfs(td, uap)
650 	struct thread *td;
651 	struct freebsd4_fhstatfs_args /* {
652 		struct fhandle *u_fhp;
653 		struct ostatfs *buf;
654 	} */ *uap;
655 {
656 	struct ostatfs osb;
657 	struct statfs sf;
658 	fhandle_t fh;
659 	int error;
660 
661 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
662 	if (error)
663 		return (error);
664 	error = kern_fhstatfs(td, fh, &sf);
665 	if (error)
666 		return (error);
667 	cvtstatfs(&sf, &osb);
668 	return (copyout(&osb, uap->buf, sizeof(osb)));
669 }
670 
671 /*
672  * Convert a new format statfs structure to an old format statfs structure.
673  */
674 static void
675 cvtstatfs(nsp, osp)
676 	struct statfs *nsp;
677 	struct ostatfs *osp;
678 {
679 
680 	statfs_scale_blocks(nsp, LONG_MAX);
681 	bzero(osp, sizeof(*osp));
682 	osp->f_bsize = nsp->f_bsize;
683 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
684 	osp->f_blocks = nsp->f_blocks;
685 	osp->f_bfree = nsp->f_bfree;
686 	osp->f_bavail = nsp->f_bavail;
687 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
688 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
689 	osp->f_owner = nsp->f_owner;
690 	osp->f_type = nsp->f_type;
691 	osp->f_flags = nsp->f_flags;
692 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
693 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
694 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
695 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
696 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
697 	    MIN(MFSNAMELEN, OMFSNAMELEN));
698 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
699 	    MIN(MNAMELEN, OMNAMELEN));
700 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
701 	    MIN(MNAMELEN, OMNAMELEN));
702 	osp->f_fsid = nsp->f_fsid;
703 }
704 #endif /* COMPAT_FREEBSD4 */
705 
706 /*
707  * Change current working directory to a given file descriptor.
708  */
709 #ifndef _SYS_SYSPROTO_H_
710 struct fchdir_args {
711 	int	fd;
712 };
713 #endif
714 int
715 fchdir(td, uap)
716 	struct thread *td;
717 	struct fchdir_args /* {
718 		int fd;
719 	} */ *uap;
720 {
721 	register struct filedesc *fdp = td->td_proc->p_fd;
722 	struct vnode *vp, *tdp, *vpold;
723 	struct mount *mp;
724 	struct file *fp;
725 	int vfslocked;
726 	int error;
727 
728 	AUDIT_ARG(fd, uap->fd);
729 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
730 		return (error);
731 	vp = fp->f_vnode;
732 	VREF(vp);
733 	fdrop(fp, td);
734 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
735 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
736 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
737 	error = change_dir(vp, td);
738 	while (!error && (mp = vp->v_mountedhere) != NULL) {
739 		int tvfslocked;
740 		if (vfs_busy(mp, 0, 0, td))
741 			continue;
742 		tvfslocked = VFS_LOCK_GIANT(mp);
743 		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
744 		vfs_unbusy(mp, td);
745 		if (error) {
746 			VFS_UNLOCK_GIANT(tvfslocked);
747 			break;
748 		}
749 		vput(vp);
750 		VFS_UNLOCK_GIANT(vfslocked);
751 		vp = tdp;
752 		vfslocked = tvfslocked;
753 	}
754 	if (error) {
755 		vput(vp);
756 		VFS_UNLOCK_GIANT(vfslocked);
757 		return (error);
758 	}
759 	VOP_UNLOCK(vp, 0);
760 	VFS_UNLOCK_GIANT(vfslocked);
761 	FILEDESC_XLOCK(fdp);
762 	vpold = fdp->fd_cdir;
763 	fdp->fd_cdir = vp;
764 	FILEDESC_XUNLOCK(fdp);
765 	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
766 	vrele(vpold);
767 	VFS_UNLOCK_GIANT(vfslocked);
768 	return (0);
769 }
770 
771 /*
772  * Change current working directory (``.'').
773  */
774 #ifndef _SYS_SYSPROTO_H_
775 struct chdir_args {
776 	char	*path;
777 };
778 #endif
779 int
780 chdir(td, uap)
781 	struct thread *td;
782 	struct chdir_args /* {
783 		char *path;
784 	} */ *uap;
785 {
786 
787 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
788 }
789 
790 int
791 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
792 {
793 	register struct filedesc *fdp = td->td_proc->p_fd;
794 	int error;
795 	struct nameidata nd;
796 	struct vnode *vp;
797 	int vfslocked;
798 
799 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
800 	    pathseg, path, td);
801 	if ((error = namei(&nd)) != 0)
802 		return (error);
803 	vfslocked = NDHASGIANT(&nd);
804 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
805 		vput(nd.ni_vp);
806 		VFS_UNLOCK_GIANT(vfslocked);
807 		NDFREE(&nd, NDF_ONLY_PNBUF);
808 		return (error);
809 	}
810 	VOP_UNLOCK(nd.ni_vp, 0);
811 	VFS_UNLOCK_GIANT(vfslocked);
812 	NDFREE(&nd, NDF_ONLY_PNBUF);
813 	FILEDESC_XLOCK(fdp);
814 	vp = fdp->fd_cdir;
815 	fdp->fd_cdir = nd.ni_vp;
816 	FILEDESC_XUNLOCK(fdp);
817 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
818 	vrele(vp);
819 	VFS_UNLOCK_GIANT(vfslocked);
820 	return (0);
821 }
822 
823 /*
824  * Helper function for raised chroot(2) security function:  Refuse if
825  * any filedescriptors are open directories.
826  */
827 static int
828 chroot_refuse_vdir_fds(fdp)
829 	struct filedesc *fdp;
830 {
831 	struct vnode *vp;
832 	struct file *fp;
833 	int fd;
834 
835 	FILEDESC_LOCK_ASSERT(fdp);
836 
837 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
838 		fp = fget_locked(fdp, fd);
839 		if (fp == NULL)
840 			continue;
841 		if (fp->f_type == DTYPE_VNODE) {
842 			vp = fp->f_vnode;
843 			if (vp->v_type == VDIR)
844 				return (EPERM);
845 		}
846 	}
847 	return (0);
848 }
849 
850 /*
851  * This sysctl determines if we will allow a process to chroot(2) if it
852  * has a directory open:
853  *	0: disallowed for all processes.
854  *	1: allowed for processes that were not already chroot(2)'ed.
855  *	2: allowed for all processes.
856  */
857 
858 static int chroot_allow_open_directories = 1;
859 
860 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
861      &chroot_allow_open_directories, 0, "");
862 
863 /*
864  * Change notion of root (``/'') directory.
865  */
866 #ifndef _SYS_SYSPROTO_H_
867 struct chroot_args {
868 	char	*path;
869 };
870 #endif
871 int
872 chroot(td, uap)
873 	struct thread *td;
874 	struct chroot_args /* {
875 		char *path;
876 	} */ *uap;
877 {
878 	int error;
879 	struct nameidata nd;
880 	int vfslocked;
881 
882 	error = priv_check(td, PRIV_VFS_CHROOT);
883 	if (error)
884 		return (error);
885 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
886 	    UIO_USERSPACE, uap->path, td);
887 	error = namei(&nd);
888 	if (error)
889 		goto error;
890 	vfslocked = NDHASGIANT(&nd);
891 	if ((error = change_dir(nd.ni_vp, td)) != 0)
892 		goto e_vunlock;
893 #ifdef MAC
894 	if ((error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp)))
895 		goto e_vunlock;
896 #endif
897 	VOP_UNLOCK(nd.ni_vp, 0);
898 	error = change_root(nd.ni_vp, td);
899 	vrele(nd.ni_vp);
900 	VFS_UNLOCK_GIANT(vfslocked);
901 	NDFREE(&nd, NDF_ONLY_PNBUF);
902 	return (error);
903 e_vunlock:
904 	vput(nd.ni_vp);
905 	VFS_UNLOCK_GIANT(vfslocked);
906 error:
907 	NDFREE(&nd, NDF_ONLY_PNBUF);
908 	return (error);
909 }
910 
911 /*
912  * Common routine for chroot and chdir.  Callers must provide a locked vnode
913  * instance.
914  */
915 int
916 change_dir(vp, td)
917 	struct vnode *vp;
918 	struct thread *td;
919 {
920 	int error;
921 
922 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
923 	if (vp->v_type != VDIR)
924 		return (ENOTDIR);
925 #ifdef MAC
926 	error = mac_vnode_check_chdir(td->td_ucred, vp);
927 	if (error)
928 		return (error);
929 #endif
930 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
931 	return (error);
932 }
933 
934 /*
935  * Common routine for kern_chroot() and jail_attach().  The caller is
936  * responsible for invoking priv_check() and mac_vnode_check_chroot() to
937  * authorize this operation.
938  */
939 int
940 change_root(vp, td)
941 	struct vnode *vp;
942 	struct thread *td;
943 {
944 	struct filedesc *fdp;
945 	struct vnode *oldvp;
946 	int vfslocked;
947 	int error;
948 
949 	VFS_ASSERT_GIANT(vp->v_mount);
950 	fdp = td->td_proc->p_fd;
951 	FILEDESC_XLOCK(fdp);
952 	if (chroot_allow_open_directories == 0 ||
953 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
954 		error = chroot_refuse_vdir_fds(fdp);
955 		if (error) {
956 			FILEDESC_XUNLOCK(fdp);
957 			return (error);
958 		}
959 	}
960 	oldvp = fdp->fd_rdir;
961 	fdp->fd_rdir = vp;
962 	VREF(fdp->fd_rdir);
963 	if (!fdp->fd_jdir) {
964 		fdp->fd_jdir = vp;
965 		VREF(fdp->fd_jdir);
966 	}
967 	FILEDESC_XUNLOCK(fdp);
968 	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
969 	vrele(oldvp);
970 	VFS_UNLOCK_GIANT(vfslocked);
971 	return (0);
972 }
973 
974 /*
975  * Check permissions, allocate an open file structure, and call the device
976  * open routine if any.
977  */
978 #ifndef _SYS_SYSPROTO_H_
979 struct open_args {
980 	char	*path;
981 	int	flags;
982 	int	mode;
983 };
984 #endif
985 int
986 open(td, uap)
987 	struct thread *td;
988 	register struct open_args /* {
989 		char *path;
990 		int flags;
991 		int mode;
992 	} */ *uap;
993 {
994 
995 	return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
996 }
997 
998 int
999 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
1000     int mode)
1001 {
1002 	struct proc *p = td->td_proc;
1003 	struct filedesc *fdp = p->p_fd;
1004 	struct file *fp;
1005 	struct vnode *vp;
1006 	struct vattr vat;
1007 	struct mount *mp;
1008 	int cmode;
1009 	struct file *nfp;
1010 	int type, indx, error;
1011 	struct flock lf;
1012 	struct nameidata nd;
1013 	int vfslocked;
1014 
1015 	AUDIT_ARG(fflags, flags);
1016 	AUDIT_ARG(mode, mode);
1017 	if ((flags & O_ACCMODE) == O_ACCMODE)
1018 		return (EINVAL);
1019 	flags = FFLAGS(flags);
1020 	error = falloc(td, &nfp, &indx);
1021 	if (error)
1022 		return (error);
1023 	/* An extra reference on `nfp' has been held for us by falloc(). */
1024 	fp = nfp;
1025 	/* Set the flags early so the finit in devfs can pick them up. */
1026 	fp->f_flag = flags & FMASK;
1027 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1028 	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
1029 	td->td_dupfd = -1;		/* XXX check for fdopen */
1030  	error = vn_open(&nd, &flags, cmode, fp);
1031 	if (error) {
1032 		/*
1033 		 * If the vn_open replaced the method vector, something
1034 		 * wonderous happened deep below and we just pass it up
1035 		 * pretending we know what we do.
1036 		 */
1037 		if (error == ENXIO && fp->f_ops != &badfileops) {
1038 			fdrop(fp, td);
1039 			td->td_retval[0] = indx;
1040 			return (0);
1041 		}
1042 
1043 		/*
1044 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1045 		 * responsible for dropping the old contents of ofiles[indx]
1046 		 * if it succeeds.
1047 		 */
1048 		if ((error == ENODEV || error == ENXIO) &&
1049 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1050 		    (error =
1051 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1052 			td->td_retval[0] = indx;
1053 			fdrop(fp, td);
1054 			return (0);
1055 		}
1056 		/*
1057 		 * Clean up the descriptor, but only if another thread hadn't
1058 		 * replaced or closed it.
1059 		 */
1060 		fdclose(fdp, fp, indx, td);
1061 		fdrop(fp, td);
1062 
1063 		if (error == ERESTART)
1064 			error = EINTR;
1065 		return (error);
1066 	}
1067 	td->td_dupfd = 0;
1068 	vfslocked = NDHASGIANT(&nd);
1069 	NDFREE(&nd, NDF_ONLY_PNBUF);
1070 	vp = nd.ni_vp;
1071 
1072 	fp->f_vnode = vp;	/* XXX Does devfs need this? */
1073 	/*
1074 	 * If the file wasn't claimed by devfs bind it to the normal
1075 	 * vnode operations here.
1076 	 */
1077 	if (fp->f_ops == &badfileops) {
1078 		KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
1079 		fp->f_seqcount = 1;
1080 		finit(fp, flags & FMASK, DTYPE_VNODE, vp, &vnops);
1081 	}
1082 
1083 	VOP_UNLOCK(vp, 0);
1084 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1085 		lf.l_whence = SEEK_SET;
1086 		lf.l_start = 0;
1087 		lf.l_len = 0;
1088 		if (flags & O_EXLOCK)
1089 			lf.l_type = F_WRLCK;
1090 		else
1091 			lf.l_type = F_RDLCK;
1092 		type = F_FLOCK;
1093 		if ((flags & FNONBLOCK) == 0)
1094 			type |= F_WAIT;
1095 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1096 			    type)) != 0)
1097 			goto bad;
1098 		atomic_set_int(&fp->f_flag, FHASLOCK);
1099 	}
1100 	if (flags & O_TRUNC) {
1101 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1102 			goto bad;
1103 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1104 		VATTR_NULL(&vat);
1105 		vat.va_size = 0;
1106 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1107 #ifdef MAC
1108 		error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
1109 		if (error == 0)
1110 #endif
1111 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1112 		VOP_UNLOCK(vp, 0);
1113 		vn_finished_write(mp);
1114 		if (error)
1115 			goto bad;
1116 	}
1117 	VFS_UNLOCK_GIANT(vfslocked);
1118 	/*
1119 	 * Release our private reference, leaving the one associated with
1120 	 * the descriptor table intact.
1121 	 */
1122 	fdrop(fp, td);
1123 	td->td_retval[0] = indx;
1124 	return (0);
1125 bad:
1126 	VFS_UNLOCK_GIANT(vfslocked);
1127 	fdclose(fdp, fp, indx, td);
1128 	fdrop(fp, td);
1129 	return (error);
1130 }
1131 
1132 #ifdef COMPAT_43
1133 /*
1134  * Create a file.
1135  */
1136 #ifndef _SYS_SYSPROTO_H_
1137 struct ocreat_args {
1138 	char	*path;
1139 	int	mode;
1140 };
1141 #endif
1142 int
1143 ocreat(td, uap)
1144 	struct thread *td;
1145 	register struct ocreat_args /* {
1146 		char *path;
1147 		int mode;
1148 	} */ *uap;
1149 {
1150 
1151 	return (kern_open(td, uap->path, UIO_USERSPACE,
1152 	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
1153 }
1154 #endif /* COMPAT_43 */
1155 
1156 /*
1157  * Create a special file.
1158  */
1159 #ifndef _SYS_SYSPROTO_H_
1160 struct mknod_args {
1161 	char	*path;
1162 	int	mode;
1163 	int	dev;
1164 };
1165 #endif
1166 int
1167 mknod(td, uap)
1168 	struct thread *td;
1169 	register struct mknod_args /* {
1170 		char *path;
1171 		int mode;
1172 		int dev;
1173 	} */ *uap;
1174 {
1175 
1176 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1177 }
1178 
1179 int
1180 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1181     int dev)
1182 {
1183 	struct vnode *vp;
1184 	struct mount *mp;
1185 	struct vattr vattr;
1186 	int error;
1187 	int whiteout = 0;
1188 	struct nameidata nd;
1189 	int vfslocked;
1190 
1191 	AUDIT_ARG(mode, mode);
1192 	AUDIT_ARG(dev, dev);
1193 	switch (mode & S_IFMT) {
1194 	case S_IFCHR:
1195 	case S_IFBLK:
1196 		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
1197 		break;
1198 	case S_IFMT:
1199 		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
1200 		break;
1201 	case S_IFWHT:
1202 		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
1203 		break;
1204 	default:
1205 		error = EINVAL;
1206 		break;
1207 	}
1208 	if (error)
1209 		return (error);
1210 restart:
1211 	bwillwrite();
1212 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1213 	    pathseg, path, td);
1214 	if ((error = namei(&nd)) != 0)
1215 		return (error);
1216 	vfslocked = NDHASGIANT(&nd);
1217 	vp = nd.ni_vp;
1218 	if (vp != NULL) {
1219 		NDFREE(&nd, NDF_ONLY_PNBUF);
1220 		if (vp == nd.ni_dvp)
1221 			vrele(nd.ni_dvp);
1222 		else
1223 			vput(nd.ni_dvp);
1224 		vrele(vp);
1225 		VFS_UNLOCK_GIANT(vfslocked);
1226 		return (EEXIST);
1227 	} else {
1228 		VATTR_NULL(&vattr);
1229 		FILEDESC_SLOCK(td->td_proc->p_fd);
1230 		vattr.va_mode = (mode & ALLPERMS) &
1231 		    ~td->td_proc->p_fd->fd_cmask;
1232 		FILEDESC_SUNLOCK(td->td_proc->p_fd);
1233 		vattr.va_rdev = dev;
1234 		whiteout = 0;
1235 
1236 		switch (mode & S_IFMT) {
1237 		case S_IFMT:	/* used by badsect to flag bad sectors */
1238 			vattr.va_type = VBAD;
1239 			break;
1240 		case S_IFCHR:
1241 			vattr.va_type = VCHR;
1242 			break;
1243 		case S_IFBLK:
1244 			vattr.va_type = VBLK;
1245 			break;
1246 		case S_IFWHT:
1247 			whiteout = 1;
1248 			break;
1249 		default:
1250 			panic("kern_mknod: invalid mode");
1251 		}
1252 	}
1253 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1254 		NDFREE(&nd, NDF_ONLY_PNBUF);
1255 		vput(nd.ni_dvp);
1256 		VFS_UNLOCK_GIANT(vfslocked);
1257 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1258 			return (error);
1259 		goto restart;
1260 	}
1261 #ifdef MAC
1262 	if (error == 0 && !whiteout)
1263 		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
1264 		    &nd.ni_cnd, &vattr);
1265 #endif
1266 	if (!error) {
1267 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1268 		if (whiteout)
1269 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1270 		else {
1271 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1272 						&nd.ni_cnd, &vattr);
1273 			if (error == 0)
1274 				vput(nd.ni_vp);
1275 		}
1276 	}
1277 	NDFREE(&nd, NDF_ONLY_PNBUF);
1278 	vput(nd.ni_dvp);
1279 	vn_finished_write(mp);
1280 	VFS_UNLOCK_GIANT(vfslocked);
1281 	return (error);
1282 }
1283 
1284 /*
1285  * Create a named pipe.
1286  */
1287 #ifndef _SYS_SYSPROTO_H_
1288 struct mkfifo_args {
1289 	char	*path;
1290 	int	mode;
1291 };
1292 #endif
1293 int
1294 mkfifo(td, uap)
1295 	struct thread *td;
1296 	register struct mkfifo_args /* {
1297 		char *path;
1298 		int mode;
1299 	} */ *uap;
1300 {
1301 
1302 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1303 }
1304 
1305 int
1306 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1307 {
1308 	struct mount *mp;
1309 	struct vattr vattr;
1310 	int error;
1311 	struct nameidata nd;
1312 	int vfslocked;
1313 
1314 	AUDIT_ARG(mode, mode);
1315 restart:
1316 	bwillwrite();
1317 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1318 	    pathseg, path, td);
1319 	if ((error = namei(&nd)) != 0)
1320 		return (error);
1321 	vfslocked = NDHASGIANT(&nd);
1322 	if (nd.ni_vp != NULL) {
1323 		NDFREE(&nd, NDF_ONLY_PNBUF);
1324 		if (nd.ni_vp == nd.ni_dvp)
1325 			vrele(nd.ni_dvp);
1326 		else
1327 			vput(nd.ni_dvp);
1328 		vrele(nd.ni_vp);
1329 		VFS_UNLOCK_GIANT(vfslocked);
1330 		return (EEXIST);
1331 	}
1332 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1333 		NDFREE(&nd, NDF_ONLY_PNBUF);
1334 		vput(nd.ni_dvp);
1335 		VFS_UNLOCK_GIANT(vfslocked);
1336 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1337 			return (error);
1338 		goto restart;
1339 	}
1340 	VATTR_NULL(&vattr);
1341 	vattr.va_type = VFIFO;
1342 	FILEDESC_SLOCK(td->td_proc->p_fd);
1343 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1344 	FILEDESC_SUNLOCK(td->td_proc->p_fd);
1345 #ifdef MAC
1346 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1347 	    &vattr);
1348 	if (error)
1349 		goto out;
1350 #endif
1351 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1352 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1353 	if (error == 0)
1354 		vput(nd.ni_vp);
1355 #ifdef MAC
1356 out:
1357 #endif
1358 	vput(nd.ni_dvp);
1359 	vn_finished_write(mp);
1360 	VFS_UNLOCK_GIANT(vfslocked);
1361 	NDFREE(&nd, NDF_ONLY_PNBUF);
1362 	return (error);
1363 }
1364 
1365 /*
1366  * Make a hard file link.
1367  */
1368 #ifndef _SYS_SYSPROTO_H_
1369 struct link_args {
1370 	char	*path;
1371 	char	*link;
1372 };
1373 #endif
1374 int
1375 link(td, uap)
1376 	struct thread *td;
1377 	register struct link_args /* {
1378 		char *path;
1379 		char *link;
1380 	} */ *uap;
1381 {
1382 	int error;
1383 
1384 	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1385 	return (error);
1386 }
1387 
1388 static int hardlink_check_uid = 0;
1389 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1390     &hardlink_check_uid, 0,
1391     "Unprivileged processes cannot create hard links to files owned by other "
1392     "users");
1393 static int hardlink_check_gid = 0;
1394 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1395     &hardlink_check_gid, 0,
1396     "Unprivileged processes cannot create hard links to files owned by other "
1397     "groups");
1398 
1399 static int
1400 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1401 {
1402 	struct vattr va;
1403 	int error;
1404 
1405 	if (!hardlink_check_uid && !hardlink_check_gid)
1406 		return (0);
1407 
1408 	error = VOP_GETATTR(vp, &va, cred, td);
1409 	if (error != 0)
1410 		return (error);
1411 
1412 	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
1413 		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
1414 		if (error)
1415 			return (error);
1416 	}
1417 
1418 	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
1419 		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
1420 		if (error)
1421 			return (error);
1422 	}
1423 
1424 	return (0);
1425 }
1426 
1427 int
1428 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1429 {
1430 	struct vnode *vp;
1431 	struct mount *mp;
1432 	struct nameidata nd;
1433 	int vfslocked;
1434 	int lvfslocked;
1435 	int error;
1436 
1437 	bwillwrite();
1438 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
1439 	if ((error = namei(&nd)) != 0)
1440 		return (error);
1441 	vfslocked = NDHASGIANT(&nd);
1442 	NDFREE(&nd, NDF_ONLY_PNBUF);
1443 	vp = nd.ni_vp;
1444 	if (vp->v_type == VDIR) {
1445 		vrele(vp);
1446 		VFS_UNLOCK_GIANT(vfslocked);
1447 		return (EPERM);		/* POSIX */
1448 	}
1449 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1450 		vrele(vp);
1451 		VFS_UNLOCK_GIANT(vfslocked);
1452 		return (error);
1453 	}
1454 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1455 	    segflg, link, td);
1456 	if ((error = namei(&nd)) == 0) {
1457 		lvfslocked = NDHASGIANT(&nd);
1458 		if (nd.ni_vp != NULL) {
1459 			if (nd.ni_dvp == nd.ni_vp)
1460 				vrele(nd.ni_dvp);
1461 			else
1462 				vput(nd.ni_dvp);
1463 			vrele(nd.ni_vp);
1464 			error = EEXIST;
1465 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY))
1466 		    == 0) {
1467 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1468 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1469 			error = can_hardlink(vp, td, td->td_ucred);
1470 			if (error == 0)
1471 #ifdef MAC
1472 				error = mac_vnode_check_link(td->td_ucred,
1473 				    nd.ni_dvp, vp, &nd.ni_cnd);
1474 			if (error == 0)
1475 #endif
1476 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1477 			VOP_UNLOCK(vp, 0);
1478 			vput(nd.ni_dvp);
1479 		}
1480 		NDFREE(&nd, NDF_ONLY_PNBUF);
1481 		VFS_UNLOCK_GIANT(lvfslocked);
1482 	}
1483 	vrele(vp);
1484 	vn_finished_write(mp);
1485 	VFS_UNLOCK_GIANT(vfslocked);
1486 	return (error);
1487 }
1488 
1489 /*
1490  * Make a symbolic link.
1491  */
1492 #ifndef _SYS_SYSPROTO_H_
1493 struct symlink_args {
1494 	char	*path;
1495 	char	*link;
1496 };
1497 #endif
1498 int
1499 symlink(td, uap)
1500 	struct thread *td;
1501 	register struct symlink_args /* {
1502 		char *path;
1503 		char *link;
1504 	} */ *uap;
1505 {
1506 
1507 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1508 }
1509 
1510 int
1511 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1512 {
1513 	struct mount *mp;
1514 	struct vattr vattr;
1515 	char *syspath;
1516 	int error;
1517 	struct nameidata nd;
1518 	int vfslocked;
1519 
1520 	if (segflg == UIO_SYSSPACE) {
1521 		syspath = path;
1522 	} else {
1523 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1524 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1525 			goto out;
1526 	}
1527 	AUDIT_ARG(text, syspath);
1528 restart:
1529 	bwillwrite();
1530 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1531 	    segflg, link, td);
1532 	if ((error = namei(&nd)) != 0)
1533 		goto out;
1534 	vfslocked = NDHASGIANT(&nd);
1535 	if (nd.ni_vp) {
1536 		NDFREE(&nd, NDF_ONLY_PNBUF);
1537 		if (nd.ni_vp == nd.ni_dvp)
1538 			vrele(nd.ni_dvp);
1539 		else
1540 			vput(nd.ni_dvp);
1541 		vrele(nd.ni_vp);
1542 		VFS_UNLOCK_GIANT(vfslocked);
1543 		error = EEXIST;
1544 		goto out;
1545 	}
1546 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1547 		NDFREE(&nd, NDF_ONLY_PNBUF);
1548 		vput(nd.ni_dvp);
1549 		VFS_UNLOCK_GIANT(vfslocked);
1550 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1551 			goto out;
1552 		goto restart;
1553 	}
1554 	VATTR_NULL(&vattr);
1555 	FILEDESC_SLOCK(td->td_proc->p_fd);
1556 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1557 	FILEDESC_SUNLOCK(td->td_proc->p_fd);
1558 #ifdef MAC
1559 	vattr.va_type = VLNK;
1560 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1561 	    &vattr);
1562 	if (error)
1563 		goto out2;
1564 #endif
1565 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1566 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1567 	if (error == 0)
1568 		vput(nd.ni_vp);
1569 #ifdef MAC
1570 out2:
1571 #endif
1572 	NDFREE(&nd, NDF_ONLY_PNBUF);
1573 	vput(nd.ni_dvp);
1574 	vn_finished_write(mp);
1575 	VFS_UNLOCK_GIANT(vfslocked);
1576 out:
1577 	if (segflg != UIO_SYSSPACE)
1578 		uma_zfree(namei_zone, syspath);
1579 	return (error);
1580 }
1581 
1582 /*
1583  * Delete a whiteout from the filesystem.
1584  */
1585 int
1586 undelete(td, uap)
1587 	struct thread *td;
1588 	register struct undelete_args /* {
1589 		char *path;
1590 	} */ *uap;
1591 {
1592 	int error;
1593 	struct mount *mp;
1594 	struct nameidata nd;
1595 	int vfslocked;
1596 
1597 restart:
1598 	bwillwrite();
1599 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1600 	    UIO_USERSPACE, uap->path, td);
1601 	error = namei(&nd);
1602 	if (error)
1603 		return (error);
1604 	vfslocked = NDHASGIANT(&nd);
1605 
1606 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1607 		NDFREE(&nd, NDF_ONLY_PNBUF);
1608 		if (nd.ni_vp == nd.ni_dvp)
1609 			vrele(nd.ni_dvp);
1610 		else
1611 			vput(nd.ni_dvp);
1612 		if (nd.ni_vp)
1613 			vrele(nd.ni_vp);
1614 		VFS_UNLOCK_GIANT(vfslocked);
1615 		return (EEXIST);
1616 	}
1617 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1618 		NDFREE(&nd, NDF_ONLY_PNBUF);
1619 		vput(nd.ni_dvp);
1620 		VFS_UNLOCK_GIANT(vfslocked);
1621 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1622 			return (error);
1623 		goto restart;
1624 	}
1625 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1626 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1627 	NDFREE(&nd, NDF_ONLY_PNBUF);
1628 	vput(nd.ni_dvp);
1629 	vn_finished_write(mp);
1630 	VFS_UNLOCK_GIANT(vfslocked);
1631 	return (error);
1632 }
1633 
1634 /*
1635  * Delete a name from the filesystem.
1636  */
1637 #ifndef _SYS_SYSPROTO_H_
1638 struct unlink_args {
1639 	char	*path;
1640 };
1641 #endif
1642 int
1643 unlink(td, uap)
1644 	struct thread *td;
1645 	struct unlink_args /* {
1646 		char *path;
1647 	} */ *uap;
1648 {
1649 	int error;
1650 
1651 	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1652 	return (error);
1653 }
1654 
1655 int
1656 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1657 {
1658 	struct mount *mp;
1659 	struct vnode *vp;
1660 	int error;
1661 	struct nameidata nd;
1662 	int vfslocked;
1663 
1664 restart:
1665 	bwillwrite();
1666 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1667 	    pathseg, path, td);
1668 	if ((error = namei(&nd)) != 0)
1669 		return (error == EINVAL ? EPERM : error);
1670 	vfslocked = NDHASGIANT(&nd);
1671 	vp = nd.ni_vp;
1672 	if (vp->v_type == VDIR)
1673 		error = EPERM;		/* POSIX */
1674 	else {
1675 		/*
1676 		 * The root of a mounted filesystem cannot be deleted.
1677 		 *
1678 		 * XXX: can this only be a VDIR case?
1679 		 */
1680 		if (vp->v_vflag & VV_ROOT)
1681 			error = EBUSY;
1682 	}
1683 	if (error == 0) {
1684 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1685 			NDFREE(&nd, NDF_ONLY_PNBUF);
1686 			vput(nd.ni_dvp);
1687 			if (vp == nd.ni_dvp)
1688 				vrele(vp);
1689 			else
1690 				vput(vp);
1691 			VFS_UNLOCK_GIANT(vfslocked);
1692 			if ((error = vn_start_write(NULL, &mp,
1693 			    V_XSLEEP | PCATCH)) != 0)
1694 				return (error);
1695 			goto restart;
1696 		}
1697 #ifdef MAC
1698 		error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
1699 		    &nd.ni_cnd);
1700 		if (error)
1701 			goto out;
1702 #endif
1703 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1704 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1705 #ifdef MAC
1706 out:
1707 #endif
1708 		vn_finished_write(mp);
1709 	}
1710 	NDFREE(&nd, NDF_ONLY_PNBUF);
1711 	vput(nd.ni_dvp);
1712 	if (vp == nd.ni_dvp)
1713 		vrele(vp);
1714 	else
1715 		vput(vp);
1716 	VFS_UNLOCK_GIANT(vfslocked);
1717 	return (error);
1718 }
1719 
1720 /*
1721  * Reposition read/write file offset.
1722  */
1723 #ifndef _SYS_SYSPROTO_H_
1724 struct lseek_args {
1725 	int	fd;
1726 	int	pad;
1727 	off_t	offset;
1728 	int	whence;
1729 };
1730 #endif
1731 int
1732 lseek(td, uap)
1733 	struct thread *td;
1734 	register struct lseek_args /* {
1735 		int fd;
1736 		int pad;
1737 		off_t offset;
1738 		int whence;
1739 	} */ *uap;
1740 {
1741 	struct ucred *cred = td->td_ucred;
1742 	struct file *fp;
1743 	struct vnode *vp;
1744 	struct vattr vattr;
1745 	off_t offset;
1746 	int error, noneg;
1747 	int vfslocked;
1748 
1749 	if ((error = fget(td, uap->fd, &fp)) != 0)
1750 		return (error);
1751 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1752 		fdrop(fp, td);
1753 		return (ESPIPE);
1754 	}
1755 	vp = fp->f_vnode;
1756 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1757 	noneg = (vp->v_type != VCHR);
1758 	offset = uap->offset;
1759 	switch (uap->whence) {
1760 	case L_INCR:
1761 		if (noneg &&
1762 		    (fp->f_offset < 0 ||
1763 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1764 			error = EOVERFLOW;
1765 			break;
1766 		}
1767 		offset += fp->f_offset;
1768 		break;
1769 	case L_XTND:
1770 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1771 		error = VOP_GETATTR(vp, &vattr, cred, td);
1772 		VOP_UNLOCK(vp, 0);
1773 		if (error)
1774 			break;
1775 		if (noneg &&
1776 		    (vattr.va_size > OFF_MAX ||
1777 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1778 			error = EOVERFLOW;
1779 			break;
1780 		}
1781 		offset += vattr.va_size;
1782 		break;
1783 	case L_SET:
1784 		break;
1785 	case SEEK_DATA:
1786 		error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td);
1787 		break;
1788 	case SEEK_HOLE:
1789 		error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td);
1790 		break;
1791 	default:
1792 		error = EINVAL;
1793 	}
1794 	if (error == 0 && noneg && offset < 0)
1795 		error = EINVAL;
1796 	if (error != 0)
1797 		goto drop;
1798 	fp->f_offset = offset;
1799 	*(off_t *)(td->td_retval) = fp->f_offset;
1800 drop:
1801 	fdrop(fp, td);
1802 	VFS_UNLOCK_GIANT(vfslocked);
1803 	return (error);
1804 }
1805 
1806 #if defined(COMPAT_43)
1807 /*
1808  * Reposition read/write file offset.
1809  */
1810 #ifndef _SYS_SYSPROTO_H_
1811 struct olseek_args {
1812 	int	fd;
1813 	long	offset;
1814 	int	whence;
1815 };
1816 #endif
1817 int
1818 olseek(td, uap)
1819 	struct thread *td;
1820 	register struct olseek_args /* {
1821 		int fd;
1822 		long offset;
1823 		int whence;
1824 	} */ *uap;
1825 {
1826 	struct lseek_args /* {
1827 		int fd;
1828 		int pad;
1829 		off_t offset;
1830 		int whence;
1831 	} */ nuap;
1832 
1833 	nuap.fd = uap->fd;
1834 	nuap.offset = uap->offset;
1835 	nuap.whence = uap->whence;
1836 	return (lseek(td, &nuap));
1837 }
1838 #endif /* COMPAT_43 */
1839 
1840 /* Version with the 'pad' argument */
1841 int
1842 freebsd6_lseek(td, uap)
1843 	struct thread *td;
1844 	register struct freebsd6_lseek_args *uap;
1845 {
1846 	struct lseek_args ouap;
1847 
1848 	ouap.fd = uap->fd;
1849 	ouap.offset = uap->offset;
1850 	ouap.whence = uap->whence;
1851 	return (lseek(td, &ouap));
1852 }
1853 
1854 /*
1855  * Check access permissions using passed credentials.
1856  */
1857 static int
1858 vn_access(vp, user_flags, cred, td)
1859 	struct vnode	*vp;
1860 	int		user_flags;
1861 	struct ucred	*cred;
1862 	struct thread	*td;
1863 {
1864 	int error, flags;
1865 
1866 	/* Flags == 0 means only check for existence. */
1867 	error = 0;
1868 	if (user_flags) {
1869 		flags = 0;
1870 		if (user_flags & R_OK)
1871 			flags |= VREAD;
1872 		if (user_flags & W_OK)
1873 			flags |= VWRITE;
1874 		if (user_flags & X_OK)
1875 			flags |= VEXEC;
1876 #ifdef MAC
1877 		error = mac_vnode_check_access(cred, vp, flags);
1878 		if (error)
1879 			return (error);
1880 #endif
1881 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1882 			error = VOP_ACCESS(vp, flags, cred, td);
1883 	}
1884 	return (error);
1885 }
1886 
1887 /*
1888  * Check access permissions using "real" credentials.
1889  */
1890 #ifndef _SYS_SYSPROTO_H_
1891 struct access_args {
1892 	char	*path;
1893 	int	flags;
1894 };
1895 #endif
1896 int
1897 access(td, uap)
1898 	struct thread *td;
1899 	register struct access_args /* {
1900 		char *path;
1901 		int flags;
1902 	} */ *uap;
1903 {
1904 
1905 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1906 }
1907 
1908 int
1909 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1910 {
1911 	struct ucred *cred, *tmpcred;
1912 	register struct vnode *vp;
1913 	struct nameidata nd;
1914 	int vfslocked;
1915 	int error;
1916 
1917 	/*
1918 	 * Create and modify a temporary credential instead of one that
1919 	 * is potentially shared.  This could also mess up socket
1920 	 * buffer accounting which can run in an interrupt context.
1921 	 */
1922 	cred = td->td_ucred;
1923 	tmpcred = crdup(cred);
1924 	tmpcred->cr_uid = cred->cr_ruid;
1925 	tmpcred->cr_groups[0] = cred->cr_rgid;
1926 	td->td_ucred = tmpcred;
1927 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1928 	    pathseg, path, td);
1929 	if ((error = namei(&nd)) != 0)
1930 		goto out1;
1931 	vfslocked = NDHASGIANT(&nd);
1932 	vp = nd.ni_vp;
1933 
1934 	error = vn_access(vp, flags, tmpcred, td);
1935 	NDFREE(&nd, NDF_ONLY_PNBUF);
1936 	vput(vp);
1937 	VFS_UNLOCK_GIANT(vfslocked);
1938 out1:
1939 	td->td_ucred = cred;
1940 	crfree(tmpcred);
1941 	return (error);
1942 }
1943 
1944 /*
1945  * Check access permissions using "effective" credentials.
1946  */
1947 #ifndef _SYS_SYSPROTO_H_
1948 struct eaccess_args {
1949 	char	*path;
1950 	int	flags;
1951 };
1952 #endif
1953 int
1954 eaccess(td, uap)
1955 	struct thread *td;
1956 	register struct eaccess_args /* {
1957 		char *path;
1958 		int flags;
1959 	} */ *uap;
1960 {
1961 
1962 	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1963 }
1964 
1965 int
1966 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1967 {
1968 	struct nameidata nd;
1969 	struct vnode *vp;
1970 	int vfslocked;
1971 	int error;
1972 
1973 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1974 	    pathseg, path, td);
1975 	if ((error = namei(&nd)) != 0)
1976 		return (error);
1977 	vp = nd.ni_vp;
1978 	vfslocked = NDHASGIANT(&nd);
1979 	error = vn_access(vp, flags, td->td_ucred, td);
1980 	NDFREE(&nd, NDF_ONLY_PNBUF);
1981 	vput(vp);
1982 	VFS_UNLOCK_GIANT(vfslocked);
1983 	return (error);
1984 }
1985 
1986 #if defined(COMPAT_43)
1987 /*
1988  * Get file status; this version follows links.
1989  */
1990 #ifndef _SYS_SYSPROTO_H_
1991 struct ostat_args {
1992 	char	*path;
1993 	struct ostat *ub;
1994 };
1995 #endif
1996 int
1997 ostat(td, uap)
1998 	struct thread *td;
1999 	register struct ostat_args /* {
2000 		char *path;
2001 		struct ostat *ub;
2002 	} */ *uap;
2003 {
2004 	struct stat sb;
2005 	struct ostat osb;
2006 	int error;
2007 
2008 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2009 	if (error)
2010 		return (error);
2011 	cvtstat(&sb, &osb);
2012 	error = copyout(&osb, uap->ub, sizeof (osb));
2013 	return (error);
2014 }
2015 
2016 /*
2017  * Get file status; this version does not follow links.
2018  */
2019 #ifndef _SYS_SYSPROTO_H_
2020 struct olstat_args {
2021 	char	*path;
2022 	struct ostat *ub;
2023 };
2024 #endif
2025 int
2026 olstat(td, uap)
2027 	struct thread *td;
2028 	register struct olstat_args /* {
2029 		char *path;
2030 		struct ostat *ub;
2031 	} */ *uap;
2032 {
2033 	struct stat sb;
2034 	struct ostat osb;
2035 	int error;
2036 
2037 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2038 	if (error)
2039 		return (error);
2040 	cvtstat(&sb, &osb);
2041 	error = copyout(&osb, uap->ub, sizeof (osb));
2042 	return (error);
2043 }
2044 
2045 /*
2046  * Convert from an old to a new stat structure.
2047  */
2048 void
2049 cvtstat(st, ost)
2050 	struct stat *st;
2051 	struct ostat *ost;
2052 {
2053 
2054 	ost->st_dev = st->st_dev;
2055 	ost->st_ino = st->st_ino;
2056 	ost->st_mode = st->st_mode;
2057 	ost->st_nlink = st->st_nlink;
2058 	ost->st_uid = st->st_uid;
2059 	ost->st_gid = st->st_gid;
2060 	ost->st_rdev = st->st_rdev;
2061 	if (st->st_size < (quad_t)1 << 32)
2062 		ost->st_size = st->st_size;
2063 	else
2064 		ost->st_size = -2;
2065 	ost->st_atime = st->st_atime;
2066 	ost->st_mtime = st->st_mtime;
2067 	ost->st_ctime = st->st_ctime;
2068 	ost->st_blksize = st->st_blksize;
2069 	ost->st_blocks = st->st_blocks;
2070 	ost->st_flags = st->st_flags;
2071 	ost->st_gen = st->st_gen;
2072 }
2073 #endif /* COMPAT_43 */
2074 
2075 /*
2076  * Get file status; this version follows links.
2077  */
2078 #ifndef _SYS_SYSPROTO_H_
2079 struct stat_args {
2080 	char	*path;
2081 	struct stat *ub;
2082 };
2083 #endif
2084 int
2085 stat(td, uap)
2086 	struct thread *td;
2087 	register struct stat_args /* {
2088 		char *path;
2089 		struct stat *ub;
2090 	} */ *uap;
2091 {
2092 	struct stat sb;
2093 	int error;
2094 
2095 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2096 	if (error == 0)
2097 		error = copyout(&sb, uap->ub, sizeof (sb));
2098 	return (error);
2099 }
2100 
2101 int
2102 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2103 {
2104 	struct nameidata nd;
2105 	struct stat sb;
2106 	int error, vfslocked;
2107 
2108 	NDINIT(&nd, LOOKUP,
2109 	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2110 	    pathseg, path, td);
2111 	if ((error = namei(&nd)) != 0)
2112 		return (error);
2113 	vfslocked = NDHASGIANT(&nd);
2114 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2115 	NDFREE(&nd, NDF_ONLY_PNBUF);
2116 	vput(nd.ni_vp);
2117 	VFS_UNLOCK_GIANT(vfslocked);
2118 	if (mtx_owned(&Giant))
2119 		printf("stat(%d): %s\n", vfslocked, path);
2120 	if (error)
2121 		return (error);
2122 	*sbp = sb;
2123 	return (0);
2124 }
2125 
2126 /*
2127  * Get file status; this version does not follow links.
2128  */
2129 #ifndef _SYS_SYSPROTO_H_
2130 struct lstat_args {
2131 	char	*path;
2132 	struct stat *ub;
2133 };
2134 #endif
2135 int
2136 lstat(td, uap)
2137 	struct thread *td;
2138 	register struct lstat_args /* {
2139 		char *path;
2140 		struct stat *ub;
2141 	} */ *uap;
2142 {
2143 	struct stat sb;
2144 	int error;
2145 
2146 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2147 	if (error == 0)
2148 		error = copyout(&sb, uap->ub, sizeof (sb));
2149 	return (error);
2150 }
2151 
2152 int
2153 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2154 {
2155 	struct vnode *vp;
2156 	struct stat sb;
2157 	struct nameidata nd;
2158 	int error, vfslocked;
2159 
2160 	NDINIT(&nd, LOOKUP,
2161 	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2162 	    pathseg, path, td);
2163 	if ((error = namei(&nd)) != 0)
2164 		return (error);
2165 	vfslocked = NDHASGIANT(&nd);
2166 	vp = nd.ni_vp;
2167 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2168 	NDFREE(&nd, NDF_ONLY_PNBUF);
2169 	vput(vp);
2170 	VFS_UNLOCK_GIANT(vfslocked);
2171 	if (error)
2172 		return (error);
2173 	*sbp = sb;
2174 	return (0);
2175 }
2176 
2177 /*
2178  * Implementation of the NetBSD [l]stat() functions.
2179  */
2180 void
2181 cvtnstat(sb, nsb)
2182 	struct stat *sb;
2183 	struct nstat *nsb;
2184 {
2185 	bzero(nsb, sizeof *nsb);
2186 	nsb->st_dev = sb->st_dev;
2187 	nsb->st_ino = sb->st_ino;
2188 	nsb->st_mode = sb->st_mode;
2189 	nsb->st_nlink = sb->st_nlink;
2190 	nsb->st_uid = sb->st_uid;
2191 	nsb->st_gid = sb->st_gid;
2192 	nsb->st_rdev = sb->st_rdev;
2193 	nsb->st_atimespec = sb->st_atimespec;
2194 	nsb->st_mtimespec = sb->st_mtimespec;
2195 	nsb->st_ctimespec = sb->st_ctimespec;
2196 	nsb->st_size = sb->st_size;
2197 	nsb->st_blocks = sb->st_blocks;
2198 	nsb->st_blksize = sb->st_blksize;
2199 	nsb->st_flags = sb->st_flags;
2200 	nsb->st_gen = sb->st_gen;
2201 	nsb->st_birthtimespec = sb->st_birthtimespec;
2202 }
2203 
2204 #ifndef _SYS_SYSPROTO_H_
2205 struct nstat_args {
2206 	char	*path;
2207 	struct nstat *ub;
2208 };
2209 #endif
2210 int
2211 nstat(td, uap)
2212 	struct thread *td;
2213 	register struct nstat_args /* {
2214 		char *path;
2215 		struct nstat *ub;
2216 	} */ *uap;
2217 {
2218 	struct stat sb;
2219 	struct nstat nsb;
2220 	int error;
2221 
2222 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2223 	if (error)
2224 		return (error);
2225 	cvtnstat(&sb, &nsb);
2226 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2227 	return (error);
2228 }
2229 
2230 /*
2231  * NetBSD lstat.  Get file status; this version does not follow links.
2232  */
2233 #ifndef _SYS_SYSPROTO_H_
2234 struct lstat_args {
2235 	char	*path;
2236 	struct stat *ub;
2237 };
2238 #endif
2239 int
2240 nlstat(td, uap)
2241 	struct thread *td;
2242 	register struct nlstat_args /* {
2243 		char *path;
2244 		struct nstat *ub;
2245 	} */ *uap;
2246 {
2247 	struct stat sb;
2248 	struct nstat nsb;
2249 	int error;
2250 
2251 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2252 	if (error)
2253 		return (error);
2254 	cvtnstat(&sb, &nsb);
2255 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2256 	return (error);
2257 }
2258 
2259 /*
2260  * Get configurable pathname variables.
2261  */
2262 #ifndef _SYS_SYSPROTO_H_
2263 struct pathconf_args {
2264 	char	*path;
2265 	int	name;
2266 };
2267 #endif
2268 int
2269 pathconf(td, uap)
2270 	struct thread *td;
2271 	register struct pathconf_args /* {
2272 		char *path;
2273 		int name;
2274 	} */ *uap;
2275 {
2276 
2277 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2278 }
2279 
2280 int
2281 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2282 {
2283 	struct nameidata nd;
2284 	int error, vfslocked;
2285 
2286 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2287 	    pathseg, path, td);
2288 	if ((error = namei(&nd)) != 0)
2289 		return (error);
2290 	vfslocked = NDHASGIANT(&nd);
2291 	NDFREE(&nd, NDF_ONLY_PNBUF);
2292 
2293 	/* If asynchronous I/O is available, it works for all files. */
2294 	if (name == _PC_ASYNC_IO)
2295 		td->td_retval[0] = async_io_version;
2296 	else
2297 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2298 	vput(nd.ni_vp);
2299 	VFS_UNLOCK_GIANT(vfslocked);
2300 	return (error);
2301 }
2302 
2303 /*
2304  * Return target name of a symbolic link.
2305  */
2306 #ifndef _SYS_SYSPROTO_H_
2307 struct readlink_args {
2308 	char	*path;
2309 	char	*buf;
2310 	int	count;
2311 };
2312 #endif
2313 int
2314 readlink(td, uap)
2315 	struct thread *td;
2316 	register struct readlink_args /* {
2317 		char *path;
2318 		char *buf;
2319 		int count;
2320 	} */ *uap;
2321 {
2322 
2323 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2324 	    UIO_USERSPACE, uap->count));
2325 }
2326 
2327 int
2328 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2329     enum uio_seg bufseg, int count)
2330 {
2331 	register struct vnode *vp;
2332 	struct iovec aiov;
2333 	struct uio auio;
2334 	int error;
2335 	struct nameidata nd;
2336 	int vfslocked;
2337 
2338 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2339 	    pathseg, path, td);
2340 	if ((error = namei(&nd)) != 0)
2341 		return (error);
2342 	NDFREE(&nd, NDF_ONLY_PNBUF);
2343 	vfslocked = NDHASGIANT(&nd);
2344 	vp = nd.ni_vp;
2345 #ifdef MAC
2346 	error = mac_vnode_check_readlink(td->td_ucred, vp);
2347 	if (error) {
2348 		vput(vp);
2349 		VFS_UNLOCK_GIANT(vfslocked);
2350 		return (error);
2351 	}
2352 #endif
2353 	if (vp->v_type != VLNK)
2354 		error = EINVAL;
2355 	else {
2356 		aiov.iov_base = buf;
2357 		aiov.iov_len = count;
2358 		auio.uio_iov = &aiov;
2359 		auio.uio_iovcnt = 1;
2360 		auio.uio_offset = 0;
2361 		auio.uio_rw = UIO_READ;
2362 		auio.uio_segflg = bufseg;
2363 		auio.uio_td = td;
2364 		auio.uio_resid = count;
2365 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2366 	}
2367 	vput(vp);
2368 	VFS_UNLOCK_GIANT(vfslocked);
2369 	td->td_retval[0] = count - auio.uio_resid;
2370 	return (error);
2371 }
2372 
2373 /*
2374  * Common implementation code for chflags() and fchflags().
2375  */
2376 static int
2377 setfflags(td, vp, flags)
2378 	struct thread *td;
2379 	struct vnode *vp;
2380 	int flags;
2381 {
2382 	int error;
2383 	struct mount *mp;
2384 	struct vattr vattr;
2385 
2386 	/*
2387 	 * Prevent non-root users from setting flags on devices.  When
2388 	 * a device is reused, users can retain ownership of the device
2389 	 * if they are allowed to set flags and programs assume that
2390 	 * chown can't fail when done as root.
2391 	 */
2392 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2393 		error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
2394 		if (error)
2395 			return (error);
2396 	}
2397 
2398 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2399 		return (error);
2400 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2401 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2402 	VATTR_NULL(&vattr);
2403 	vattr.va_flags = flags;
2404 #ifdef MAC
2405 	error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
2406 	if (error == 0)
2407 #endif
2408 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2409 	VOP_UNLOCK(vp, 0);
2410 	vn_finished_write(mp);
2411 	return (error);
2412 }
2413 
2414 /*
2415  * Change flags of a file given a path name.
2416  */
2417 #ifndef _SYS_SYSPROTO_H_
2418 struct chflags_args {
2419 	char	*path;
2420 	int	flags;
2421 };
2422 #endif
2423 int
2424 chflags(td, uap)
2425 	struct thread *td;
2426 	register struct chflags_args /* {
2427 		char *path;
2428 		int flags;
2429 	} */ *uap;
2430 {
2431 	int error;
2432 	struct nameidata nd;
2433 	int vfslocked;
2434 
2435 	AUDIT_ARG(fflags, uap->flags);
2436 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2437 	    uap->path, td);
2438 	if ((error = namei(&nd)) != 0)
2439 		return (error);
2440 	NDFREE(&nd, NDF_ONLY_PNBUF);
2441 	vfslocked = NDHASGIANT(&nd);
2442 	error = setfflags(td, nd.ni_vp, uap->flags);
2443 	vrele(nd.ni_vp);
2444 	VFS_UNLOCK_GIANT(vfslocked);
2445 	return (error);
2446 }
2447 
2448 /*
2449  * Same as chflags() but doesn't follow symlinks.
2450  */
2451 int
2452 lchflags(td, uap)
2453 	struct thread *td;
2454 	register struct lchflags_args /* {
2455 		char *path;
2456 		int flags;
2457 	} */ *uap;
2458 {
2459 	int error;
2460 	struct nameidata nd;
2461 	int vfslocked;
2462 
2463 	AUDIT_ARG(fflags, uap->flags);
2464 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2465 	    uap->path, td);
2466 	if ((error = namei(&nd)) != 0)
2467 		return (error);
2468 	vfslocked = NDHASGIANT(&nd);
2469 	NDFREE(&nd, NDF_ONLY_PNBUF);
2470 	error = setfflags(td, nd.ni_vp, uap->flags);
2471 	vrele(nd.ni_vp);
2472 	VFS_UNLOCK_GIANT(vfslocked);
2473 	return (error);
2474 }
2475 
2476 /*
2477  * Change flags of a file given a file descriptor.
2478  */
2479 #ifndef _SYS_SYSPROTO_H_
2480 struct fchflags_args {
2481 	int	fd;
2482 	int	flags;
2483 };
2484 #endif
2485 int
2486 fchflags(td, uap)
2487 	struct thread *td;
2488 	register struct fchflags_args /* {
2489 		int fd;
2490 		int flags;
2491 	} */ *uap;
2492 {
2493 	struct file *fp;
2494 	int vfslocked;
2495 	int error;
2496 
2497 	AUDIT_ARG(fd, uap->fd);
2498 	AUDIT_ARG(fflags, uap->flags);
2499 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2500 		return (error);
2501 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2502 #ifdef AUDIT
2503 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY);
2504 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2505 	VOP_UNLOCK(fp->f_vnode, 0);
2506 #endif
2507 	error = setfflags(td, fp->f_vnode, uap->flags);
2508 	VFS_UNLOCK_GIANT(vfslocked);
2509 	fdrop(fp, td);
2510 	return (error);
2511 }
2512 
2513 /*
2514  * Common implementation code for chmod(), lchmod() and fchmod().
2515  */
2516 static int
2517 setfmode(td, vp, mode)
2518 	struct thread *td;
2519 	struct vnode *vp;
2520 	int mode;
2521 {
2522 	int error;
2523 	struct mount *mp;
2524 	struct vattr vattr;
2525 
2526 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2527 		return (error);
2528 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2529 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2530 	VATTR_NULL(&vattr);
2531 	vattr.va_mode = mode & ALLPERMS;
2532 #ifdef MAC
2533 	error = mac_vnode_check_setmode(td->td_ucred, vp, vattr.va_mode);
2534 	if (error == 0)
2535 #endif
2536 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2537 	VOP_UNLOCK(vp, 0);
2538 	vn_finished_write(mp);
2539 	return (error);
2540 }
2541 
2542 /*
2543  * Change mode of a file given path name.
2544  */
2545 #ifndef _SYS_SYSPROTO_H_
2546 struct chmod_args {
2547 	char	*path;
2548 	int	mode;
2549 };
2550 #endif
2551 int
2552 chmod(td, uap)
2553 	struct thread *td;
2554 	register struct chmod_args /* {
2555 		char *path;
2556 		int mode;
2557 	} */ *uap;
2558 {
2559 
2560 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2561 }
2562 
2563 int
2564 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2565 {
2566 	int error;
2567 	struct nameidata nd;
2568 	int vfslocked;
2569 
2570 	AUDIT_ARG(mode, mode);
2571 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2572 	if ((error = namei(&nd)) != 0)
2573 		return (error);
2574 	vfslocked = NDHASGIANT(&nd);
2575 	NDFREE(&nd, NDF_ONLY_PNBUF);
2576 	error = setfmode(td, nd.ni_vp, mode);
2577 	vrele(nd.ni_vp);
2578 	VFS_UNLOCK_GIANT(vfslocked);
2579 	return (error);
2580 }
2581 
2582 /*
2583  * Change mode of a file given path name (don't follow links.)
2584  */
2585 #ifndef _SYS_SYSPROTO_H_
2586 struct lchmod_args {
2587 	char	*path;
2588 	int	mode;
2589 };
2590 #endif
2591 int
2592 lchmod(td, uap)
2593 	struct thread *td;
2594 	register struct lchmod_args /* {
2595 		char *path;
2596 		int mode;
2597 	} */ *uap;
2598 {
2599 	int error;
2600 	struct nameidata nd;
2601 	int vfslocked;
2602 
2603 	AUDIT_ARG(mode, (mode_t)uap->mode);
2604 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2605 	    uap->path, td);
2606 	if ((error = namei(&nd)) != 0)
2607 		return (error);
2608 	vfslocked = NDHASGIANT(&nd);
2609 	NDFREE(&nd, NDF_ONLY_PNBUF);
2610 	error = setfmode(td, nd.ni_vp, uap->mode);
2611 	vrele(nd.ni_vp);
2612 	VFS_UNLOCK_GIANT(vfslocked);
2613 	return (error);
2614 }
2615 
2616 /*
2617  * Change mode of a file given a file descriptor.
2618  */
2619 #ifndef _SYS_SYSPROTO_H_
2620 struct fchmod_args {
2621 	int	fd;
2622 	int	mode;
2623 };
2624 #endif
2625 int
2626 fchmod(td, uap)
2627 	struct thread *td;
2628 	register struct fchmod_args /* {
2629 		int fd;
2630 		int mode;
2631 	} */ *uap;
2632 {
2633 	struct file *fp;
2634 	int vfslocked;
2635 	int error;
2636 
2637 	AUDIT_ARG(fd, uap->fd);
2638 	AUDIT_ARG(mode, uap->mode);
2639 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2640 		return (error);
2641 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2642 #ifdef AUDIT
2643 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY);
2644 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2645 	VOP_UNLOCK(fp->f_vnode, 0);
2646 #endif
2647 	error = setfmode(td, fp->f_vnode, uap->mode);
2648 	VFS_UNLOCK_GIANT(vfslocked);
2649 	fdrop(fp, td);
2650 	return (error);
2651 }
2652 
2653 /*
2654  * Common implementation for chown(), lchown(), and fchown()
2655  */
2656 static int
2657 setfown(td, vp, uid, gid)
2658 	struct thread *td;
2659 	struct vnode *vp;
2660 	uid_t uid;
2661 	gid_t gid;
2662 {
2663 	int error;
2664 	struct mount *mp;
2665 	struct vattr vattr;
2666 
2667 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2668 		return (error);
2669 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2670 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2671 	VATTR_NULL(&vattr);
2672 	vattr.va_uid = uid;
2673 	vattr.va_gid = gid;
2674 #ifdef MAC
2675 	error = mac_vnode_check_setowner(td->td_ucred, vp, vattr.va_uid,
2676 	    vattr.va_gid);
2677 	if (error == 0)
2678 #endif
2679 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2680 	VOP_UNLOCK(vp, 0);
2681 	vn_finished_write(mp);
2682 	return (error);
2683 }
2684 
2685 /*
2686  * Set ownership given a path name.
2687  */
2688 #ifndef _SYS_SYSPROTO_H_
2689 struct chown_args {
2690 	char	*path;
2691 	int	uid;
2692 	int	gid;
2693 };
2694 #endif
2695 int
2696 chown(td, uap)
2697 	struct thread *td;
2698 	register struct chown_args /* {
2699 		char *path;
2700 		int uid;
2701 		int gid;
2702 	} */ *uap;
2703 {
2704 
2705 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2706 }
2707 
2708 int
2709 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2710     int gid)
2711 {
2712 	int error;
2713 	struct nameidata nd;
2714 	int vfslocked;
2715 
2716 	AUDIT_ARG(owner, uid, gid);
2717 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2718 	if ((error = namei(&nd)) != 0)
2719 		return (error);
2720 	vfslocked = NDHASGIANT(&nd);
2721 	NDFREE(&nd, NDF_ONLY_PNBUF);
2722 	error = setfown(td, nd.ni_vp, uid, gid);
2723 	vrele(nd.ni_vp);
2724 	VFS_UNLOCK_GIANT(vfslocked);
2725 	return (error);
2726 }
2727 
2728 /*
2729  * Set ownership given a path name, do not cross symlinks.
2730  */
2731 #ifndef _SYS_SYSPROTO_H_
2732 struct lchown_args {
2733 	char	*path;
2734 	int	uid;
2735 	int	gid;
2736 };
2737 #endif
2738 int
2739 lchown(td, uap)
2740 	struct thread *td;
2741 	register struct lchown_args /* {
2742 		char *path;
2743 		int uid;
2744 		int gid;
2745 	} */ *uap;
2746 {
2747 
2748 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2749 }
2750 
2751 int
2752 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2753     int gid)
2754 {
2755 	int error;
2756 	struct nameidata nd;
2757 	int vfslocked;
2758 
2759 	AUDIT_ARG(owner, uid, gid);
2760 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2761 	if ((error = namei(&nd)) != 0)
2762 		return (error);
2763 	vfslocked = NDHASGIANT(&nd);
2764 	NDFREE(&nd, NDF_ONLY_PNBUF);
2765 	error = setfown(td, nd.ni_vp, uid, gid);
2766 	vrele(nd.ni_vp);
2767 	VFS_UNLOCK_GIANT(vfslocked);
2768 	return (error);
2769 }
2770 
2771 /*
2772  * Set ownership given a file descriptor.
2773  */
2774 #ifndef _SYS_SYSPROTO_H_
2775 struct fchown_args {
2776 	int	fd;
2777 	int	uid;
2778 	int	gid;
2779 };
2780 #endif
2781 int
2782 fchown(td, uap)
2783 	struct thread *td;
2784 	register struct fchown_args /* {
2785 		int fd;
2786 		int uid;
2787 		int gid;
2788 	} */ *uap;
2789 {
2790 	struct file *fp;
2791 	int vfslocked;
2792 	int error;
2793 
2794 	AUDIT_ARG(fd, uap->fd);
2795 	AUDIT_ARG(owner, uap->uid, uap->gid);
2796 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2797 		return (error);
2798 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2799 #ifdef AUDIT
2800 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY);
2801 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2802 	VOP_UNLOCK(fp->f_vnode, 0);
2803 #endif
2804 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2805 	VFS_UNLOCK_GIANT(vfslocked);
2806 	fdrop(fp, td);
2807 	return (error);
2808 }
2809 
2810 /*
2811  * Common implementation code for utimes(), lutimes(), and futimes().
2812  */
2813 static int
2814 getutimes(usrtvp, tvpseg, tsp)
2815 	const struct timeval *usrtvp;
2816 	enum uio_seg tvpseg;
2817 	struct timespec *tsp;
2818 {
2819 	struct timeval tv[2];
2820 	const struct timeval *tvp;
2821 	int error;
2822 
2823 	if (usrtvp == NULL) {
2824 		microtime(&tv[0]);
2825 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2826 		tsp[1] = tsp[0];
2827 	} else {
2828 		if (tvpseg == UIO_SYSSPACE) {
2829 			tvp = usrtvp;
2830 		} else {
2831 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2832 				return (error);
2833 			tvp = tv;
2834 		}
2835 
2836 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2837 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2838 			return (EINVAL);
2839 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2840 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2841 	}
2842 	return (0);
2843 }
2844 
2845 /*
2846  * Common implementation code for utimes(), lutimes(), and futimes().
2847  */
2848 static int
2849 setutimes(td, vp, ts, numtimes, nullflag)
2850 	struct thread *td;
2851 	struct vnode *vp;
2852 	const struct timespec *ts;
2853 	int numtimes;
2854 	int nullflag;
2855 {
2856 	int error, setbirthtime;
2857 	struct mount *mp;
2858 	struct vattr vattr;
2859 
2860 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2861 		return (error);
2862 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2863 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2864 	setbirthtime = 0;
2865 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2866 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2867 		setbirthtime = 1;
2868 	VATTR_NULL(&vattr);
2869 	vattr.va_atime = ts[0];
2870 	vattr.va_mtime = ts[1];
2871 	if (setbirthtime)
2872 		vattr.va_birthtime = ts[1];
2873 	if (numtimes > 2)
2874 		vattr.va_birthtime = ts[2];
2875 	if (nullflag)
2876 		vattr.va_vaflags |= VA_UTIMES_NULL;
2877 #ifdef MAC
2878 	error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
2879 	    vattr.va_mtime);
2880 #endif
2881 	if (error == 0)
2882 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2883 	VOP_UNLOCK(vp, 0);
2884 	vn_finished_write(mp);
2885 	return (error);
2886 }
2887 
2888 /*
2889  * Set the access and modification times of a file.
2890  */
2891 #ifndef _SYS_SYSPROTO_H_
2892 struct utimes_args {
2893 	char	*path;
2894 	struct	timeval *tptr;
2895 };
2896 #endif
2897 int
2898 utimes(td, uap)
2899 	struct thread *td;
2900 	register struct utimes_args /* {
2901 		char *path;
2902 		struct timeval *tptr;
2903 	} */ *uap;
2904 {
2905 
2906 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2907 	    UIO_USERSPACE));
2908 }
2909 
2910 int
2911 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2912     struct timeval *tptr, enum uio_seg tptrseg)
2913 {
2914 	struct timespec ts[2];
2915 	int error;
2916 	struct nameidata nd;
2917 	int vfslocked;
2918 
2919 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2920 		return (error);
2921 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2922 	if ((error = namei(&nd)) != 0)
2923 		return (error);
2924 	vfslocked = NDHASGIANT(&nd);
2925 	NDFREE(&nd, NDF_ONLY_PNBUF);
2926 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2927 	vrele(nd.ni_vp);
2928 	VFS_UNLOCK_GIANT(vfslocked);
2929 	return (error);
2930 }
2931 
2932 /*
2933  * Set the access and modification times of a file.
2934  */
2935 #ifndef _SYS_SYSPROTO_H_
2936 struct lutimes_args {
2937 	char	*path;
2938 	struct	timeval *tptr;
2939 };
2940 #endif
2941 int
2942 lutimes(td, uap)
2943 	struct thread *td;
2944 	register struct lutimes_args /* {
2945 		char *path;
2946 		struct timeval *tptr;
2947 	} */ *uap;
2948 {
2949 
2950 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2951 	    UIO_USERSPACE));
2952 }
2953 
2954 int
2955 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2956     struct timeval *tptr, enum uio_seg tptrseg)
2957 {
2958 	struct timespec ts[2];
2959 	int error;
2960 	struct nameidata nd;
2961 	int vfslocked;
2962 
2963 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2964 		return (error);
2965 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2966 	if ((error = namei(&nd)) != 0)
2967 		return (error);
2968 	vfslocked = NDHASGIANT(&nd);
2969 	NDFREE(&nd, NDF_ONLY_PNBUF);
2970 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2971 	vrele(nd.ni_vp);
2972 	VFS_UNLOCK_GIANT(vfslocked);
2973 	return (error);
2974 }
2975 
2976 /*
2977  * Set the access and modification times of a file.
2978  */
2979 #ifndef _SYS_SYSPROTO_H_
2980 struct futimes_args {
2981 	int	fd;
2982 	struct	timeval *tptr;
2983 };
2984 #endif
2985 int
2986 futimes(td, uap)
2987 	struct thread *td;
2988 	register struct futimes_args /* {
2989 		int  fd;
2990 		struct timeval *tptr;
2991 	} */ *uap;
2992 {
2993 
2994 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2995 }
2996 
2997 int
2998 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2999     enum uio_seg tptrseg)
3000 {
3001 	struct timespec ts[2];
3002 	struct file *fp;
3003 	int vfslocked;
3004 	int error;
3005 
3006 	AUDIT_ARG(fd, fd);
3007 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
3008 		return (error);
3009 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
3010 		return (error);
3011 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
3012 #ifdef AUDIT
3013 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY);
3014 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
3015 	VOP_UNLOCK(fp->f_vnode, 0);
3016 #endif
3017 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
3018 	VFS_UNLOCK_GIANT(vfslocked);
3019 	fdrop(fp, td);
3020 	return (error);
3021 }
3022 
3023 /*
3024  * Truncate a file given its path name.
3025  */
3026 #ifndef _SYS_SYSPROTO_H_
3027 struct truncate_args {
3028 	char	*path;
3029 	int	pad;
3030 	off_t	length;
3031 };
3032 #endif
3033 int
3034 truncate(td, uap)
3035 	struct thread *td;
3036 	register struct truncate_args /* {
3037 		char *path;
3038 		int pad;
3039 		off_t length;
3040 	} */ *uap;
3041 {
3042 
3043 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3044 }
3045 
3046 int
3047 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3048 {
3049 	struct mount *mp;
3050 	struct vnode *vp;
3051 	struct vattr vattr;
3052 	int error;
3053 	struct nameidata nd;
3054 	int vfslocked;
3055 
3056 	if (length < 0)
3057 		return(EINVAL);
3058 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3059 	if ((error = namei(&nd)) != 0)
3060 		return (error);
3061 	vfslocked = NDHASGIANT(&nd);
3062 	vp = nd.ni_vp;
3063 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3064 		vrele(vp);
3065 		VFS_UNLOCK_GIANT(vfslocked);
3066 		return (error);
3067 	}
3068 	NDFREE(&nd, NDF_ONLY_PNBUF);
3069 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3070 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3071 	if (vp->v_type == VDIR)
3072 		error = EISDIR;
3073 #ifdef MAC
3074 	else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
3075 	}
3076 #endif
3077 	else if ((error = vn_writechk(vp)) == 0 &&
3078 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3079 		VATTR_NULL(&vattr);
3080 		vattr.va_size = length;
3081 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3082 	}
3083 	vput(vp);
3084 	vn_finished_write(mp);
3085 	VFS_UNLOCK_GIANT(vfslocked);
3086 	return (error);
3087 }
3088 
#if defined(COMPAT_43)
/*
 * Truncate a file given its path name.
 *
 * COMPAT_43 entry point whose length argument is a long rather than an
 * off_t.  The length is widened to off_t and the request is handled by
 * kern_truncate(), exactly as truncate() does.
 */
#ifndef _SYS_SYSPROTO_H_
struct otruncate_args {
	char	*path;
	long	length;
};
#endif
int
otruncate(struct thread *td, struct otruncate_args *uap)
{
	off_t length;

	length = uap->length;
	return (kern_truncate(td, uap->path, UIO_USERSPACE, length));
}
#endif /* COMPAT_43 */
3118 
3119 /* Versions with the pad argument */
3120 int
3121 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
3122 {
3123 	struct truncate_args ouap;
3124 
3125 	ouap.path = uap->path;
3126 	ouap.length = uap->length;
3127 	return (truncate(td, &ouap));
3128 }
3129 
3130 int
3131 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
3132 {
3133 	struct ftruncate_args ouap;
3134 
3135 	ouap.fd = uap->fd;
3136 	ouap.length = uap->length;
3137 	return (ftruncate(td, &ouap));
3138 }
3139 
3140 /*
3141  * Sync an open file.
3142  */
3143 #ifndef _SYS_SYSPROTO_H_
3144 struct fsync_args {
3145 	int	fd;
3146 };
3147 #endif
3148 int
3149 fsync(td, uap)
3150 	struct thread *td;
3151 	struct fsync_args /* {
3152 		int fd;
3153 	} */ *uap;
3154 {
3155 	struct vnode *vp;
3156 	struct mount *mp;
3157 	struct file *fp;
3158 	int vfslocked;
3159 	int error;
3160 
3161 	AUDIT_ARG(fd, uap->fd);
3162 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3163 		return (error);
3164 	vp = fp->f_vnode;
3165 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3166 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3167 		goto drop;
3168 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3169 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3170 	if (vp->v_object != NULL) {
3171 		VM_OBJECT_LOCK(vp->v_object);
3172 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3173 		VM_OBJECT_UNLOCK(vp->v_object);
3174 	}
3175 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3176 
3177 	VOP_UNLOCK(vp, 0);
3178 	vn_finished_write(mp);
3179 drop:
3180 	VFS_UNLOCK_GIANT(vfslocked);
3181 	fdrop(fp, td);
3182 	return (error);
3183 }
3184 
3185 /*
3186  * Rename files.  Source and destination must either both be directories, or
3187  * both not be directories.  If target is a directory, it must be empty.
3188  */
3189 #ifndef _SYS_SYSPROTO_H_
3190 struct rename_args {
3191 	char	*from;
3192 	char	*to;
3193 };
3194 #endif
3195 int
3196 rename(td, uap)
3197 	struct thread *td;
3198 	register struct rename_args /* {
3199 		char *from;
3200 		char *to;
3201 	} */ *uap;
3202 {
3203 
3204 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3205 }
3206 
3207 int
3208 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3209 {
3210 	struct mount *mp = NULL;
3211 	struct vnode *tvp, *fvp, *tdvp;
3212 	struct nameidata fromnd, tond;
3213 	int tvfslocked;
3214 	int fvfslocked;
3215 	int error;
3216 
3217 	bwillwrite();
3218 #ifdef MAC
3219 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3220 	    AUDITVNODE1, pathseg, from, td);
3221 #else
3222 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3223 	    AUDITVNODE1, pathseg, from, td);
3224 #endif
3225 	if ((error = namei(&fromnd)) != 0)
3226 		return (error);
3227 	fvfslocked = NDHASGIANT(&fromnd);
3228 	tvfslocked = 0;
3229 #ifdef MAC
3230 	error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
3231 	    fromnd.ni_vp, &fromnd.ni_cnd);
3232 	VOP_UNLOCK(fromnd.ni_dvp, 0);
3233 	if (fromnd.ni_dvp != fromnd.ni_vp)
3234 		VOP_UNLOCK(fromnd.ni_vp, 0);
3235 #endif
3236 	fvp = fromnd.ni_vp;
3237 	if (error == 0)
3238 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3239 	if (error != 0) {
3240 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3241 		vrele(fromnd.ni_dvp);
3242 		vrele(fvp);
3243 		goto out1;
3244 	}
3245 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3246 	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3247 	if (fromnd.ni_vp->v_type == VDIR)
3248 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3249 	if ((error = namei(&tond)) != 0) {
3250 		/* Translate error code for rename("dir1", "dir2/."). */
3251 		if (error == EISDIR && fvp->v_type == VDIR)
3252 			error = EINVAL;
3253 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3254 		vrele(fromnd.ni_dvp);
3255 		vrele(fvp);
3256 		vn_finished_write(mp);
3257 		goto out1;
3258 	}
3259 	tvfslocked = NDHASGIANT(&tond);
3260 	tdvp = tond.ni_dvp;
3261 	tvp = tond.ni_vp;
3262 	if (tvp != NULL) {
3263 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3264 			error = ENOTDIR;
3265 			goto out;
3266 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3267 			error = EISDIR;
3268 			goto out;
3269 		}
3270 	}
3271 	if (fvp == tdvp)
3272 		error = EINVAL;
3273 	/*
3274 	 * If the source is the same as the destination (that is, if they
3275 	 * are links to the same vnode), then there is nothing to do.
3276 	 */
3277 	if (fvp == tvp)
3278 		error = -1;
3279 #ifdef MAC
3280 	else
3281 		error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
3282 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3283 #endif
3284 out:
3285 	if (!error) {
3286 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3287 		if (fromnd.ni_dvp != tdvp) {
3288 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3289 		}
3290 		if (tvp) {
3291 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3292 		}
3293 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3294 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3295 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3296 		NDFREE(&tond, NDF_ONLY_PNBUF);
3297 	} else {
3298 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3299 		NDFREE(&tond, NDF_ONLY_PNBUF);
3300 		if (tvp)
3301 			vput(tvp);
3302 		if (tdvp == tvp)
3303 			vrele(tdvp);
3304 		else
3305 			vput(tdvp);
3306 		vrele(fromnd.ni_dvp);
3307 		vrele(fvp);
3308 	}
3309 	vrele(tond.ni_startdir);
3310 	vn_finished_write(mp);
3311 out1:
3312 	if (fromnd.ni_startdir)
3313 		vrele(fromnd.ni_startdir);
3314 	VFS_UNLOCK_GIANT(fvfslocked);
3315 	VFS_UNLOCK_GIANT(tvfslocked);
3316 	if (error == -1)
3317 		return (0);
3318 	return (error);
3319 }
3320 
3321 /*
3322  * Make a directory file.
3323  */
3324 #ifndef _SYS_SYSPROTO_H_
3325 struct mkdir_args {
3326 	char	*path;
3327 	int	mode;
3328 };
3329 #endif
3330 int
3331 mkdir(td, uap)
3332 	struct thread *td;
3333 	register struct mkdir_args /* {
3334 		char *path;
3335 		int mode;
3336 	} */ *uap;
3337 {
3338 
3339 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3340 }
3341 
3342 int
3343 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3344 {
3345 	struct mount *mp;
3346 	struct vnode *vp;
3347 	struct vattr vattr;
3348 	int error;
3349 	struct nameidata nd;
3350 	int vfslocked;
3351 
3352 	AUDIT_ARG(mode, mode);
3353 restart:
3354 	bwillwrite();
3355 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3356 	    segflg, path, td);
3357 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3358 	if ((error = namei(&nd)) != 0)
3359 		return (error);
3360 	vfslocked = NDHASGIANT(&nd);
3361 	vp = nd.ni_vp;
3362 	if (vp != NULL) {
3363 		NDFREE(&nd, NDF_ONLY_PNBUF);
3364 		/*
3365 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3366 		 * the strange behaviour of leaving the vnode unlocked
3367 		 * if the target is the same vnode as the parent.
3368 		 */
3369 		if (vp == nd.ni_dvp)
3370 			vrele(nd.ni_dvp);
3371 		else
3372 			vput(nd.ni_dvp);
3373 		vrele(vp);
3374 		VFS_UNLOCK_GIANT(vfslocked);
3375 		return (EEXIST);
3376 	}
3377 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3378 		NDFREE(&nd, NDF_ONLY_PNBUF);
3379 		vput(nd.ni_dvp);
3380 		VFS_UNLOCK_GIANT(vfslocked);
3381 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3382 			return (error);
3383 		goto restart;
3384 	}
3385 	VATTR_NULL(&vattr);
3386 	vattr.va_type = VDIR;
3387 	FILEDESC_SLOCK(td->td_proc->p_fd);
3388 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3389 	FILEDESC_SUNLOCK(td->td_proc->p_fd);
3390 #ifdef MAC
3391 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3392 	    &vattr);
3393 	if (error)
3394 		goto out;
3395 #endif
3396 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3397 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3398 #ifdef MAC
3399 out:
3400 #endif
3401 	NDFREE(&nd, NDF_ONLY_PNBUF);
3402 	vput(nd.ni_dvp);
3403 	if (!error)
3404 		vput(nd.ni_vp);
3405 	vn_finished_write(mp);
3406 	VFS_UNLOCK_GIANT(vfslocked);
3407 	return (error);
3408 }
3409 
3410 /*
3411  * Remove a directory file.
3412  */
3413 #ifndef _SYS_SYSPROTO_H_
3414 struct rmdir_args {
3415 	char	*path;
3416 };
3417 #endif
3418 int
3419 rmdir(td, uap)
3420 	struct thread *td;
3421 	struct rmdir_args /* {
3422 		char *path;
3423 	} */ *uap;
3424 {
3425 
3426 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3427 }
3428 
3429 int
3430 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3431 {
3432 	struct mount *mp;
3433 	struct vnode *vp;
3434 	int error;
3435 	struct nameidata nd;
3436 	int vfslocked;
3437 
3438 restart:
3439 	bwillwrite();
3440 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3441 	    pathseg, path, td);
3442 	if ((error = namei(&nd)) != 0)
3443 		return (error);
3444 	vfslocked = NDHASGIANT(&nd);
3445 	vp = nd.ni_vp;
3446 	if (vp->v_type != VDIR) {
3447 		error = ENOTDIR;
3448 		goto out;
3449 	}
3450 	/*
3451 	 * No rmdir "." please.
3452 	 */
3453 	if (nd.ni_dvp == vp) {
3454 		error = EINVAL;
3455 		goto out;
3456 	}
3457 	/*
3458 	 * The root of a mounted filesystem cannot be deleted.
3459 	 */
3460 	if (vp->v_vflag & VV_ROOT) {
3461 		error = EBUSY;
3462 		goto out;
3463 	}
3464 #ifdef MAC
3465 	error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
3466 	    &nd.ni_cnd);
3467 	if (error)
3468 		goto out;
3469 #endif
3470 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3471 		NDFREE(&nd, NDF_ONLY_PNBUF);
3472 		vput(vp);
3473 		if (nd.ni_dvp == vp)
3474 			vrele(nd.ni_dvp);
3475 		else
3476 			vput(nd.ni_dvp);
3477 		VFS_UNLOCK_GIANT(vfslocked);
3478 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3479 			return (error);
3480 		goto restart;
3481 	}
3482 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3483 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3484 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3485 	vn_finished_write(mp);
3486 out:
3487 	NDFREE(&nd, NDF_ONLY_PNBUF);
3488 	vput(vp);
3489 	if (nd.ni_dvp == vp)
3490 		vrele(nd.ni_dvp);
3491 	else
3492 		vput(nd.ni_dvp);
3493 	VFS_UNLOCK_GIANT(vfslocked);
3494 	return (error);
3495 }
3496 
#ifdef COMPAT_43
/*
 * Read a block of directory entries in a filesystem independent format.
 *
 * COMPAT_43 variant.  Reads up to uap->count bytes (at most 64KB) of
 * directory entries from the directory open on uap->fd into the user
 * buffer uap->buf, rewriting the d_type / d_namlen fields of each entry
 * on the way (see the comments in the conversion loop).  The file offset
 * at which the read started is copied out to uap->basep and the number
 * of bytes transferred is returned in td->td_retval[0].
 *
 * Returns 0 or an errno value: EINVAL for an oversized count or a
 * descriptor that is not a directory, EBADF if the file is not open for
 * reading, EIO if an entry with a zero record length is found.
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
{
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	struct vnode *vp, *tvp;
	struct file *fp;
	caddr_t dirbuf;
	long loff;
	int error, eofflag, readcnt, vfslocked;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
	if (error != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
unionread:
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	if (vp->v_type != VDIR) {
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		return (EINVAL);
	}
	/* Describe the user buffer for the read. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	auio.uio_offset = fp->f_offset;
	loff = auio.uio_offset;
#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, vp);
	if (error != 0)
		goto fail_locked;
#endif
#if (BYTE_ORDER != LITTLE_ENDIAN)
	if (vp->v_mount->mnt_maxsymlinklen <= 0) {
		/* Entries are passed to the user buffer unmodified. */
		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
		    NULL, NULL);
		fp->f_offset = auio.uio_offset;
	} else
#endif
	{
		/*
		 * Read into a temporary kernel buffer, fix up every
		 * entry in place, then copy the result out to the user.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
		    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			dp = (struct dirent *)dirbuf;
			while (dp < edp) {
#if (BYTE_ORDER == LITTLE_ENDIAN)
				/*
				 * The expected low byte of
				 * dp->d_namlen is our dp->d_type.
				 * The high MBZ byte of dp->d_namlen
				 * is our dp->d_namlen.
				 */
				dp->d_type = dp->d_namlen;
				dp->d_namlen = 0;
#else
				/*
				 * The dp->d_type is the high byte
				 * of the expected dp->d_namlen,
				 * so must be zero'ed.
				 */
				dp->d_type = 0;
#endif
				/* A zero-length record would never advance. */
				if (dp->d_reclen <= 0) {
					error = EIO;
					break;
				}
				dp = (struct dirent *)
				    ((char *)dp + dp->d_reclen);
			}
			/* Only copy out if every entry was walked. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	if (error != 0)
		goto fail_locked;
	/*
	 * Nothing was transferred and this is the root of a union mount:
	 * switch the open file over to the covered vnode and read that
	 * directory instead.
	 */
	if (uap->count == auio.uio_resid &&
	    (vp->v_vflag & VV_ROOT) &&
	    (vp->v_mount->mnt_flag & MNT_UNION)) {
		tvp = vp;
		vp = vp->v_mount->mnt_vnodecovered;
		VREF(vp);
		fp->f_vnode = vp;
		fp->f_data = vp;
		fp->f_offset = 0;
		vput(tvp);
		VFS_UNLOCK_GIANT(vfslocked);
		goto unionread;
	}
	VOP_UNLOCK(vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);
	error = copyout(&loff, uap->basep, sizeof(long));
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	return (error);

fail_locked:
	/* Error exit taken while the vnode is still locked. */
	VOP_UNLOCK(vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);
	fdrop(fp, td);
	return (error);
}
#endif /* COMPAT_43 */
3642 
3643 /*
3644  * Read a block of directory entries in a filesystem independent format.
3645  */
3646 #ifndef _SYS_SYSPROTO_H_
3647 struct getdirentries_args {
3648 	int	fd;
3649 	char	*buf;
3650 	u_int	count;
3651 	long	*basep;
3652 };
3653 #endif
3654 int
3655 getdirentries(td, uap)
3656 	struct thread *td;
3657 	register struct getdirentries_args /* {
3658 		int fd;
3659 		char *buf;
3660 		u_int count;
3661 		long *basep;
3662 	} */ *uap;
3663 {
3664 	struct vnode *vp;
3665 	struct file *fp;
3666 	struct uio auio;
3667 	struct iovec aiov;
3668 	int vfslocked;
3669 	long loff;
3670 	int error, eofflag;
3671 
3672 	AUDIT_ARG(fd, uap->fd);
3673 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3674 		return (error);
3675 	if ((fp->f_flag & FREAD) == 0) {
3676 		fdrop(fp, td);
3677 		return (EBADF);
3678 	}
3679 	vp = fp->f_vnode;
3680 unionread:
3681 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3682 	if (vp->v_type != VDIR) {
3683 		VFS_UNLOCK_GIANT(vfslocked);
3684 		error = EINVAL;
3685 		goto fail;
3686 	}
3687 	aiov.iov_base = uap->buf;
3688 	aiov.iov_len = uap->count;
3689 	auio.uio_iov = &aiov;
3690 	auio.uio_iovcnt = 1;
3691 	auio.uio_rw = UIO_READ;
3692 	auio.uio_segflg = UIO_USERSPACE;
3693 	auio.uio_td = td;
3694 	auio.uio_resid = uap->count;
3695 	/* vn_lock(vp, LK_SHARED | LK_RETRY); */
3696 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3697 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3698 	loff = auio.uio_offset = fp->f_offset;
3699 #ifdef MAC
3700 	error = mac_vnode_check_readdir(td->td_ucred, vp);
3701 	if (error == 0)
3702 #endif
3703 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3704 		    NULL);
3705 	fp->f_offset = auio.uio_offset;
3706 	if (error) {
3707 		VOP_UNLOCK(vp, 0);
3708 		VFS_UNLOCK_GIANT(vfslocked);
3709 		goto fail;
3710 	}
3711 	if (uap->count == auio.uio_resid &&
3712 	    (vp->v_vflag & VV_ROOT) &&
3713 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
3714 		struct vnode *tvp = vp;
3715 		vp = vp->v_mount->mnt_vnodecovered;
3716 		VREF(vp);
3717 		fp->f_vnode = vp;
3718 		fp->f_data = vp;
3719 		fp->f_offset = 0;
3720 		vput(tvp);
3721 		VFS_UNLOCK_GIANT(vfslocked);
3722 		goto unionread;
3723 	}
3724 	VOP_UNLOCK(vp, 0);
3725 	VFS_UNLOCK_GIANT(vfslocked);
3726 	if (uap->basep != NULL) {
3727 		error = copyout(&loff, uap->basep, sizeof(long));
3728 	}
3729 	td->td_retval[0] = uap->count - auio.uio_resid;
3730 fail:
3731 	fdrop(fp, td);
3732 	return (error);
3733 }
3734 
3735 #ifndef _SYS_SYSPROTO_H_
3736 struct getdents_args {
3737 	int fd;
3738 	char *buf;
3739 	size_t count;
3740 };
3741 #endif
3742 int
3743 getdents(td, uap)
3744 	struct thread *td;
3745 	register struct getdents_args /* {
3746 		int fd;
3747 		char *buf;
3748 		u_int count;
3749 	} */ *uap;
3750 {
3751 	struct getdirentries_args ap;
3752 	ap.fd = uap->fd;
3753 	ap.buf = uap->buf;
3754 	ap.count = uap->count;
3755 	ap.basep = NULL;
3756 	return (getdirentries(td, &ap));
3757 }
3758 
3759 /*
3760  * Set the mode mask for creation of filesystem nodes.
3761  */
3762 #ifndef _SYS_SYSPROTO_H_
3763 struct umask_args {
3764 	int	newmask;
3765 };
3766 #endif
3767 int
3768 umask(td, uap)
3769 	struct thread *td;
3770 	struct umask_args /* {
3771 		int newmask;
3772 	} */ *uap;
3773 {
3774 	register struct filedesc *fdp;
3775 
3776 	FILEDESC_XLOCK(td->td_proc->p_fd);
3777 	fdp = td->td_proc->p_fd;
3778 	td->td_retval[0] = fdp->fd_cmask;
3779 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3780 	FILEDESC_XUNLOCK(td->td_proc->p_fd);
3781 	return (0);
3782 }
3783 
3784 /*
3785  * Void all references to file by ripping underlying filesystem away from
3786  * vnode.
3787  */
3788 #ifndef _SYS_SYSPROTO_H_
3789 struct revoke_args {
3790 	char	*path;
3791 };
3792 #endif
3793 int
3794 revoke(td, uap)
3795 	struct thread *td;
3796 	register struct revoke_args /* {
3797 		char *path;
3798 	} */ *uap;
3799 {
3800 	struct vnode *vp;
3801 	struct vattr vattr;
3802 	int error;
3803 	struct nameidata nd;
3804 	int vfslocked;
3805 
3806 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3807 	    UIO_USERSPACE, uap->path, td);
3808 	if ((error = namei(&nd)) != 0)
3809 		return (error);
3810 	vfslocked = NDHASGIANT(&nd);
3811 	vp = nd.ni_vp;
3812 	NDFREE(&nd, NDF_ONLY_PNBUF);
3813 	if (vp->v_type != VCHR) {
3814 		error = EINVAL;
3815 		goto out;
3816 	}
3817 #ifdef MAC
3818 	error = mac_vnode_check_revoke(td->td_ucred, vp);
3819 	if (error)
3820 		goto out;
3821 #endif
3822 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3823 	if (error)
3824 		goto out;
3825 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3826 		error = priv_check(td, PRIV_VFS_ADMIN);
3827 		if (error)
3828 			goto out;
3829 	}
3830 	if (vcount(vp) > 1)
3831 		VOP_REVOKE(vp, REVOKEALL);
3832 out:
3833 	vput(vp);
3834 	VFS_UNLOCK_GIANT(vfslocked);
3835 	return (error);
3836 }
3837 
3838 /*
3839  * Convert a user file descriptor to a kernel file entry.
3840  * A reference on the file entry is held upon returning.
3841  */
3842 int
3843 getvnode(fdp, fd, fpp)
3844 	struct filedesc *fdp;
3845 	int fd;
3846 	struct file **fpp;
3847 {
3848 	int error;
3849 	struct file *fp;
3850 
3851 	fp = NULL;
3852 	if (fdp == NULL)
3853 		error = EBADF;
3854 	else {
3855 		FILEDESC_SLOCK(fdp);
3856 		if ((u_int)fd >= fdp->fd_nfiles ||
3857 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3858 			error = EBADF;
3859 		else if (fp->f_vnode == NULL) {
3860 			fp = NULL;
3861 			error = EINVAL;
3862 		} else {
3863 			fhold(fp);
3864 			error = 0;
3865 		}
3866 		FILEDESC_SUNLOCK(fdp);
3867 	}
3868 	*fpp = fp;
3869 	return (error);
3870 }
3871 
3872 /*
3873  * Get an (NFS) file handle.
3874  */
3875 #ifndef _SYS_SYSPROTO_H_
3876 struct lgetfh_args {
3877 	char	*fname;
3878 	fhandle_t *fhp;
3879 };
3880 #endif
3881 int
3882 lgetfh(td, uap)
3883 	struct thread *td;
3884 	register struct lgetfh_args *uap;
3885 {
3886 	struct nameidata nd;
3887 	fhandle_t fh;
3888 	register struct vnode *vp;
3889 	int vfslocked;
3890 	int error;
3891 
3892 	error = priv_check(td, PRIV_VFS_GETFH);
3893 	if (error)
3894 		return (error);
3895 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3896 	    UIO_USERSPACE, uap->fname, td);
3897 	error = namei(&nd);
3898 	if (error)
3899 		return (error);
3900 	vfslocked = NDHASGIANT(&nd);
3901 	NDFREE(&nd, NDF_ONLY_PNBUF);
3902 	vp = nd.ni_vp;
3903 	bzero(&fh, sizeof(fh));
3904 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3905 	error = VOP_VPTOFH(vp, &fh.fh_fid);
3906 	vput(vp);
3907 	VFS_UNLOCK_GIANT(vfslocked);
3908 	if (error)
3909 		return (error);
3910 	error = copyout(&fh, uap->fhp, sizeof (fh));
3911 	return (error);
3912 }
3913 
3914 #ifndef _SYS_SYSPROTO_H_
3915 struct getfh_args {
3916 	char	*fname;
3917 	fhandle_t *fhp;
3918 };
3919 #endif
3920 int
3921 getfh(td, uap)
3922 	struct thread *td;
3923 	register struct getfh_args *uap;
3924 {
3925 	struct nameidata nd;
3926 	fhandle_t fh;
3927 	register struct vnode *vp;
3928 	int vfslocked;
3929 	int error;
3930 
3931 	error = priv_check(td, PRIV_VFS_GETFH);
3932 	if (error)
3933 		return (error);
3934 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3935 	    UIO_USERSPACE, uap->fname, td);
3936 	error = namei(&nd);
3937 	if (error)
3938 		return (error);
3939 	vfslocked = NDHASGIANT(&nd);
3940 	NDFREE(&nd, NDF_ONLY_PNBUF);
3941 	vp = nd.ni_vp;
3942 	bzero(&fh, sizeof(fh));
3943 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3944 	error = VOP_VPTOFH(vp, &fh.fh_fid);
3945 	vput(vp);
3946 	VFS_UNLOCK_GIANT(vfslocked);
3947 	if (error)
3948 		return (error);
3949 	error = copyout(&fh, uap->fhp, sizeof (fh));
3950 	return (error);
3951 }
3952 
3953 /*
3954  * syscall for the rpc.lockd to use to translate a NFS file handle into an
3955  * open descriptor.
3956  *
3957  * warning: do not remove the priv_check() call or this becomes one giant
3958  * security hole.
3959  */
3960 #ifndef _SYS_SYSPROTO_H_
3961 struct fhopen_args {
3962 	const struct fhandle *u_fhp;
3963 	int flags;
3964 };
3965 #endif
3966 int
3967 fhopen(td, uap)
3968 	struct thread *td;
3969 	struct fhopen_args /* {
3970 		const struct fhandle *u_fhp;
3971 		int flags;
3972 	} */ *uap;
3973 {
3974 	struct proc *p = td->td_proc;
3975 	struct mount *mp;
3976 	struct vnode *vp;
3977 	struct fhandle fhp;
3978 	struct vattr vat;
3979 	struct vattr *vap = &vat;
3980 	struct flock lf;
3981 	struct file *fp;
3982 	register struct filedesc *fdp = p->p_fd;
3983 	int fmode, mode, error, type;
3984 	struct file *nfp;
3985 	int vfslocked;
3986 	int indx;
3987 
3988 	error = priv_check(td, PRIV_VFS_FHOPEN);
3989 	if (error)
3990 		return (error);
3991 	fmode = FFLAGS(uap->flags);
3992 	/* why not allow a non-read/write open for our lockd? */
3993 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3994 		return (EINVAL);
3995 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
3996 	if (error)
3997 		return(error);
3998 	/* find the mount point */
3999 	mp = vfs_getvfs(&fhp.fh_fsid);
4000 	if (mp == NULL)
4001 		return (ESTALE);
4002 	vfslocked = VFS_LOCK_GIANT(mp);
4003 	/* now give me my vnode, it gets returned to me locked */
4004 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4005 	if (error)
4006 		goto out;
4007 	/*
4008 	 * from now on we have to make sure not
4009 	 * to forget about the vnode
4010 	 * any error that causes an abort must vput(vp)
4011 	 * just set error = err and 'goto bad;'.
4012 	 */
4013 
4014 	/*
4015 	 * from vn_open
4016 	 */
4017 	if (vp->v_type == VLNK) {
4018 		error = EMLINK;
4019 		goto bad;
4020 	}
4021 	if (vp->v_type == VSOCK) {
4022 		error = EOPNOTSUPP;
4023 		goto bad;
4024 	}
4025 	mode = 0;
4026 	if (fmode & (FWRITE | O_TRUNC)) {
4027 		if (vp->v_type == VDIR) {
4028 			error = EISDIR;
4029 			goto bad;
4030 		}
4031 		error = vn_writechk(vp);
4032 		if (error)
4033 			goto bad;
4034 		mode |= VWRITE;
4035 	}
4036 	if (fmode & FREAD)
4037 		mode |= VREAD;
4038 	if (fmode & O_APPEND)
4039 		mode |= VAPPEND;
4040 #ifdef MAC
4041 	error = mac_vnode_check_open(td->td_ucred, vp, mode);
4042 	if (error)
4043 		goto bad;
4044 #endif
4045 	if (mode) {
4046 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4047 		if (error)
4048 			goto bad;
4049 	}
4050 	if (fmode & O_TRUNC) {
4051 		VOP_UNLOCK(vp, 0);				/* XXX */
4052 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4053 			vrele(vp);
4054 			goto out;
4055 		}
4056 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4057 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */
4058 #ifdef MAC
4059 		/*
4060 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4061 		 * should be right.
4062 		 */
4063 		error = mac_vnode_check_write(td->td_ucred, td->td_ucred, vp);
4064 		if (error == 0) {
4065 #endif
4066 			VATTR_NULL(vap);
4067 			vap->va_size = 0;
4068 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4069 #ifdef MAC
4070 		}
4071 #endif
4072 		vn_finished_write(mp);
4073 		if (error)
4074 			goto bad;
4075 	}
4076 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, NULL);
4077 	if (error)
4078 		goto bad;
4079 
4080 	if (fmode & FWRITE)
4081 		vp->v_writecount++;
4082 
4083 	/*
4084 	 * end of vn_open code
4085 	 */
4086 
4087 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4088 		if (fmode & FWRITE)
4089 			vp->v_writecount--;
4090 		goto bad;
4091 	}
4092 	/* An extra reference on `nfp' has been held for us by falloc(). */
4093 	fp = nfp;
4094 	nfp->f_vnode = vp;
4095 	finit(nfp, fmode & FMASK, DTYPE_VNODE, vp, &vnops);
4096 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4097 		lf.l_whence = SEEK_SET;
4098 		lf.l_start = 0;
4099 		lf.l_len = 0;
4100 		if (fmode & O_EXLOCK)
4101 			lf.l_type = F_WRLCK;
4102 		else
4103 			lf.l_type = F_RDLCK;
4104 		type = F_FLOCK;
4105 		if ((fmode & FNONBLOCK) == 0)
4106 			type |= F_WAIT;
4107 		VOP_UNLOCK(vp, 0);
4108 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4109 			    type)) != 0) {
4110 			/*
4111 			 * The lock request failed.  Normally close the
4112 			 * descriptor but handle the case where someone might
4113 			 * have dup()d or close()d it when we weren't looking.
4114 			 */
4115 			fdclose(fdp, fp, indx, td);
4116 
4117 			/*
4118 			 * release our private reference
4119 			 */
4120 			fdrop(fp, td);
4121 			goto out;
4122 		}
4123 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4124 		atomic_set_int(&fp->f_flag, FHASLOCK);
4125 	}
4126 
4127 	VOP_UNLOCK(vp, 0);
4128 	fdrop(fp, td);
4129 	vfs_rel(mp);
4130 	VFS_UNLOCK_GIANT(vfslocked);
4131 	td->td_retval[0] = indx;
4132 	return (0);
4133 
4134 bad:
4135 	vput(vp);
4136 out:
4137 	vfs_rel(mp);
4138 	VFS_UNLOCK_GIANT(vfslocked);
4139 	return (error);
4140 }
4141 
4142 /*
4143  * Stat an (NFS) file handle.
4144  */
4145 #ifndef _SYS_SYSPROTO_H_
4146 struct fhstat_args {
4147 	struct fhandle *u_fhp;
4148 	struct stat *sb;
4149 };
4150 #endif
4151 int
4152 fhstat(td, uap)
4153 	struct thread *td;
4154 	register struct fhstat_args /* {
4155 		struct fhandle *u_fhp;
4156 		struct stat *sb;
4157 	} */ *uap;
4158 {
4159 	struct stat sb;
4160 	fhandle_t fh;
4161 	struct mount *mp;
4162 	struct vnode *vp;
4163 	int vfslocked;
4164 	int error;
4165 
4166 	error = priv_check(td, PRIV_VFS_FHSTAT);
4167 	if (error)
4168 		return (error);
4169 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4170 	if (error)
4171 		return (error);
4172 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4173 		return (ESTALE);
4174 	vfslocked = VFS_LOCK_GIANT(mp);
4175 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4176 		vfs_rel(mp);
4177 		VFS_UNLOCK_GIANT(vfslocked);
4178 		return (error);
4179 	}
4180 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4181 	vput(vp);
4182 	vfs_rel(mp);
4183 	VFS_UNLOCK_GIANT(vfslocked);
4184 	if (error)
4185 		return (error);
4186 	error = copyout(&sb, uap->sb, sizeof(sb));
4187 	return (error);
4188 }
4189 
4190 /*
4191  * Implement fstatfs() for (NFS) file handles.
4192  */
4193 #ifndef _SYS_SYSPROTO_H_
4194 struct fhstatfs_args {
4195 	struct fhandle *u_fhp;
4196 	struct statfs *buf;
4197 };
4198 #endif
4199 int
4200 fhstatfs(td, uap)
4201 	struct thread *td;
4202 	struct fhstatfs_args /* {
4203 		struct fhandle *u_fhp;
4204 		struct statfs *buf;
4205 	} */ *uap;
4206 {
4207 	struct statfs sf;
4208 	fhandle_t fh;
4209 	int error;
4210 
4211 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4212 	if (error)
4213 		return (error);
4214 	error = kern_fhstatfs(td, fh, &sf);
4215 	if (error)
4216 		return (error);
4217 	return (copyout(&sf, uap->buf, sizeof(sf)));
4218 }
4219 
4220 int
4221 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4222 {
4223 	struct statfs *sp;
4224 	struct mount *mp;
4225 	struct vnode *vp;
4226 	int vfslocked;
4227 	int error;
4228 
4229 	error = priv_check(td, PRIV_VFS_FHSTATFS);
4230 	if (error)
4231 		return (error);
4232 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4233 		return (ESTALE);
4234 	vfslocked = VFS_LOCK_GIANT(mp);
4235 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4236 	if (error) {
4237 		VFS_UNLOCK_GIANT(vfslocked);
4238 		vfs_rel(mp);
4239 		return (error);
4240 	}
4241 	vput(vp);
4242 	error = prison_canseemount(td->td_ucred, mp);
4243 	if (error)
4244 		goto out;
4245 #ifdef MAC
4246 	error = mac_mount_check_stat(td->td_ucred, mp);
4247 	if (error)
4248 		goto out;
4249 #endif
4250 	/*
4251 	 * Set these in case the underlying filesystem fails to do so.
4252 	 */
4253 	sp = &mp->mnt_stat;
4254 	sp->f_version = STATFS_VERSION;
4255 	sp->f_namemax = NAME_MAX;
4256 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4257 	error = VFS_STATFS(mp, sp, td);
4258 	if (error == 0)
4259 		*buf = *sp;
4260 out:
4261 	vfs_rel(mp);
4262 	VFS_UNLOCK_GIANT(vfslocked);
4263 	return (error);
4264 }
4265