xref: /freebsd/sys/kern/vfs_syscalls.c (revision 908e960ea6343acd9515d89d5d5696f9d8bf090c)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_kdtrace.h"
42 #include "opt_ktrace.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/bio.h>
47 #include <sys/buf.h>
48 #include <sys/sysent.h>
49 #include <sys/malloc.h>
50 #include <sys/mount.h>
51 #include <sys/mutex.h>
52 #include <sys/sysproto.h>
53 #include <sys/namei.h>
54 #include <sys/filedesc.h>
55 #include <sys/kernel.h>
56 #include <sys/fcntl.h>
57 #include <sys/file.h>
58 #include <sys/filio.h>
59 #include <sys/limits.h>
60 #include <sys/linker.h>
61 #include <sys/sdt.h>
62 #include <sys/stat.h>
63 #include <sys/sx.h>
64 #include <sys/unistd.h>
65 #include <sys/vnode.h>
66 #include <sys/priv.h>
67 #include <sys/proc.h>
68 #include <sys/dirent.h>
69 #include <sys/jail.h>
70 #include <sys/syscallsubr.h>
71 #include <sys/sysctl.h>
72 #ifdef KTRACE
73 #include <sys/ktrace.h>
74 #endif
75 
76 #include <machine/stdarg.h>
77 
78 #include <security/audit/audit.h>
79 #include <security/mac/mac_framework.h>
80 
81 #include <vm/vm.h>
82 #include <vm/vm_object.h>
83 #include <vm/vm_page.h>
84 #include <vm/uma.h>
85 
86 SDT_PROVIDER_DEFINE(vfs);
87 SDT_PROBE_DEFINE(vfs, , stat, mode);
88 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 0, "char *");
89 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 1, "int");
90 SDT_PROBE_DEFINE(vfs, , stat, reg);
91 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 0, "char *");
92 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 1, "int");
93 
94 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
95 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
96 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
97 static int setfmode(struct thread *td, struct vnode *, int);
98 static int setfflags(struct thread *td, struct vnode *, int);
99 static int setutimes(struct thread *td, struct vnode *,
100     const struct timespec *, int, int);
101 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
102     struct thread *td);
103 
104 /*
105  * The module initialization routine for POSIX asynchronous I/O will
106  * set this to the version of AIO that it implements.  (Zero means
107  * that it is not implemented.)  This value is used here by pathconf()
108  * and in kern_descrip.c by fpathconf().
109  */
110 int async_io_version;
111 
112 #ifdef DEBUG
113 static int syncprt = 0;
114 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
115 #endif
116 
117 /*
118  * Sync each mounted filesystem.
119  */
120 #ifndef _SYS_SYSPROTO_H_
121 struct sync_args {
122 	int     dummy;
123 };
124 #endif
125 /* ARGSUSED */
126 int
127 sync(td, uap)
128 	struct thread *td;
129 	struct sync_args *uap;
130 {
131 	struct mount *mp, *nmp;
132 	int vfslocked;
133 
134 	mtx_lock(&mountlist_mtx);
135 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
136 		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
137 			nmp = TAILQ_NEXT(mp, mnt_list);
138 			continue;
139 		}
140 		vfslocked = VFS_LOCK_GIANT(mp);
141 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
142 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
143 			MNT_ILOCK(mp);
144 			mp->mnt_noasync++;
145 			mp->mnt_kern_flag &= ~MNTK_ASYNC;
146 			MNT_IUNLOCK(mp);
147 			vfs_msync(mp, MNT_NOWAIT);
148 			VFS_SYNC(mp, MNT_NOWAIT);
149 			MNT_ILOCK(mp);
150 			mp->mnt_noasync--;
151 			if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
152 			    mp->mnt_noasync == 0)
153 				mp->mnt_kern_flag |= MNTK_ASYNC;
154 			MNT_IUNLOCK(mp);
155 			vn_finished_write(mp);
156 		}
157 		VFS_UNLOCK_GIANT(vfslocked);
158 		mtx_lock(&mountlist_mtx);
159 		nmp = TAILQ_NEXT(mp, mnt_list);
160 		vfs_unbusy(mp);
161 	}
162 	mtx_unlock(&mountlist_mtx);
163 	return (0);
164 }
165 
166 /*
167  * Change filesystem quotas.
168  */
169 #ifndef _SYS_SYSPROTO_H_
170 struct quotactl_args {
171 	char *path;
172 	int cmd;
173 	int uid;
174 	caddr_t arg;
175 };
176 #endif
177 int
178 quotactl(td, uap)
179 	struct thread *td;
180 	register struct quotactl_args /* {
181 		char *path;
182 		int cmd;
183 		int uid;
184 		caddr_t arg;
185 	} */ *uap;
186 {
187 	struct mount *mp;
188 	int vfslocked;
189 	int error;
190 	struct nameidata nd;
191 
192 	AUDIT_ARG(cmd, uap->cmd);
193 	AUDIT_ARG(uid, uap->uid);
194 	if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
195 		return (EPERM);
196 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
197 	   UIO_USERSPACE, uap->path, td);
198 	if ((error = namei(&nd)) != 0)
199 		return (error);
200 	vfslocked = NDHASGIANT(&nd);
201 	NDFREE(&nd, NDF_ONLY_PNBUF);
202 	mp = nd.ni_vp->v_mount;
203 	vfs_ref(mp);
204 	vput(nd.ni_vp);
205 	error = vfs_busy(mp, 0);
206 	vfs_rel(mp);
207 	if (error) {
208 		VFS_UNLOCK_GIANT(vfslocked);
209 		return (error);
210 	}
211 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
212 	vfs_unbusy(mp);
213 	VFS_UNLOCK_GIANT(vfslocked);
214 	return (error);
215 }
216 
217 /*
218  * Used by statfs conversion routines to scale the block size up if
219  * necessary so that all of the block counts are <= 'max_size'.  Note
220  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
221  * value of 'n'.
222  */
223 void
224 statfs_scale_blocks(struct statfs *sf, long max_size)
225 {
226 	uint64_t count;
227 	int shift;
228 
229 	KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
230 
231 	/*
232 	 * Attempt to scale the block counts to give a more accurate
233 	 * overview to userland of the ratio of free space to used
234 	 * space.  To do this, find the largest block count and compute
235 	 * a divisor that lets it fit into a signed integer <= max_size.
236 	 */
237 	if (sf->f_bavail < 0)
238 		count = -sf->f_bavail;
239 	else
240 		count = sf->f_bavail;
241 	count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
242 	if (count <= max_size)
243 		return;
244 
245 	count >>= flsl(max_size);
246 	shift = 0;
247 	while (count > 0) {
248 		shift++;
249 		count >>=1;
250 	}
251 
252 	sf->f_bsize <<= shift;
253 	sf->f_blocks >>= shift;
254 	sf->f_bfree >>= shift;
255 	sf->f_bavail >>= shift;
256 }
257 
258 /*
259  * Get filesystem statistics.
260  */
261 #ifndef _SYS_SYSPROTO_H_
262 struct statfs_args {
263 	char *path;
264 	struct statfs *buf;
265 };
266 #endif
267 int
268 statfs(td, uap)
269 	struct thread *td;
270 	register struct statfs_args /* {
271 		char *path;
272 		struct statfs *buf;
273 	} */ *uap;
274 {
275 	struct statfs sf;
276 	int error;
277 
278 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
279 	if (error == 0)
280 		error = copyout(&sf, uap->buf, sizeof(sf));
281 	return (error);
282 }
283 
284 int
285 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
286     struct statfs *buf)
287 {
288 	struct mount *mp;
289 	struct statfs *sp, sb;
290 	int vfslocked;
291 	int error;
292 	struct nameidata nd;
293 
294 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
295 	    AUDITVNODE1, pathseg, path, td);
296 	error = namei(&nd);
297 	if (error)
298 		return (error);
299 	vfslocked = NDHASGIANT(&nd);
300 	mp = nd.ni_vp->v_mount;
301 	vfs_ref(mp);
302 	NDFREE(&nd, NDF_ONLY_PNBUF);
303 	vput(nd.ni_vp);
304 	error = vfs_busy(mp, 0);
305 	vfs_rel(mp);
306 	if (error) {
307 		VFS_UNLOCK_GIANT(vfslocked);
308 		return (error);
309 	}
310 #ifdef MAC
311 	error = mac_mount_check_stat(td->td_ucred, mp);
312 	if (error)
313 		goto out;
314 #endif
315 	/*
316 	 * Set these in case the underlying filesystem fails to do so.
317 	 */
318 	sp = &mp->mnt_stat;
319 	sp->f_version = STATFS_VERSION;
320 	sp->f_namemax = NAME_MAX;
321 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
322 	error = VFS_STATFS(mp, sp);
323 	if (error)
324 		goto out;
325 	if (priv_check(td, PRIV_VFS_GENERATION)) {
326 		bcopy(sp, &sb, sizeof(sb));
327 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
328 		prison_enforce_statfs(td->td_ucred, mp, &sb);
329 		sp = &sb;
330 	}
331 	*buf = *sp;
332 out:
333 	vfs_unbusy(mp);
334 	VFS_UNLOCK_GIANT(vfslocked);
335 	return (error);
336 }
337 
338 /*
339  * Get filesystem statistics.
340  */
341 #ifndef _SYS_SYSPROTO_H_
342 struct fstatfs_args {
343 	int fd;
344 	struct statfs *buf;
345 };
346 #endif
347 int
348 fstatfs(td, uap)
349 	struct thread *td;
350 	register struct fstatfs_args /* {
351 		int fd;
352 		struct statfs *buf;
353 	} */ *uap;
354 {
355 	struct statfs sf;
356 	int error;
357 
358 	error = kern_fstatfs(td, uap->fd, &sf);
359 	if (error == 0)
360 		error = copyout(&sf, uap->buf, sizeof(sf));
361 	return (error);
362 }
363 
364 int
365 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
366 {
367 	struct file *fp;
368 	struct mount *mp;
369 	struct statfs *sp, sb;
370 	int vfslocked;
371 	struct vnode *vp;
372 	int error;
373 
374 	AUDIT_ARG(fd, fd);
375 	error = getvnode(td->td_proc->p_fd, fd, &fp);
376 	if (error)
377 		return (error);
378 	vp = fp->f_vnode;
379 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
380 	vn_lock(vp, LK_SHARED | LK_RETRY);
381 #ifdef AUDIT
382 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
383 #endif
384 	mp = vp->v_mount;
385 	if (mp)
386 		vfs_ref(mp);
387 	VOP_UNLOCK(vp, 0);
388 	fdrop(fp, td);
389 	if (mp == NULL) {
390 		error = EBADF;
391 		goto out;
392 	}
393 	error = vfs_busy(mp, 0);
394 	vfs_rel(mp);
395 	if (error) {
396 		VFS_UNLOCK_GIANT(vfslocked);
397 		return (error);
398 	}
399 #ifdef MAC
400 	error = mac_mount_check_stat(td->td_ucred, mp);
401 	if (error)
402 		goto out;
403 #endif
404 	/*
405 	 * Set these in case the underlying filesystem fails to do so.
406 	 */
407 	sp = &mp->mnt_stat;
408 	sp->f_version = STATFS_VERSION;
409 	sp->f_namemax = NAME_MAX;
410 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
411 	error = VFS_STATFS(mp, sp);
412 	if (error)
413 		goto out;
414 	if (priv_check(td, PRIV_VFS_GENERATION)) {
415 		bcopy(sp, &sb, sizeof(sb));
416 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
417 		prison_enforce_statfs(td->td_ucred, mp, &sb);
418 		sp = &sb;
419 	}
420 	*buf = *sp;
421 out:
422 	if (mp)
423 		vfs_unbusy(mp);
424 	VFS_UNLOCK_GIANT(vfslocked);
425 	return (error);
426 }
427 
428 /*
429  * Get statistics on all filesystems.
430  */
431 #ifndef _SYS_SYSPROTO_H_
432 struct getfsstat_args {
433 	struct statfs *buf;
434 	long bufsize;
435 	int flags;
436 };
437 #endif
438 int
439 getfsstat(td, uap)
440 	struct thread *td;
441 	register struct getfsstat_args /* {
442 		struct statfs *buf;
443 		long bufsize;
444 		int flags;
445 	} */ *uap;
446 {
447 
448 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
449 	    uap->flags));
450 }
451 
452 /*
453  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
454  * 	The caller is responsible for freeing memory which will be allocated
455  *	in '*buf'.
456  */
457 int
458 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
459     enum uio_seg bufseg, int flags)
460 {
461 	struct mount *mp, *nmp;
462 	struct statfs *sfsp, *sp, sb;
463 	size_t count, maxcount;
464 	int vfslocked;
465 	int error;
466 
467 	maxcount = bufsize / sizeof(struct statfs);
468 	if (bufsize == 0)
469 		sfsp = NULL;
470 	else if (bufseg == UIO_USERSPACE)
471 		sfsp = *buf;
472 	else /* if (bufseg == UIO_SYSSPACE) */ {
473 		count = 0;
474 		mtx_lock(&mountlist_mtx);
475 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
476 			count++;
477 		}
478 		mtx_unlock(&mountlist_mtx);
479 		if (maxcount > count)
480 			maxcount = count;
481 		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
482 		    M_WAITOK);
483 	}
484 	count = 0;
485 	mtx_lock(&mountlist_mtx);
486 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
487 		if (prison_canseemount(td->td_ucred, mp) != 0) {
488 			nmp = TAILQ_NEXT(mp, mnt_list);
489 			continue;
490 		}
491 #ifdef MAC
492 		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
493 			nmp = TAILQ_NEXT(mp, mnt_list);
494 			continue;
495 		}
496 #endif
497 		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
498 			nmp = TAILQ_NEXT(mp, mnt_list);
499 			continue;
500 		}
501 		vfslocked = VFS_LOCK_GIANT(mp);
502 		if (sfsp && count < maxcount) {
503 			sp = &mp->mnt_stat;
504 			/*
505 			 * Set these in case the underlying filesystem
506 			 * fails to do so.
507 			 */
508 			sp->f_version = STATFS_VERSION;
509 			sp->f_namemax = NAME_MAX;
510 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
511 			/*
512 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
513 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
514 			 * overrides MNT_WAIT.
515 			 */
516 			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
517 			    (flags & MNT_WAIT)) &&
518 			    (error = VFS_STATFS(mp, sp))) {
519 				VFS_UNLOCK_GIANT(vfslocked);
520 				mtx_lock(&mountlist_mtx);
521 				nmp = TAILQ_NEXT(mp, mnt_list);
522 				vfs_unbusy(mp);
523 				continue;
524 			}
525 			if (priv_check(td, PRIV_VFS_GENERATION)) {
526 				bcopy(sp, &sb, sizeof(sb));
527 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
528 				prison_enforce_statfs(td->td_ucred, mp, &sb);
529 				sp = &sb;
530 			}
531 			if (bufseg == UIO_SYSSPACE)
532 				bcopy(sp, sfsp, sizeof(*sp));
533 			else /* if (bufseg == UIO_USERSPACE) */ {
534 				error = copyout(sp, sfsp, sizeof(*sp));
535 				if (error) {
536 					vfs_unbusy(mp);
537 					VFS_UNLOCK_GIANT(vfslocked);
538 					return (error);
539 				}
540 			}
541 			sfsp++;
542 		}
543 		VFS_UNLOCK_GIANT(vfslocked);
544 		count++;
545 		mtx_lock(&mountlist_mtx);
546 		nmp = TAILQ_NEXT(mp, mnt_list);
547 		vfs_unbusy(mp);
548 	}
549 	mtx_unlock(&mountlist_mtx);
550 	if (sfsp && count > maxcount)
551 		td->td_retval[0] = maxcount;
552 	else
553 		td->td_retval[0] = count;
554 	return (0);
555 }
556 
557 #ifdef COMPAT_FREEBSD4
558 /*
559  * Get old format filesystem statistics.
560  */
561 static void cvtstatfs(struct statfs *, struct ostatfs *);
562 
563 #ifndef _SYS_SYSPROTO_H_
564 struct freebsd4_statfs_args {
565 	char *path;
566 	struct ostatfs *buf;
567 };
568 #endif
569 int
570 freebsd4_statfs(td, uap)
571 	struct thread *td;
572 	struct freebsd4_statfs_args /* {
573 		char *path;
574 		struct ostatfs *buf;
575 	} */ *uap;
576 {
577 	struct ostatfs osb;
578 	struct statfs sf;
579 	int error;
580 
581 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
582 	if (error)
583 		return (error);
584 	cvtstatfs(&sf, &osb);
585 	return (copyout(&osb, uap->buf, sizeof(osb)));
586 }
587 
588 /*
589  * Get filesystem statistics.
590  */
591 #ifndef _SYS_SYSPROTO_H_
592 struct freebsd4_fstatfs_args {
593 	int fd;
594 	struct ostatfs *buf;
595 };
596 #endif
597 int
598 freebsd4_fstatfs(td, uap)
599 	struct thread *td;
600 	struct freebsd4_fstatfs_args /* {
601 		int fd;
602 		struct ostatfs *buf;
603 	} */ *uap;
604 {
605 	struct ostatfs osb;
606 	struct statfs sf;
607 	int error;
608 
609 	error = kern_fstatfs(td, uap->fd, &sf);
610 	if (error)
611 		return (error);
612 	cvtstatfs(&sf, &osb);
613 	return (copyout(&osb, uap->buf, sizeof(osb)));
614 }
615 
616 /*
617  * Get statistics on all filesystems.
618  */
619 #ifndef _SYS_SYSPROTO_H_
620 struct freebsd4_getfsstat_args {
621 	struct ostatfs *buf;
622 	long bufsize;
623 	int flags;
624 };
625 #endif
626 int
627 freebsd4_getfsstat(td, uap)
628 	struct thread *td;
629 	register struct freebsd4_getfsstat_args /* {
630 		struct ostatfs *buf;
631 		long bufsize;
632 		int flags;
633 	} */ *uap;
634 {
635 	struct statfs *buf, *sp;
636 	struct ostatfs osb;
637 	size_t count, size;
638 	int error;
639 
640 	count = uap->bufsize / sizeof(struct ostatfs);
641 	size = count * sizeof(struct statfs);
642 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
643 	if (size > 0) {
644 		count = td->td_retval[0];
645 		sp = buf;
646 		while (count > 0 && error == 0) {
647 			cvtstatfs(sp, &osb);
648 			error = copyout(&osb, uap->buf, sizeof(osb));
649 			sp++;
650 			uap->buf++;
651 			count--;
652 		}
653 		free(buf, M_TEMP);
654 	}
655 	return (error);
656 }
657 
658 /*
659  * Implement fstatfs() for (NFS) file handles.
660  */
661 #ifndef _SYS_SYSPROTO_H_
662 struct freebsd4_fhstatfs_args {
663 	struct fhandle *u_fhp;
664 	struct ostatfs *buf;
665 };
666 #endif
667 int
668 freebsd4_fhstatfs(td, uap)
669 	struct thread *td;
670 	struct freebsd4_fhstatfs_args /* {
671 		struct fhandle *u_fhp;
672 		struct ostatfs *buf;
673 	} */ *uap;
674 {
675 	struct ostatfs osb;
676 	struct statfs sf;
677 	fhandle_t fh;
678 	int error;
679 
680 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
681 	if (error)
682 		return (error);
683 	error = kern_fhstatfs(td, fh, &sf);
684 	if (error)
685 		return (error);
686 	cvtstatfs(&sf, &osb);
687 	return (copyout(&osb, uap->buf, sizeof(osb)));
688 }
689 
690 /*
691  * Convert a new format statfs structure to an old format statfs structure.
692  */
693 static void
694 cvtstatfs(nsp, osp)
695 	struct statfs *nsp;
696 	struct ostatfs *osp;
697 {
698 
699 	statfs_scale_blocks(nsp, LONG_MAX);
700 	bzero(osp, sizeof(*osp));
701 	osp->f_bsize = nsp->f_bsize;
702 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
703 	osp->f_blocks = nsp->f_blocks;
704 	osp->f_bfree = nsp->f_bfree;
705 	osp->f_bavail = nsp->f_bavail;
706 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
707 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
708 	osp->f_owner = nsp->f_owner;
709 	osp->f_type = nsp->f_type;
710 	osp->f_flags = nsp->f_flags;
711 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
712 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
713 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
714 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
715 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
716 	    MIN(MFSNAMELEN, OMFSNAMELEN));
717 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
718 	    MIN(MNAMELEN, OMNAMELEN));
719 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
720 	    MIN(MNAMELEN, OMNAMELEN));
721 	osp->f_fsid = nsp->f_fsid;
722 }
723 #endif /* COMPAT_FREEBSD4 */
724 
725 /*
726  * Change current working directory to a given file descriptor.
727  */
728 #ifndef _SYS_SYSPROTO_H_
729 struct fchdir_args {
730 	int	fd;
731 };
732 #endif
733 int
734 fchdir(td, uap)
735 	struct thread *td;
736 	struct fchdir_args /* {
737 		int fd;
738 	} */ *uap;
739 {
740 	register struct filedesc *fdp = td->td_proc->p_fd;
741 	struct vnode *vp, *tdp, *vpold;
742 	struct mount *mp;
743 	struct file *fp;
744 	int vfslocked;
745 	int error;
746 
747 	AUDIT_ARG(fd, uap->fd);
748 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
749 		return (error);
750 	vp = fp->f_vnode;
751 	VREF(vp);
752 	fdrop(fp, td);
753 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
754 	vn_lock(vp, LK_SHARED | LK_RETRY);
755 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
756 	error = change_dir(vp, td);
757 	while (!error && (mp = vp->v_mountedhere) != NULL) {
758 		int tvfslocked;
759 		if (vfs_busy(mp, 0))
760 			continue;
761 		tvfslocked = VFS_LOCK_GIANT(mp);
762 		error = VFS_ROOT(mp, LK_SHARED, &tdp);
763 		vfs_unbusy(mp);
764 		if (error) {
765 			VFS_UNLOCK_GIANT(tvfslocked);
766 			break;
767 		}
768 		vput(vp);
769 		VFS_UNLOCK_GIANT(vfslocked);
770 		vp = tdp;
771 		vfslocked = tvfslocked;
772 	}
773 	if (error) {
774 		vput(vp);
775 		VFS_UNLOCK_GIANT(vfslocked);
776 		return (error);
777 	}
778 	VOP_UNLOCK(vp, 0);
779 	VFS_UNLOCK_GIANT(vfslocked);
780 	FILEDESC_XLOCK(fdp);
781 	vpold = fdp->fd_cdir;
782 	fdp->fd_cdir = vp;
783 	FILEDESC_XUNLOCK(fdp);
784 	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
785 	vrele(vpold);
786 	VFS_UNLOCK_GIANT(vfslocked);
787 	return (0);
788 }
789 
790 /*
791  * Change current working directory (``.'').
792  */
793 #ifndef _SYS_SYSPROTO_H_
794 struct chdir_args {
795 	char	*path;
796 };
797 #endif
798 int
799 chdir(td, uap)
800 	struct thread *td;
801 	struct chdir_args /* {
802 		char *path;
803 	} */ *uap;
804 {
805 
806 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
807 }
808 
809 int
810 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
811 {
812 	register struct filedesc *fdp = td->td_proc->p_fd;
813 	int error;
814 	struct nameidata nd;
815 	struct vnode *vp;
816 	int vfslocked;
817 
818 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 |
819 	    MPSAFE, pathseg, path, td);
820 	if ((error = namei(&nd)) != 0)
821 		return (error);
822 	vfslocked = NDHASGIANT(&nd);
823 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
824 		vput(nd.ni_vp);
825 		VFS_UNLOCK_GIANT(vfslocked);
826 		NDFREE(&nd, NDF_ONLY_PNBUF);
827 		return (error);
828 	}
829 	VOP_UNLOCK(nd.ni_vp, 0);
830 	VFS_UNLOCK_GIANT(vfslocked);
831 	NDFREE(&nd, NDF_ONLY_PNBUF);
832 	FILEDESC_XLOCK(fdp);
833 	vp = fdp->fd_cdir;
834 	fdp->fd_cdir = nd.ni_vp;
835 	FILEDESC_XUNLOCK(fdp);
836 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
837 	vrele(vp);
838 	VFS_UNLOCK_GIANT(vfslocked);
839 	return (0);
840 }
841 
842 /*
843  * Helper function for raised chroot(2) security function:  Refuse if
844  * any filedescriptors are open directories.
845  */
846 static int
847 chroot_refuse_vdir_fds(fdp)
848 	struct filedesc *fdp;
849 {
850 	struct vnode *vp;
851 	struct file *fp;
852 	int fd;
853 
854 	FILEDESC_LOCK_ASSERT(fdp);
855 
856 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
857 		fp = fget_locked(fdp, fd);
858 		if (fp == NULL)
859 			continue;
860 		if (fp->f_type == DTYPE_VNODE) {
861 			vp = fp->f_vnode;
862 			if (vp->v_type == VDIR)
863 				return (EPERM);
864 		}
865 	}
866 	return (0);
867 }
868 
869 /*
870  * This sysctl determines if we will allow a process to chroot(2) if it
871  * has a directory open:
872  *	0: disallowed for all processes.
873  *	1: allowed for processes that were not already chroot(2)'ed.
874  *	2: allowed for all processes.
875  */
876 
877 static int chroot_allow_open_directories = 1;
878 
879 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
880      &chroot_allow_open_directories, 0, "");
881 
882 /*
883  * Change notion of root (``/'') directory.
884  */
885 #ifndef _SYS_SYSPROTO_H_
886 struct chroot_args {
887 	char	*path;
888 };
889 #endif
890 int
891 chroot(td, uap)
892 	struct thread *td;
893 	struct chroot_args /* {
894 		char *path;
895 	} */ *uap;
896 {
897 	int error;
898 	struct nameidata nd;
899 	int vfslocked;
900 
901 	error = priv_check(td, PRIV_VFS_CHROOT);
902 	if (error)
903 		return (error);
904 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
905 	    AUDITVNODE1, UIO_USERSPACE, uap->path, td);
906 	error = namei(&nd);
907 	if (error)
908 		goto error;
909 	vfslocked = NDHASGIANT(&nd);
910 	if ((error = change_dir(nd.ni_vp, td)) != 0)
911 		goto e_vunlock;
912 #ifdef MAC
913 	if ((error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp)))
914 		goto e_vunlock;
915 #endif
916 	VOP_UNLOCK(nd.ni_vp, 0);
917 	error = change_root(nd.ni_vp, td);
918 	vrele(nd.ni_vp);
919 	VFS_UNLOCK_GIANT(vfslocked);
920 	NDFREE(&nd, NDF_ONLY_PNBUF);
921 	return (error);
922 e_vunlock:
923 	vput(nd.ni_vp);
924 	VFS_UNLOCK_GIANT(vfslocked);
925 error:
926 	NDFREE(&nd, NDF_ONLY_PNBUF);
927 	return (error);
928 }
929 
930 /*
931  * Common routine for chroot and chdir.  Callers must provide a locked vnode
932  * instance.
933  */
934 int
935 change_dir(vp, td)
936 	struct vnode *vp;
937 	struct thread *td;
938 {
939 	int error;
940 
941 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
942 	if (vp->v_type != VDIR)
943 		return (ENOTDIR);
944 #ifdef MAC
945 	error = mac_vnode_check_chdir(td->td_ucred, vp);
946 	if (error)
947 		return (error);
948 #endif
949 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
950 	return (error);
951 }
952 
953 /*
954  * Common routine for kern_chroot() and jail_attach().  The caller is
955  * responsible for invoking priv_check() and mac_vnode_check_chroot() to
956  * authorize this operation.
957  */
958 int
959 change_root(vp, td)
960 	struct vnode *vp;
961 	struct thread *td;
962 {
963 	struct filedesc *fdp;
964 	struct vnode *oldvp;
965 	int vfslocked;
966 	int error;
967 
968 	VFS_ASSERT_GIANT(vp->v_mount);
969 	fdp = td->td_proc->p_fd;
970 	FILEDESC_XLOCK(fdp);
971 	if (chroot_allow_open_directories == 0 ||
972 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
973 		error = chroot_refuse_vdir_fds(fdp);
974 		if (error) {
975 			FILEDESC_XUNLOCK(fdp);
976 			return (error);
977 		}
978 	}
979 	oldvp = fdp->fd_rdir;
980 	fdp->fd_rdir = vp;
981 	VREF(fdp->fd_rdir);
982 	if (!fdp->fd_jdir) {
983 		fdp->fd_jdir = vp;
984 		VREF(fdp->fd_jdir);
985 	}
986 	FILEDESC_XUNLOCK(fdp);
987 	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
988 	vrele(oldvp);
989 	VFS_UNLOCK_GIANT(vfslocked);
990 	return (0);
991 }
992 
993 /*
994  * Check permissions, allocate an open file structure, and call the device
995  * open routine if any.
996  */
997 #ifndef _SYS_SYSPROTO_H_
998 struct open_args {
999 	char	*path;
1000 	int	flags;
1001 	int	mode;
1002 };
1003 #endif
1004 int
1005 open(td, uap)
1006 	struct thread *td;
1007 	register struct open_args /* {
1008 		char *path;
1009 		int flags;
1010 		int mode;
1011 	} */ *uap;
1012 {
1013 
1014 	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
1015 }
1016 
1017 #ifndef _SYS_SYSPROTO_H_
1018 struct openat_args {
1019 	int	fd;
1020 	char	*path;
1021 	int	flag;
1022 	int	mode;
1023 };
1024 #endif
1025 int
1026 openat(struct thread *td, struct openat_args *uap)
1027 {
1028 
1029 	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
1030 	    uap->mode));
1031 }
1032 
1033 int
1034 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
1035     int mode)
1036 {
1037 
1038 	return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
1039 }
1040 
1041 int
1042 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1043     int flags, int mode)
1044 {
1045 	struct proc *p = td->td_proc;
1046 	struct filedesc *fdp = p->p_fd;
1047 	struct file *fp;
1048 	struct vnode *vp;
1049 	struct vattr vat;
1050 	struct mount *mp;
1051 	int cmode;
1052 	struct file *nfp;
1053 	int type, indx, error;
1054 	struct flock lf;
1055 	struct nameidata nd;
1056 	int vfslocked;
1057 
1058 	AUDIT_ARG(fflags, flags);
1059 	AUDIT_ARG(mode, mode);
1060 	/* XXX: audit dirfd */
1061 	/*
1062 	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR may
1063 	 * be specified.
1064 	 */
1065 	if (flags & O_EXEC) {
1066 		if (flags & O_ACCMODE)
1067 			return (EINVAL);
1068 	} else if ((flags & O_ACCMODE) == O_ACCMODE)
1069 		return (EINVAL);
1070 	else
1071 		flags = FFLAGS(flags);
1072 
1073 	error = falloc(td, &nfp, &indx);
1074 	if (error)
1075 		return (error);
1076 	/* An extra reference on `nfp' has been held for us by falloc(). */
1077 	fp = nfp;
1078 	/* Set the flags early so the finit in devfs can pick them up. */
1079 	fp->f_flag = flags & FMASK;
1080 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1081 	NDINIT_AT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, fd,
1082 	    td);
1083 	td->td_dupfd = -1;		/* XXX check for fdopen */
1084 	error = vn_open(&nd, &flags, cmode, fp);
1085 	if (error) {
1086 		/*
1087 		 * If the vn_open replaced the method vector, something
1088 		 * wonderous happened deep below and we just pass it up
1089 		 * pretending we know what we do.
1090 		 */
1091 		if (error == ENXIO && fp->f_ops != &badfileops) {
1092 			fdrop(fp, td);
1093 			td->td_retval[0] = indx;
1094 			return (0);
1095 		}
1096 
1097 		/*
1098 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1099 		 * responsible for dropping the old contents of ofiles[indx]
1100 		 * if it succeeds.
1101 		 */
1102 		if ((error == ENODEV || error == ENXIO) &&
1103 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1104 		    (error =
1105 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1106 			td->td_retval[0] = indx;
1107 			fdrop(fp, td);
1108 			return (0);
1109 		}
1110 		/*
1111 		 * Clean up the descriptor, but only if another thread hadn't
1112 		 * replaced or closed it.
1113 		 */
1114 		fdclose(fdp, fp, indx, td);
1115 		fdrop(fp, td);
1116 
1117 		if (error == ERESTART)
1118 			error = EINTR;
1119 		return (error);
1120 	}
1121 	td->td_dupfd = 0;
1122 	vfslocked = NDHASGIANT(&nd);
1123 	NDFREE(&nd, NDF_ONLY_PNBUF);
1124 	vp = nd.ni_vp;
1125 
1126 	fp->f_vnode = vp;	/* XXX Does devfs need this? */
1127 	/*
1128 	 * If the file wasn't claimed by devfs bind it to the normal
1129 	 * vnode operations here.
1130 	 */
1131 	if (fp->f_ops == &badfileops) {
1132 		KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
1133 		fp->f_seqcount = 1;
1134 		finit(fp, flags & FMASK, DTYPE_VNODE, vp, &vnops);
1135 	}
1136 
1137 	VOP_UNLOCK(vp, 0);
1138 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1139 		lf.l_whence = SEEK_SET;
1140 		lf.l_start = 0;
1141 		lf.l_len = 0;
1142 		if (flags & O_EXLOCK)
1143 			lf.l_type = F_WRLCK;
1144 		else
1145 			lf.l_type = F_RDLCK;
1146 		type = F_FLOCK;
1147 		if ((flags & FNONBLOCK) == 0)
1148 			type |= F_WAIT;
1149 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1150 			    type)) != 0)
1151 			goto bad;
1152 		atomic_set_int(&fp->f_flag, FHASLOCK);
1153 	}
1154 	if (flags & O_TRUNC) {
1155 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1156 			goto bad;
1157 		VATTR_NULL(&vat);
1158 		vat.va_size = 0;
1159 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1160 #ifdef MAC
1161 		error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
1162 		if (error == 0)
1163 #endif
1164 			error = VOP_SETATTR(vp, &vat, td->td_ucred);
1165 		VOP_UNLOCK(vp, 0);
1166 		vn_finished_write(mp);
1167 		if (error)
1168 			goto bad;
1169 	}
1170 	VFS_UNLOCK_GIANT(vfslocked);
1171 	/*
1172 	 * Release our private reference, leaving the one associated with
1173 	 * the descriptor table intact.
1174 	 */
1175 	fdrop(fp, td);
1176 	td->td_retval[0] = indx;
1177 	return (0);
1178 bad:
1179 	VFS_UNLOCK_GIANT(vfslocked);
1180 	fdclose(fdp, fp, indx, td);
1181 	fdrop(fp, td);
1182 	return (error);
1183 }
1184 
1185 #ifdef COMPAT_43
1186 /*
1187  * Create a file.
1188  */
1189 #ifndef _SYS_SYSPROTO_H_
1190 struct ocreat_args {
1191 	char	*path;
1192 	int	mode;
1193 };
1194 #endif
1195 int
1196 ocreat(td, uap)
1197 	struct thread *td;
1198 	register struct ocreat_args /* {
1199 		char *path;
1200 		int mode;
1201 	} */ *uap;
1202 {
1203 
1204 	return (kern_open(td, uap->path, UIO_USERSPACE,
1205 	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
1206 }
1207 #endif /* COMPAT_43 */
1208 
1209 /*
1210  * Create a special file.
1211  */
1212 #ifndef _SYS_SYSPROTO_H_
1213 struct mknod_args {
1214 	char	*path;
1215 	int	mode;
1216 	int	dev;
1217 };
1218 #endif
1219 int
1220 mknod(td, uap)
1221 	struct thread *td;
1222 	register struct mknod_args /* {
1223 		char *path;
1224 		int mode;
1225 		int dev;
1226 	} */ *uap;
1227 {
1228 
1229 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1230 }
1231 
1232 #ifndef _SYS_SYSPROTO_H_
1233 struct mknodat_args {
1234 	int	fd;
1235 	char	*path;
1236 	mode_t	mode;
1237 	dev_t	dev;
1238 };
1239 #endif
1240 int
1241 mknodat(struct thread *td, struct mknodat_args *uap)
1242 {
1243 
1244 	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
1245 	    uap->dev));
1246 }
1247 
1248 int
1249 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1250     int dev)
1251 {
1252 
1253 	return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev));
1254 }
1255 
1256 int
1257 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1258     int mode, int dev)
1259 {
1260 	struct vnode *vp;
1261 	struct mount *mp;
1262 	struct vattr vattr;
1263 	int error;
1264 	int whiteout = 0;
1265 	struct nameidata nd;
1266 	int vfslocked;
1267 
1268 	AUDIT_ARG(mode, mode);
1269 	AUDIT_ARG(dev, dev);
1270 	switch (mode & S_IFMT) {
1271 	case S_IFCHR:
1272 	case S_IFBLK:
1273 		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
1274 		break;
1275 	case S_IFMT:
1276 		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
1277 		break;
1278 	case S_IFWHT:
1279 		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
1280 		break;
1281 	case S_IFIFO:
1282 		if (dev == 0)
1283 			return (kern_mkfifoat(td, fd, path, pathseg, mode));
1284 		/* FALLTHROUGH */
1285 	default:
1286 		error = EINVAL;
1287 		break;
1288 	}
1289 	if (error)
1290 		return (error);
1291 restart:
1292 	bwillwrite();
1293 	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1294 	    pathseg, path, fd, td);
1295 	if ((error = namei(&nd)) != 0)
1296 		return (error);
1297 	vfslocked = NDHASGIANT(&nd);
1298 	vp = nd.ni_vp;
1299 	if (vp != NULL) {
1300 		NDFREE(&nd, NDF_ONLY_PNBUF);
1301 		if (vp == nd.ni_dvp)
1302 			vrele(nd.ni_dvp);
1303 		else
1304 			vput(nd.ni_dvp);
1305 		vrele(vp);
1306 		VFS_UNLOCK_GIANT(vfslocked);
1307 		return (EEXIST);
1308 	} else {
1309 		VATTR_NULL(&vattr);
1310 		FILEDESC_SLOCK(td->td_proc->p_fd);
1311 		vattr.va_mode = (mode & ALLPERMS) &
1312 		    ~td->td_proc->p_fd->fd_cmask;
1313 		FILEDESC_SUNLOCK(td->td_proc->p_fd);
1314 		vattr.va_rdev = dev;
1315 		whiteout = 0;
1316 
1317 		switch (mode & S_IFMT) {
1318 		case S_IFMT:	/* used by badsect to flag bad sectors */
1319 			vattr.va_type = VBAD;
1320 			break;
1321 		case S_IFCHR:
1322 			vattr.va_type = VCHR;
1323 			break;
1324 		case S_IFBLK:
1325 			vattr.va_type = VBLK;
1326 			break;
1327 		case S_IFWHT:
1328 			whiteout = 1;
1329 			break;
1330 		default:
1331 			panic("kern_mknod: invalid mode");
1332 		}
1333 	}
1334 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1335 		NDFREE(&nd, NDF_ONLY_PNBUF);
1336 		vput(nd.ni_dvp);
1337 		VFS_UNLOCK_GIANT(vfslocked);
1338 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1339 			return (error);
1340 		goto restart;
1341 	}
1342 #ifdef MAC
1343 	if (error == 0 && !whiteout)
1344 		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
1345 		    &nd.ni_cnd, &vattr);
1346 #endif
1347 	if (!error) {
1348 		if (whiteout)
1349 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1350 		else {
1351 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1352 						&nd.ni_cnd, &vattr);
1353 			if (error == 0)
1354 				vput(nd.ni_vp);
1355 		}
1356 	}
1357 	NDFREE(&nd, NDF_ONLY_PNBUF);
1358 	vput(nd.ni_dvp);
1359 	vn_finished_write(mp);
1360 	VFS_UNLOCK_GIANT(vfslocked);
1361 	return (error);
1362 }
1363 
1364 /*
1365  * Create a named pipe.
1366  */
1367 #ifndef _SYS_SYSPROTO_H_
1368 struct mkfifo_args {
1369 	char	*path;
1370 	int	mode;
1371 };
1372 #endif
1373 int
1374 mkfifo(td, uap)
1375 	struct thread *td;
1376 	register struct mkfifo_args /* {
1377 		char *path;
1378 		int mode;
1379 	} */ *uap;
1380 {
1381 
1382 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1383 }
1384 
1385 #ifndef _SYS_SYSPROTO_H_
1386 struct mkfifoat_args {
1387 	int	fd;
1388 	char	*path;
1389 	mode_t	mode;
1390 };
1391 #endif
1392 int
1393 mkfifoat(struct thread *td, struct mkfifoat_args *uap)
1394 {
1395 
1396 	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
1397 	    uap->mode));
1398 }
1399 
1400 int
1401 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1402 {
1403 
1404 	return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode));
1405 }
1406 
1407 int
1408 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1409     int mode)
1410 {
1411 	struct mount *mp;
1412 	struct vattr vattr;
1413 	int error;
1414 	struct nameidata nd;
1415 	int vfslocked;
1416 
1417 	AUDIT_ARG(mode, mode);
1418 restart:
1419 	bwillwrite();
1420 	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1421 	    pathseg, path, fd, td);
1422 	if ((error = namei(&nd)) != 0)
1423 		return (error);
1424 	vfslocked = NDHASGIANT(&nd);
1425 	if (nd.ni_vp != NULL) {
1426 		NDFREE(&nd, NDF_ONLY_PNBUF);
1427 		if (nd.ni_vp == nd.ni_dvp)
1428 			vrele(nd.ni_dvp);
1429 		else
1430 			vput(nd.ni_dvp);
1431 		vrele(nd.ni_vp);
1432 		VFS_UNLOCK_GIANT(vfslocked);
1433 		return (EEXIST);
1434 	}
1435 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1436 		NDFREE(&nd, NDF_ONLY_PNBUF);
1437 		vput(nd.ni_dvp);
1438 		VFS_UNLOCK_GIANT(vfslocked);
1439 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1440 			return (error);
1441 		goto restart;
1442 	}
1443 	VATTR_NULL(&vattr);
1444 	vattr.va_type = VFIFO;
1445 	FILEDESC_SLOCK(td->td_proc->p_fd);
1446 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1447 	FILEDESC_SUNLOCK(td->td_proc->p_fd);
1448 #ifdef MAC
1449 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1450 	    &vattr);
1451 	if (error)
1452 		goto out;
1453 #endif
1454 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1455 	if (error == 0)
1456 		vput(nd.ni_vp);
1457 #ifdef MAC
1458 out:
1459 #endif
1460 	vput(nd.ni_dvp);
1461 	vn_finished_write(mp);
1462 	VFS_UNLOCK_GIANT(vfslocked);
1463 	NDFREE(&nd, NDF_ONLY_PNBUF);
1464 	return (error);
1465 }
1466 
1467 /*
1468  * Make a hard file link.
1469  */
1470 #ifndef _SYS_SYSPROTO_H_
1471 struct link_args {
1472 	char	*path;
1473 	char	*link;
1474 };
1475 #endif
1476 int
1477 link(td, uap)
1478 	struct thread *td;
1479 	register struct link_args /* {
1480 		char *path;
1481 		char *link;
1482 	} */ *uap;
1483 {
1484 
1485 	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1486 }
1487 
1488 #ifndef _SYS_SYSPROTO_H_
1489 struct linkat_args {
1490 	int	fd1;
1491 	char	*path1;
1492 	int	fd2;
1493 	char	*path2;
1494 	int	flag;
1495 };
1496 #endif
1497 int
1498 linkat(struct thread *td, struct linkat_args *uap)
1499 {
1500 	int flag;
1501 
1502 	flag = uap->flag;
1503 	if (flag & ~AT_SYMLINK_FOLLOW)
1504 		return (EINVAL);
1505 
1506 	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
1507 	    UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
1508 }
1509 
1510 static int hardlink_check_uid = 0;
1511 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1512     &hardlink_check_uid, 0,
1513     "Unprivileged processes cannot create hard links to files owned by other "
1514     "users");
1515 static int hardlink_check_gid = 0;
1516 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1517     &hardlink_check_gid, 0,
1518     "Unprivileged processes cannot create hard links to files owned by other "
1519     "groups");
1520 
1521 static int
1522 can_hardlink(struct vnode *vp, struct ucred *cred)
1523 {
1524 	struct vattr va;
1525 	int error;
1526 
1527 	if (!hardlink_check_uid && !hardlink_check_gid)
1528 		return (0);
1529 
1530 	error = VOP_GETATTR(vp, &va, cred);
1531 	if (error != 0)
1532 		return (error);
1533 
1534 	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
1535 		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
1536 		if (error)
1537 			return (error);
1538 	}
1539 
1540 	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
1541 		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
1542 		if (error)
1543 			return (error);
1544 	}
1545 
1546 	return (0);
1547 }
1548 
1549 int
1550 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1551 {
1552 
1553 	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW));
1554 }
1555 
1556 int
1557 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
1558     enum uio_seg segflg, int follow)
1559 {
1560 	struct vnode *vp;
1561 	struct mount *mp;
1562 	struct nameidata nd;
1563 	int vfslocked;
1564 	int lvfslocked;
1565 	int error;
1566 
1567 	bwillwrite();
1568 	NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, segflg, path1,
1569 	    fd1, td);
1570 
1571 	if ((error = namei(&nd)) != 0)
1572 		return (error);
1573 	vfslocked = NDHASGIANT(&nd);
1574 	NDFREE(&nd, NDF_ONLY_PNBUF);
1575 	vp = nd.ni_vp;
1576 	if (vp->v_type == VDIR) {
1577 		vrele(vp);
1578 		VFS_UNLOCK_GIANT(vfslocked);
1579 		return (EPERM);		/* POSIX */
1580 	}
1581 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1582 		vrele(vp);
1583 		VFS_UNLOCK_GIANT(vfslocked);
1584 		return (error);
1585 	}
1586 	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1587 	    segflg, path2, fd2, td);
1588 	if ((error = namei(&nd)) == 0) {
1589 		lvfslocked = NDHASGIANT(&nd);
1590 		if (nd.ni_vp != NULL) {
1591 			if (nd.ni_dvp == nd.ni_vp)
1592 				vrele(nd.ni_dvp);
1593 			else
1594 				vput(nd.ni_dvp);
1595 			vrele(nd.ni_vp);
1596 			error = EEXIST;
1597 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY))
1598 		    == 0) {
1599 			error = can_hardlink(vp, td->td_ucred);
1600 			if (error == 0)
1601 #ifdef MAC
1602 				error = mac_vnode_check_link(td->td_ucred,
1603 				    nd.ni_dvp, vp, &nd.ni_cnd);
1604 			if (error == 0)
1605 #endif
1606 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1607 			VOP_UNLOCK(vp, 0);
1608 			vput(nd.ni_dvp);
1609 		}
1610 		NDFREE(&nd, NDF_ONLY_PNBUF);
1611 		VFS_UNLOCK_GIANT(lvfslocked);
1612 	}
1613 	vrele(vp);
1614 	vn_finished_write(mp);
1615 	VFS_UNLOCK_GIANT(vfslocked);
1616 	return (error);
1617 }
1618 
1619 /*
1620  * Make a symbolic link.
1621  */
1622 #ifndef _SYS_SYSPROTO_H_
1623 struct symlink_args {
1624 	char	*path;
1625 	char	*link;
1626 };
1627 #endif
1628 int
1629 symlink(td, uap)
1630 	struct thread *td;
1631 	register struct symlink_args /* {
1632 		char *path;
1633 		char *link;
1634 	} */ *uap;
1635 {
1636 
1637 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1638 }
1639 
1640 #ifndef _SYS_SYSPROTO_H_
1641 struct symlinkat_args {
1642 	char	*path;
1643 	int	fd;
1644 	char	*path2;
1645 };
1646 #endif
1647 int
1648 symlinkat(struct thread *td, struct symlinkat_args *uap)
1649 {
1650 
1651 	return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
1652 	    UIO_USERSPACE));
1653 }
1654 
1655 int
1656 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1657 {
1658 
1659 	return (kern_symlinkat(td, path, AT_FDCWD, link, segflg));
1660 }
1661 
1662 int
1663 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
1664     enum uio_seg segflg)
1665 {
1666 	struct mount *mp;
1667 	struct vattr vattr;
1668 	char *syspath;
1669 	int error;
1670 	struct nameidata nd;
1671 	int vfslocked;
1672 
1673 	if (segflg == UIO_SYSSPACE) {
1674 		syspath = path1;
1675 	} else {
1676 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1677 		if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
1678 			goto out;
1679 	}
1680 	AUDIT_ARG(text, syspath);
1681 restart:
1682 	bwillwrite();
1683 	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1684 	    segflg, path2, fd, td);
1685 	if ((error = namei(&nd)) != 0)
1686 		goto out;
1687 	vfslocked = NDHASGIANT(&nd);
1688 	if (nd.ni_vp) {
1689 		NDFREE(&nd, NDF_ONLY_PNBUF);
1690 		if (nd.ni_vp == nd.ni_dvp)
1691 			vrele(nd.ni_dvp);
1692 		else
1693 			vput(nd.ni_dvp);
1694 		vrele(nd.ni_vp);
1695 		VFS_UNLOCK_GIANT(vfslocked);
1696 		error = EEXIST;
1697 		goto out;
1698 	}
1699 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1700 		NDFREE(&nd, NDF_ONLY_PNBUF);
1701 		vput(nd.ni_dvp);
1702 		VFS_UNLOCK_GIANT(vfslocked);
1703 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1704 			goto out;
1705 		goto restart;
1706 	}
1707 	VATTR_NULL(&vattr);
1708 	FILEDESC_SLOCK(td->td_proc->p_fd);
1709 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1710 	FILEDESC_SUNLOCK(td->td_proc->p_fd);
1711 #ifdef MAC
1712 	vattr.va_type = VLNK;
1713 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1714 	    &vattr);
1715 	if (error)
1716 		goto out2;
1717 #endif
1718 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1719 	if (error == 0)
1720 		vput(nd.ni_vp);
1721 #ifdef MAC
1722 out2:
1723 #endif
1724 	NDFREE(&nd, NDF_ONLY_PNBUF);
1725 	vput(nd.ni_dvp);
1726 	vn_finished_write(mp);
1727 	VFS_UNLOCK_GIANT(vfslocked);
1728 out:
1729 	if (segflg != UIO_SYSSPACE)
1730 		uma_zfree(namei_zone, syspath);
1731 	return (error);
1732 }
1733 
1734 /*
1735  * Delete a whiteout from the filesystem.
1736  */
1737 int
1738 undelete(td, uap)
1739 	struct thread *td;
1740 	register struct undelete_args /* {
1741 		char *path;
1742 	} */ *uap;
1743 {
1744 	int error;
1745 	struct mount *mp;
1746 	struct nameidata nd;
1747 	int vfslocked;
1748 
1749 restart:
1750 	bwillwrite();
1751 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1752 	    UIO_USERSPACE, uap->path, td);
1753 	error = namei(&nd);
1754 	if (error)
1755 		return (error);
1756 	vfslocked = NDHASGIANT(&nd);
1757 
1758 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1759 		NDFREE(&nd, NDF_ONLY_PNBUF);
1760 		if (nd.ni_vp == nd.ni_dvp)
1761 			vrele(nd.ni_dvp);
1762 		else
1763 			vput(nd.ni_dvp);
1764 		if (nd.ni_vp)
1765 			vrele(nd.ni_vp);
1766 		VFS_UNLOCK_GIANT(vfslocked);
1767 		return (EEXIST);
1768 	}
1769 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1770 		NDFREE(&nd, NDF_ONLY_PNBUF);
1771 		vput(nd.ni_dvp);
1772 		VFS_UNLOCK_GIANT(vfslocked);
1773 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1774 			return (error);
1775 		goto restart;
1776 	}
1777 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1778 	NDFREE(&nd, NDF_ONLY_PNBUF);
1779 	vput(nd.ni_dvp);
1780 	vn_finished_write(mp);
1781 	VFS_UNLOCK_GIANT(vfslocked);
1782 	return (error);
1783 }
1784 
1785 /*
1786  * Delete a name from the filesystem.
1787  */
1788 #ifndef _SYS_SYSPROTO_H_
1789 struct unlink_args {
1790 	char	*path;
1791 };
1792 #endif
1793 int
1794 unlink(td, uap)
1795 	struct thread *td;
1796 	struct unlink_args /* {
1797 		char *path;
1798 	} */ *uap;
1799 {
1800 
1801 	return (kern_unlink(td, uap->path, UIO_USERSPACE));
1802 }
1803 
1804 #ifndef _SYS_SYSPROTO_H_
1805 struct unlinkat_args {
1806 	int	fd;
1807 	char	*path;
1808 	int	flag;
1809 };
1810 #endif
1811 int
1812 unlinkat(struct thread *td, struct unlinkat_args *uap)
1813 {
1814 	int flag = uap->flag;
1815 	int fd = uap->fd;
1816 	char *path = uap->path;
1817 
1818 	if (flag & ~AT_REMOVEDIR)
1819 		return (EINVAL);
1820 
1821 	if (flag & AT_REMOVEDIR)
1822 		return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
1823 	else
1824 		return (kern_unlinkat(td, fd, path, UIO_USERSPACE));
1825 }
1826 
1827 int
1828 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1829 {
1830 
1831 	return (kern_unlinkat(td, AT_FDCWD, path, pathseg));
1832 }
1833 
1834 int
1835 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
1836 {
1837 	struct mount *mp;
1838 	struct vnode *vp;
1839 	int error;
1840 	struct nameidata nd;
1841 	int vfslocked;
1842 
1843 restart:
1844 	bwillwrite();
1845 	NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1846 	    pathseg, path, fd, td);
1847 	if ((error = namei(&nd)) != 0)
1848 		return (error == EINVAL ? EPERM : error);
1849 	vfslocked = NDHASGIANT(&nd);
1850 	vp = nd.ni_vp;
1851 	if (vp->v_type == VDIR)
1852 		error = EPERM;		/* POSIX */
1853 	else {
1854 		/*
1855 		 * The root of a mounted filesystem cannot be deleted.
1856 		 *
1857 		 * XXX: can this only be a VDIR case?
1858 		 */
1859 		if (vp->v_vflag & VV_ROOT)
1860 			error = EBUSY;
1861 	}
1862 	if (error == 0) {
1863 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1864 			NDFREE(&nd, NDF_ONLY_PNBUF);
1865 			vput(nd.ni_dvp);
1866 			if (vp == nd.ni_dvp)
1867 				vrele(vp);
1868 			else
1869 				vput(vp);
1870 			VFS_UNLOCK_GIANT(vfslocked);
1871 			if ((error = vn_start_write(NULL, &mp,
1872 			    V_XSLEEP | PCATCH)) != 0)
1873 				return (error);
1874 			goto restart;
1875 		}
1876 #ifdef MAC
1877 		error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
1878 		    &nd.ni_cnd);
1879 		if (error)
1880 			goto out;
1881 #endif
1882 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1883 #ifdef MAC
1884 out:
1885 #endif
1886 		vn_finished_write(mp);
1887 	}
1888 	NDFREE(&nd, NDF_ONLY_PNBUF);
1889 	vput(nd.ni_dvp);
1890 	if (vp == nd.ni_dvp)
1891 		vrele(vp);
1892 	else
1893 		vput(vp);
1894 	VFS_UNLOCK_GIANT(vfslocked);
1895 	return (error);
1896 }
1897 
1898 /*
1899  * Reposition read/write file offset.
1900  */
1901 #ifndef _SYS_SYSPROTO_H_
1902 struct lseek_args {
1903 	int	fd;
1904 	int	pad;
1905 	off_t	offset;
1906 	int	whence;
1907 };
1908 #endif
1909 int
1910 lseek(td, uap)
1911 	struct thread *td;
1912 	register struct lseek_args /* {
1913 		int fd;
1914 		int pad;
1915 		off_t offset;
1916 		int whence;
1917 	} */ *uap;
1918 {
1919 	struct ucred *cred = td->td_ucred;
1920 	struct file *fp;
1921 	struct vnode *vp;
1922 	struct vattr vattr;
1923 	off_t offset;
1924 	int error, noneg;
1925 	int vfslocked;
1926 
1927 	if ((error = fget(td, uap->fd, &fp)) != 0)
1928 		return (error);
1929 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1930 		fdrop(fp, td);
1931 		return (ESPIPE);
1932 	}
1933 	vp = fp->f_vnode;
1934 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1935 	noneg = (vp->v_type != VCHR);
1936 	offset = uap->offset;
1937 	switch (uap->whence) {
1938 	case L_INCR:
1939 		if (noneg &&
1940 		    (fp->f_offset < 0 ||
1941 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1942 			error = EOVERFLOW;
1943 			break;
1944 		}
1945 		offset += fp->f_offset;
1946 		break;
1947 	case L_XTND:
1948 		vn_lock(vp, LK_SHARED | LK_RETRY);
1949 		error = VOP_GETATTR(vp, &vattr, cred);
1950 		VOP_UNLOCK(vp, 0);
1951 		if (error)
1952 			break;
1953 		if (noneg &&
1954 		    (vattr.va_size > OFF_MAX ||
1955 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1956 			error = EOVERFLOW;
1957 			break;
1958 		}
1959 		offset += vattr.va_size;
1960 		break;
1961 	case L_SET:
1962 		break;
1963 	case SEEK_DATA:
1964 		error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td);
1965 		break;
1966 	case SEEK_HOLE:
1967 		error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td);
1968 		break;
1969 	default:
1970 		error = EINVAL;
1971 	}
1972 	if (error == 0 && noneg && offset < 0)
1973 		error = EINVAL;
1974 	if (error != 0)
1975 		goto drop;
1976 	fp->f_offset = offset;
1977 	*(off_t *)(td->td_retval) = fp->f_offset;
1978 drop:
1979 	fdrop(fp, td);
1980 	VFS_UNLOCK_GIANT(vfslocked);
1981 	return (error);
1982 }
1983 
1984 #if defined(COMPAT_43)
1985 /*
1986  * Reposition read/write file offset.
1987  */
1988 #ifndef _SYS_SYSPROTO_H_
1989 struct olseek_args {
1990 	int	fd;
1991 	long	offset;
1992 	int	whence;
1993 };
1994 #endif
1995 int
1996 olseek(td, uap)
1997 	struct thread *td;
1998 	register struct olseek_args /* {
1999 		int fd;
2000 		long offset;
2001 		int whence;
2002 	} */ *uap;
2003 {
2004 	struct lseek_args /* {
2005 		int fd;
2006 		int pad;
2007 		off_t offset;
2008 		int whence;
2009 	} */ nuap;
2010 
2011 	nuap.fd = uap->fd;
2012 	nuap.offset = uap->offset;
2013 	nuap.whence = uap->whence;
2014 	return (lseek(td, &nuap));
2015 }
2016 #endif /* COMPAT_43 */
2017 
2018 /* Version with the 'pad' argument */
2019 int
2020 freebsd6_lseek(td, uap)
2021 	struct thread *td;
2022 	register struct freebsd6_lseek_args *uap;
2023 {
2024 	struct lseek_args ouap;
2025 
2026 	ouap.fd = uap->fd;
2027 	ouap.offset = uap->offset;
2028 	ouap.whence = uap->whence;
2029 	return (lseek(td, &ouap));
2030 }
2031 
2032 /*
2033  * Check access permissions using passed credentials.
2034  */
2035 static int
2036 vn_access(vp, user_flags, cred, td)
2037 	struct vnode	*vp;
2038 	int		user_flags;
2039 	struct ucred	*cred;
2040 	struct thread	*td;
2041 {
2042 	int error;
2043 	accmode_t accmode;
2044 
2045 	/* Flags == 0 means only check for existence. */
2046 	error = 0;
2047 	if (user_flags) {
2048 		accmode = 0;
2049 		if (user_flags & R_OK)
2050 			accmode |= VREAD;
2051 		if (user_flags & W_OK)
2052 			accmode |= VWRITE;
2053 		if (user_flags & X_OK)
2054 			accmode |= VEXEC;
2055 #ifdef MAC
2056 		error = mac_vnode_check_access(cred, vp, accmode);
2057 		if (error)
2058 			return (error);
2059 #endif
2060 		if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
2061 			error = VOP_ACCESS(vp, accmode, cred, td);
2062 	}
2063 	return (error);
2064 }
2065 
2066 /*
2067  * Check access permissions using "real" credentials.
2068  */
2069 #ifndef _SYS_SYSPROTO_H_
2070 struct access_args {
2071 	char	*path;
2072 	int	flags;
2073 };
2074 #endif
2075 int
2076 access(td, uap)
2077 	struct thread *td;
2078 	register struct access_args /* {
2079 		char *path;
2080 		int flags;
2081 	} */ *uap;
2082 {
2083 
2084 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
2085 }
2086 
2087 #ifndef _SYS_SYSPROTO_H_
2088 struct faccessat_args {
2089 	int	dirfd;
2090 	char	*path;
2091 	int	mode;
2092 	int	flag;
2093 }
2094 #endif
2095 int
2096 faccessat(struct thread *td, struct faccessat_args *uap)
2097 {
2098 
2099 	if (uap->flag & ~AT_EACCESS)
2100 		return (EINVAL);
2101 	return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
2102 	    uap->mode));
2103 }
2104 
2105 int
2106 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2107 {
2108 
2109 	return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, mode));
2110 }
2111 
2112 int
2113 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
2114     int flags, int mode)
2115 {
2116 	struct ucred *cred, *tmpcred;
2117 	struct vnode *vp;
2118 	struct nameidata nd;
2119 	int vfslocked;
2120 	int error;
2121 
2122 	/*
2123 	 * Create and modify a temporary credential instead of one that
2124 	 * is potentially shared.  This could also mess up socket
2125 	 * buffer accounting which can run in an interrupt context.
2126 	 */
2127 	if (!(flags & AT_EACCESS)) {
2128 		cred = td->td_ucred;
2129 		tmpcred = crdup(cred);
2130 		tmpcred->cr_uid = cred->cr_ruid;
2131 		tmpcred->cr_groups[0] = cred->cr_rgid;
2132 		td->td_ucred = tmpcred;
2133 	} else
2134 		cred = tmpcred = td->td_ucred;
2135 	NDINIT_AT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
2136 	    AUDITVNODE1, pathseg, path, fd, td);
2137 	if ((error = namei(&nd)) != 0)
2138 		goto out1;
2139 	vfslocked = NDHASGIANT(&nd);
2140 	vp = nd.ni_vp;
2141 
2142 	error = vn_access(vp, mode, tmpcred, td);
2143 	NDFREE(&nd, NDF_ONLY_PNBUF);
2144 	vput(vp);
2145 	VFS_UNLOCK_GIANT(vfslocked);
2146 out1:
2147 	if (!(flags & AT_EACCESS)) {
2148 		td->td_ucred = cred;
2149 		crfree(tmpcred);
2150 	}
2151 	return (error);
2152 }
2153 
2154 /*
2155  * Check access permissions using "effective" credentials.
2156  */
2157 #ifndef _SYS_SYSPROTO_H_
2158 struct eaccess_args {
2159 	char	*path;
2160 	int	flags;
2161 };
2162 #endif
2163 int
2164 eaccess(td, uap)
2165 	struct thread *td;
2166 	register struct eaccess_args /* {
2167 		char *path;
2168 		int flags;
2169 	} */ *uap;
2170 {
2171 
2172 	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
2173 }
2174 
2175 int
2176 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
2177 {
2178 
2179 	return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, flags));
2180 }
2181 
2182 #if defined(COMPAT_43)
2183 /*
2184  * Get file status; this version follows links.
2185  */
2186 #ifndef _SYS_SYSPROTO_H_
2187 struct ostat_args {
2188 	char	*path;
2189 	struct ostat *ub;
2190 };
2191 #endif
2192 int
2193 ostat(td, uap)
2194 	struct thread *td;
2195 	register struct ostat_args /* {
2196 		char *path;
2197 		struct ostat *ub;
2198 	} */ *uap;
2199 {
2200 	struct stat sb;
2201 	struct ostat osb;
2202 	int error;
2203 
2204 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2205 	if (error)
2206 		return (error);
2207 	cvtstat(&sb, &osb);
2208 	error = copyout(&osb, uap->ub, sizeof (osb));
2209 	return (error);
2210 }
2211 
2212 /*
2213  * Get file status; this version does not follow links.
2214  */
2215 #ifndef _SYS_SYSPROTO_H_
2216 struct olstat_args {
2217 	char	*path;
2218 	struct ostat *ub;
2219 };
2220 #endif
2221 int
2222 olstat(td, uap)
2223 	struct thread *td;
2224 	register struct olstat_args /* {
2225 		char *path;
2226 		struct ostat *ub;
2227 	} */ *uap;
2228 {
2229 	struct stat sb;
2230 	struct ostat osb;
2231 	int error;
2232 
2233 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2234 	if (error)
2235 		return (error);
2236 	cvtstat(&sb, &osb);
2237 	error = copyout(&osb, uap->ub, sizeof (osb));
2238 	return (error);
2239 }
2240 
2241 /*
2242  * Convert from an old to a new stat structure.
2243  */
2244 void
2245 cvtstat(st, ost)
2246 	struct stat *st;
2247 	struct ostat *ost;
2248 {
2249 
2250 	ost->st_dev = st->st_dev;
2251 	ost->st_ino = st->st_ino;
2252 	ost->st_mode = st->st_mode;
2253 	ost->st_nlink = st->st_nlink;
2254 	ost->st_uid = st->st_uid;
2255 	ost->st_gid = st->st_gid;
2256 	ost->st_rdev = st->st_rdev;
2257 	if (st->st_size < (quad_t)1 << 32)
2258 		ost->st_size = st->st_size;
2259 	else
2260 		ost->st_size = -2;
2261 	ost->st_atime = st->st_atime;
2262 	ost->st_mtime = st->st_mtime;
2263 	ost->st_ctime = st->st_ctime;
2264 	ost->st_blksize = st->st_blksize;
2265 	ost->st_blocks = st->st_blocks;
2266 	ost->st_flags = st->st_flags;
2267 	ost->st_gen = st->st_gen;
2268 }
2269 #endif /* COMPAT_43 */
2270 
2271 /*
2272  * Get file status; this version follows links.
2273  */
2274 #ifndef _SYS_SYSPROTO_H_
2275 struct stat_args {
2276 	char	*path;
2277 	struct stat *ub;
2278 };
2279 #endif
2280 int
2281 stat(td, uap)
2282 	struct thread *td;
2283 	register struct stat_args /* {
2284 		char *path;
2285 		struct stat *ub;
2286 	} */ *uap;
2287 {
2288 	struct stat sb;
2289 	int error;
2290 
2291 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2292 	if (error == 0)
2293 		error = copyout(&sb, uap->ub, sizeof (sb));
2294 	return (error);
2295 }
2296 
2297 #ifndef _SYS_SYSPROTO_H_
2298 struct fstatat_args {
2299 	int	fd;
2300 	char	*path;
2301 	struct stat	*buf;
2302 	int	flag;
2303 }
2304 #endif
2305 int
2306 fstatat(struct thread *td, struct fstatat_args *uap)
2307 {
2308 	struct stat sb;
2309 	int error;
2310 
2311 	error = kern_statat(td, uap->flag, uap->fd, uap->path,
2312 	    UIO_USERSPACE, &sb);
2313 	if (error == 0)
2314 		error = copyout(&sb, uap->buf, sizeof (sb));
2315 	return (error);
2316 }
2317 
2318 int
2319 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2320 {
2321 
2322 	return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp));
2323 }
2324 
2325 int
2326 kern_statat(struct thread *td, int flag, int fd, char *path,
2327     enum uio_seg pathseg, struct stat *sbp)
2328 {
2329 
2330 	return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL));
2331 }
2332 
2333 int
2334 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path,
2335     enum uio_seg pathseg, struct stat *sbp,
2336     void (*hook)(struct vnode *vp, struct stat *sbp))
2337 {
2338 	struct nameidata nd;
2339 	struct stat sb;
2340 	int error, vfslocked;
2341 
2342 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2343 		return (EINVAL);
2344 
2345 	NDINIT_AT(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
2346 	    FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | MPSAFE, pathseg,
2347 	    path, fd, td);
2348 
2349 	if ((error = namei(&nd)) != 0)
2350 		return (error);
2351 	vfslocked = NDHASGIANT(&nd);
2352 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2353 	if (!error) {
2354 		SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0);
2355 		if (S_ISREG(sb.st_mode))
2356 			SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0);
2357 		if (__predict_false(hook != NULL))
2358 			hook(nd.ni_vp, &sb);
2359 	}
2360 	NDFREE(&nd, NDF_ONLY_PNBUF);
2361 	vput(nd.ni_vp);
2362 	VFS_UNLOCK_GIANT(vfslocked);
2363 	if (error)
2364 		return (error);
2365 	*sbp = sb;
2366 #ifdef KTRACE
2367 	if (KTRPOINT(td, KTR_STRUCT))
2368 		ktrstat(&sb);
2369 #endif
2370 	return (0);
2371 }
2372 
2373 /*
2374  * Get file status; this version does not follow links.
2375  */
2376 #ifndef _SYS_SYSPROTO_H_
2377 struct lstat_args {
2378 	char	*path;
2379 	struct stat *ub;
2380 };
2381 #endif
2382 int
2383 lstat(td, uap)
2384 	struct thread *td;
2385 	register struct lstat_args /* {
2386 		char *path;
2387 		struct stat *ub;
2388 	} */ *uap;
2389 {
2390 	struct stat sb;
2391 	int error;
2392 
2393 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2394 	if (error == 0)
2395 		error = copyout(&sb, uap->ub, sizeof (sb));
2396 	return (error);
2397 }
2398 
2399 int
2400 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2401 {
2402 
2403 	return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg,
2404 	    sbp));
2405 }
2406 
2407 /*
2408  * Implementation of the NetBSD [l]stat() functions.
2409  */
2410 void
2411 cvtnstat(sb, nsb)
2412 	struct stat *sb;
2413 	struct nstat *nsb;
2414 {
2415 	bzero(nsb, sizeof *nsb);
2416 	nsb->st_dev = sb->st_dev;
2417 	nsb->st_ino = sb->st_ino;
2418 	nsb->st_mode = sb->st_mode;
2419 	nsb->st_nlink = sb->st_nlink;
2420 	nsb->st_uid = sb->st_uid;
2421 	nsb->st_gid = sb->st_gid;
2422 	nsb->st_rdev = sb->st_rdev;
2423 	nsb->st_atimespec = sb->st_atimespec;
2424 	nsb->st_mtimespec = sb->st_mtimespec;
2425 	nsb->st_ctimespec = sb->st_ctimespec;
2426 	nsb->st_size = sb->st_size;
2427 	nsb->st_blocks = sb->st_blocks;
2428 	nsb->st_blksize = sb->st_blksize;
2429 	nsb->st_flags = sb->st_flags;
2430 	nsb->st_gen = sb->st_gen;
2431 	nsb->st_birthtimespec = sb->st_birthtimespec;
2432 }
2433 
2434 #ifndef _SYS_SYSPROTO_H_
2435 struct nstat_args {
2436 	char	*path;
2437 	struct nstat *ub;
2438 };
2439 #endif
2440 int
2441 nstat(td, uap)
2442 	struct thread *td;
2443 	register struct nstat_args /* {
2444 		char *path;
2445 		struct nstat *ub;
2446 	} */ *uap;
2447 {
2448 	struct stat sb;
2449 	struct nstat nsb;
2450 	int error;
2451 
2452 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2453 	if (error)
2454 		return (error);
2455 	cvtnstat(&sb, &nsb);
2456 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2457 	return (error);
2458 }
2459 
2460 /*
2461  * NetBSD lstat.  Get file status; this version does not follow links.
2462  */
2463 #ifndef _SYS_SYSPROTO_H_
2464 struct lstat_args {
2465 	char	*path;
2466 	struct stat *ub;
2467 };
2468 #endif
2469 int
2470 nlstat(td, uap)
2471 	struct thread *td;
2472 	register struct nlstat_args /* {
2473 		char *path;
2474 		struct nstat *ub;
2475 	} */ *uap;
2476 {
2477 	struct stat sb;
2478 	struct nstat nsb;
2479 	int error;
2480 
2481 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2482 	if (error)
2483 		return (error);
2484 	cvtnstat(&sb, &nsb);
2485 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2486 	return (error);
2487 }
2488 
2489 /*
2490  * Get configurable pathname variables.
2491  */
2492 #ifndef _SYS_SYSPROTO_H_
2493 struct pathconf_args {
2494 	char	*path;
2495 	int	name;
2496 };
2497 #endif
2498 int
2499 pathconf(td, uap)
2500 	struct thread *td;
2501 	register struct pathconf_args /* {
2502 		char *path;
2503 		int name;
2504 	} */ *uap;
2505 {
2506 
2507 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2508 }
2509 
2510 int
2511 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2512 {
2513 	struct nameidata nd;
2514 	int error, vfslocked;
2515 
2516 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
2517 	    AUDITVNODE1, pathseg, path, td);
2518 	if ((error = namei(&nd)) != 0)
2519 		return (error);
2520 	vfslocked = NDHASGIANT(&nd);
2521 	NDFREE(&nd, NDF_ONLY_PNBUF);
2522 
2523 	/* If asynchronous I/O is available, it works for all files. */
2524 	if (name == _PC_ASYNC_IO)
2525 		td->td_retval[0] = async_io_version;
2526 	else
2527 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2528 	vput(nd.ni_vp);
2529 	VFS_UNLOCK_GIANT(vfslocked);
2530 	return (error);
2531 }
2532 
2533 /*
2534  * Return target name of a symbolic link.
2535  */
2536 #ifndef _SYS_SYSPROTO_H_
2537 struct readlink_args {
2538 	char	*path;
2539 	char	*buf;
2540 	size_t	count;
2541 };
2542 #endif
2543 int
2544 readlink(td, uap)
2545 	struct thread *td;
2546 	register struct readlink_args /* {
2547 		char *path;
2548 		char *buf;
2549 		size_t count;
2550 	} */ *uap;
2551 {
2552 
2553 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2554 	    UIO_USERSPACE, uap->count));
2555 }
2556 #ifndef _SYS_SYSPROTO_H_
2557 struct readlinkat_args {
2558 	int	fd;
2559 	char	*path;
2560 	char	*buf;
2561 	size_t	bufsize;
2562 };
2563 #endif
2564 int
2565 readlinkat(struct thread *td, struct readlinkat_args *uap)
2566 {
2567 
2568 	return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
2569 	    uap->buf, UIO_USERSPACE, uap->bufsize));
2570 }
2571 
2572 int
2573 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2574     enum uio_seg bufseg, size_t count)
2575 {
2576 
2577 	return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg,
2578 	    count));
2579 }
2580 
2581 int
2582 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
2583     char *buf, enum uio_seg bufseg, size_t count)
2584 {
2585 	struct vnode *vp;
2586 	struct iovec aiov;
2587 	struct uio auio;
2588 	int error;
2589 	struct nameidata nd;
2590 	int vfslocked;
2591 
2592 	if (count > INT_MAX)
2593 		return (EINVAL);
2594 
2595 	NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
2596 	    AUDITVNODE1, pathseg, path, fd, td);
2597 
2598 	if ((error = namei(&nd)) != 0)
2599 		return (error);
2600 	NDFREE(&nd, NDF_ONLY_PNBUF);
2601 	vfslocked = NDHASGIANT(&nd);
2602 	vp = nd.ni_vp;
2603 #ifdef MAC
2604 	error = mac_vnode_check_readlink(td->td_ucred, vp);
2605 	if (error) {
2606 		vput(vp);
2607 		VFS_UNLOCK_GIANT(vfslocked);
2608 		return (error);
2609 	}
2610 #endif
2611 	if (vp->v_type != VLNK)
2612 		error = EINVAL;
2613 	else {
2614 		aiov.iov_base = buf;
2615 		aiov.iov_len = count;
2616 		auio.uio_iov = &aiov;
2617 		auio.uio_iovcnt = 1;
2618 		auio.uio_offset = 0;
2619 		auio.uio_rw = UIO_READ;
2620 		auio.uio_segflg = bufseg;
2621 		auio.uio_td = td;
2622 		auio.uio_resid = count;
2623 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2624 	}
2625 	vput(vp);
2626 	VFS_UNLOCK_GIANT(vfslocked);
2627 	td->td_retval[0] = count - auio.uio_resid;
2628 	return (error);
2629 }
2630 
2631 /*
2632  * Common implementation code for chflags() and fchflags().
2633  */
2634 static int
2635 setfflags(td, vp, flags)
2636 	struct thread *td;
2637 	struct vnode *vp;
2638 	int flags;
2639 {
2640 	int error;
2641 	struct mount *mp;
2642 	struct vattr vattr;
2643 
2644 	/*
2645 	 * Prevent non-root users from setting flags on devices.  When
2646 	 * a device is reused, users can retain ownership of the device
2647 	 * if they are allowed to set flags and programs assume that
2648 	 * chown can't fail when done as root.
2649 	 */
2650 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2651 		error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
2652 		if (error)
2653 			return (error);
2654 	}
2655 
2656 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2657 		return (error);
2658 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2659 	VATTR_NULL(&vattr);
2660 	vattr.va_flags = flags;
2661 #ifdef MAC
2662 	error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
2663 	if (error == 0)
2664 #endif
2665 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
2666 	VOP_UNLOCK(vp, 0);
2667 	vn_finished_write(mp);
2668 	return (error);
2669 }
2670 
2671 /*
2672  * Change flags of a file given a path name.
2673  */
2674 #ifndef _SYS_SYSPROTO_H_
2675 struct chflags_args {
2676 	char	*path;
2677 	int	flags;
2678 };
2679 #endif
2680 int
2681 chflags(td, uap)
2682 	struct thread *td;
2683 	register struct chflags_args /* {
2684 		char *path;
2685 		int flags;
2686 	} */ *uap;
2687 {
2688 	int error;
2689 	struct nameidata nd;
2690 	int vfslocked;
2691 
2692 	AUDIT_ARG(fflags, uap->flags);
2693 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2694 	    uap->path, td);
2695 	if ((error = namei(&nd)) != 0)
2696 		return (error);
2697 	NDFREE(&nd, NDF_ONLY_PNBUF);
2698 	vfslocked = NDHASGIANT(&nd);
2699 	error = setfflags(td, nd.ni_vp, uap->flags);
2700 	vrele(nd.ni_vp);
2701 	VFS_UNLOCK_GIANT(vfslocked);
2702 	return (error);
2703 }
2704 
2705 /*
2706  * Same as chflags() but doesn't follow symlinks.
2707  */
2708 int
2709 lchflags(td, uap)
2710 	struct thread *td;
2711 	register struct lchflags_args /* {
2712 		char *path;
2713 		int flags;
2714 	} */ *uap;
2715 {
2716 	int error;
2717 	struct nameidata nd;
2718 	int vfslocked;
2719 
2720 	AUDIT_ARG(fflags, uap->flags);
2721 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2722 	    uap->path, td);
2723 	if ((error = namei(&nd)) != 0)
2724 		return (error);
2725 	vfslocked = NDHASGIANT(&nd);
2726 	NDFREE(&nd, NDF_ONLY_PNBUF);
2727 	error = setfflags(td, nd.ni_vp, uap->flags);
2728 	vrele(nd.ni_vp);
2729 	VFS_UNLOCK_GIANT(vfslocked);
2730 	return (error);
2731 }
2732 
2733 /*
2734  * Change flags of a file given a file descriptor.
2735  */
2736 #ifndef _SYS_SYSPROTO_H_
2737 struct fchflags_args {
2738 	int	fd;
2739 	int	flags;
2740 };
2741 #endif
2742 int
2743 fchflags(td, uap)
2744 	struct thread *td;
2745 	register struct fchflags_args /* {
2746 		int fd;
2747 		int flags;
2748 	} */ *uap;
2749 {
2750 	struct file *fp;
2751 	int vfslocked;
2752 	int error;
2753 
2754 	AUDIT_ARG(fd, uap->fd);
2755 	AUDIT_ARG(fflags, uap->flags);
2756 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2757 		return (error);
2758 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2759 #ifdef AUDIT
2760 	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
2761 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2762 	VOP_UNLOCK(fp->f_vnode, 0);
2763 #endif
2764 	error = setfflags(td, fp->f_vnode, uap->flags);
2765 	VFS_UNLOCK_GIANT(vfslocked);
2766 	fdrop(fp, td);
2767 	return (error);
2768 }
2769 
2770 /*
2771  * Common implementation code for chmod(), lchmod() and fchmod().
2772  */
2773 static int
2774 setfmode(td, vp, mode)
2775 	struct thread *td;
2776 	struct vnode *vp;
2777 	int mode;
2778 {
2779 	int error;
2780 	struct mount *mp;
2781 	struct vattr vattr;
2782 
2783 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2784 		return (error);
2785 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2786 	VATTR_NULL(&vattr);
2787 	vattr.va_mode = mode & ALLPERMS;
2788 #ifdef MAC
2789 	error = mac_vnode_check_setmode(td->td_ucred, vp, vattr.va_mode);
2790 	if (error == 0)
2791 #endif
2792 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
2793 	VOP_UNLOCK(vp, 0);
2794 	vn_finished_write(mp);
2795 	return (error);
2796 }
2797 
2798 /*
2799  * Change mode of a file given path name.
2800  */
2801 #ifndef _SYS_SYSPROTO_H_
2802 struct chmod_args {
2803 	char	*path;
2804 	int	mode;
2805 };
2806 #endif
2807 int
2808 chmod(td, uap)
2809 	struct thread *td;
2810 	register struct chmod_args /* {
2811 		char *path;
2812 		int mode;
2813 	} */ *uap;
2814 {
2815 
2816 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2817 }
2818 
2819 #ifndef _SYS_SYSPROTO_H_
2820 struct fchmodat_args {
2821 	int	dirfd;
2822 	char	*path;
2823 	mode_t	mode;
2824 	int	flag;
2825 }
2826 #endif
2827 int
2828 fchmodat(struct thread *td, struct fchmodat_args *uap)
2829 {
2830 	int flag = uap->flag;
2831 	int fd = uap->fd;
2832 	char *path = uap->path;
2833 	mode_t mode = uap->mode;
2834 
2835 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2836 		return (EINVAL);
2837 
2838 	return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
2839 }
2840 
2841 int
2842 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2843 {
2844 
2845 	return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0));
2846 }
2847 
2848 /*
2849  * Change mode of a file given path name (don't follow links.)
2850  */
2851 #ifndef _SYS_SYSPROTO_H_
2852 struct lchmod_args {
2853 	char	*path;
2854 	int	mode;
2855 };
2856 #endif
2857 int
2858 lchmod(td, uap)
2859 	struct thread *td;
2860 	register struct lchmod_args /* {
2861 		char *path;
2862 		int mode;
2863 	} */ *uap;
2864 {
2865 
2866 	return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
2867 	    uap->mode, AT_SYMLINK_NOFOLLOW));
2868 }
2869 
2870 
2871 int
2872 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
2873     mode_t mode, int flag)
2874 {
2875 	int error;
2876 	struct nameidata nd;
2877 	int vfslocked;
2878 	int follow;
2879 
2880 	AUDIT_ARG(mode, mode);
2881 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2882 	NDINIT_AT(&nd, LOOKUP,  follow | MPSAFE | AUDITVNODE1, pathseg, path,
2883 	    fd, td);
2884 	if ((error = namei(&nd)) != 0)
2885 		return (error);
2886 	vfslocked = NDHASGIANT(&nd);
2887 	NDFREE(&nd, NDF_ONLY_PNBUF);
2888 	error = setfmode(td, nd.ni_vp, mode);
2889 	vrele(nd.ni_vp);
2890 	VFS_UNLOCK_GIANT(vfslocked);
2891 	return (error);
2892 }
2893 
2894 /*
2895  * Change mode of a file given a file descriptor.
2896  */
2897 #ifndef _SYS_SYSPROTO_H_
2898 struct fchmod_args {
2899 	int	fd;
2900 	int	mode;
2901 };
2902 #endif
2903 int
2904 fchmod(td, uap)
2905 	struct thread *td;
2906 	register struct fchmod_args /* {
2907 		int fd;
2908 		int mode;
2909 	} */ *uap;
2910 {
2911 	struct file *fp;
2912 	int vfslocked;
2913 	int error;
2914 
2915 	AUDIT_ARG(fd, uap->fd);
2916 	AUDIT_ARG(mode, uap->mode);
2917 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2918 		return (error);
2919 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2920 #ifdef AUDIT
2921 	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
2922 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2923 	VOP_UNLOCK(fp->f_vnode, 0);
2924 #endif
2925 	error = setfmode(td, fp->f_vnode, uap->mode);
2926 	VFS_UNLOCK_GIANT(vfslocked);
2927 	fdrop(fp, td);
2928 	return (error);
2929 }
2930 
2931 /*
2932  * Common implementation for chown(), lchown(), and fchown()
2933  */
2934 static int
2935 setfown(td, vp, uid, gid)
2936 	struct thread *td;
2937 	struct vnode *vp;
2938 	uid_t uid;
2939 	gid_t gid;
2940 {
2941 	int error;
2942 	struct mount *mp;
2943 	struct vattr vattr;
2944 
2945 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2946 		return (error);
2947 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2948 	VATTR_NULL(&vattr);
2949 	vattr.va_uid = uid;
2950 	vattr.va_gid = gid;
2951 #ifdef MAC
2952 	error = mac_vnode_check_setowner(td->td_ucred, vp, vattr.va_uid,
2953 	    vattr.va_gid);
2954 	if (error == 0)
2955 #endif
2956 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
2957 	VOP_UNLOCK(vp, 0);
2958 	vn_finished_write(mp);
2959 	return (error);
2960 }
2961 
2962 /*
2963  * Set ownership given a path name.
2964  */
2965 #ifndef _SYS_SYSPROTO_H_
2966 struct chown_args {
2967 	char	*path;
2968 	int	uid;
2969 	int	gid;
2970 };
2971 #endif
2972 int
2973 chown(td, uap)
2974 	struct thread *td;
2975 	register struct chown_args /* {
2976 		char *path;
2977 		int uid;
2978 		int gid;
2979 	} */ *uap;
2980 {
2981 
2982 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2983 }
2984 
2985 #ifndef _SYS_SYSPROTO_H_
2986 struct fchownat_args {
2987 	int fd;
2988 	const char * path;
2989 	uid_t uid;
2990 	gid_t gid;
2991 	int flag;
2992 };
2993 #endif
2994 int
2995 fchownat(struct thread *td, struct fchownat_args *uap)
2996 {
2997 	int flag;
2998 
2999 	flag = uap->flag;
3000 	if (flag & ~AT_SYMLINK_NOFOLLOW)
3001 		return (EINVAL);
3002 
3003 	return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
3004 	    uap->gid, uap->flag));
3005 }
3006 
3007 int
3008 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
3009     int gid)
3010 {
3011 
3012 	return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0));
3013 }
3014 
3015 int
3016 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
3017     int uid, int gid, int flag)
3018 {
3019 	struct nameidata nd;
3020 	int error, vfslocked, follow;
3021 
3022 	AUDIT_ARG(owner, uid, gid);
3023 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
3024 	NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg, path,
3025 	    fd, td);
3026 
3027 	if ((error = namei(&nd)) != 0)
3028 		return (error);
3029 	vfslocked = NDHASGIANT(&nd);
3030 	NDFREE(&nd, NDF_ONLY_PNBUF);
3031 	error = setfown(td, nd.ni_vp, uid, gid);
3032 	vrele(nd.ni_vp);
3033 	VFS_UNLOCK_GIANT(vfslocked);
3034 	return (error);
3035 }
3036 
3037 /*
3038  * Set ownership given a path name, do not cross symlinks.
3039  */
3040 #ifndef _SYS_SYSPROTO_H_
3041 struct lchown_args {
3042 	char	*path;
3043 	int	uid;
3044 	int	gid;
3045 };
3046 #endif
3047 int
3048 lchown(td, uap)
3049 	struct thread *td;
3050 	register struct lchown_args /* {
3051 		char *path;
3052 		int uid;
3053 		int gid;
3054 	} */ *uap;
3055 {
3056 
3057 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
3058 }
3059 
3060 int
3061 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
3062     int gid)
3063 {
3064 
3065 	return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid,
3066 	    AT_SYMLINK_NOFOLLOW));
3067 }
3068 
3069 /*
3070  * Set ownership given a file descriptor.
3071  */
3072 #ifndef _SYS_SYSPROTO_H_
3073 struct fchown_args {
3074 	int	fd;
3075 	int	uid;
3076 	int	gid;
3077 };
3078 #endif
3079 int
3080 fchown(td, uap)
3081 	struct thread *td;
3082 	register struct fchown_args /* {
3083 		int fd;
3084 		int uid;
3085 		int gid;
3086 	} */ *uap;
3087 {
3088 	struct file *fp;
3089 	int vfslocked;
3090 	int error;
3091 
3092 	AUDIT_ARG(fd, uap->fd);
3093 	AUDIT_ARG(owner, uap->uid, uap->gid);
3094 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3095 		return (error);
3096 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
3097 #ifdef AUDIT
3098 	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
3099 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
3100 	VOP_UNLOCK(fp->f_vnode, 0);
3101 #endif
3102 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
3103 	VFS_UNLOCK_GIANT(vfslocked);
3104 	fdrop(fp, td);
3105 	return (error);
3106 }
3107 
3108 /*
3109  * Common implementation code for utimes(), lutimes(), and futimes().
3110  */
3111 static int
3112 getutimes(usrtvp, tvpseg, tsp)
3113 	const struct timeval *usrtvp;
3114 	enum uio_seg tvpseg;
3115 	struct timespec *tsp;
3116 {
3117 	struct timeval tv[2];
3118 	const struct timeval *tvp;
3119 	int error;
3120 
3121 	if (usrtvp == NULL) {
3122 		microtime(&tv[0]);
3123 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
3124 		tsp[1] = tsp[0];
3125 	} else {
3126 		if (tvpseg == UIO_SYSSPACE) {
3127 			tvp = usrtvp;
3128 		} else {
3129 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
3130 				return (error);
3131 			tvp = tv;
3132 		}
3133 
3134 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
3135 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
3136 			return (EINVAL);
3137 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
3138 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
3139 	}
3140 	return (0);
3141 }
3142 
3143 /*
3144  * Common implementation code for utimes(), lutimes(), and futimes().
3145  */
3146 static int
3147 setutimes(td, vp, ts, numtimes, nullflag)
3148 	struct thread *td;
3149 	struct vnode *vp;
3150 	const struct timespec *ts;
3151 	int numtimes;
3152 	int nullflag;
3153 {
3154 	int error, setbirthtime;
3155 	struct mount *mp;
3156 	struct vattr vattr;
3157 
3158 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3159 		return (error);
3160 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3161 	setbirthtime = 0;
3162 	if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
3163 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
3164 		setbirthtime = 1;
3165 	VATTR_NULL(&vattr);
3166 	vattr.va_atime = ts[0];
3167 	vattr.va_mtime = ts[1];
3168 	if (setbirthtime)
3169 		vattr.va_birthtime = ts[1];
3170 	if (numtimes > 2)
3171 		vattr.va_birthtime = ts[2];
3172 	if (nullflag)
3173 		vattr.va_vaflags |= VA_UTIMES_NULL;
3174 #ifdef MAC
3175 	error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
3176 	    vattr.va_mtime);
3177 #endif
3178 	if (error == 0)
3179 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
3180 	VOP_UNLOCK(vp, 0);
3181 	vn_finished_write(mp);
3182 	return (error);
3183 }
3184 
3185 /*
3186  * Set the access and modification times of a file.
3187  */
3188 #ifndef _SYS_SYSPROTO_H_
3189 struct utimes_args {
3190 	char	*path;
3191 	struct	timeval *tptr;
3192 };
3193 #endif
3194 int
3195 utimes(td, uap)
3196 	struct thread *td;
3197 	register struct utimes_args /* {
3198 		char *path;
3199 		struct timeval *tptr;
3200 	} */ *uap;
3201 {
3202 
3203 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
3204 	    UIO_USERSPACE));
3205 }
3206 
3207 #ifndef _SYS_SYSPROTO_H_
3208 struct futimesat_args {
3209 	int fd;
3210 	const char * path;
3211 	const struct timeval * times;
3212 };
3213 #endif
3214 int
3215 futimesat(struct thread *td, struct futimesat_args *uap)
3216 {
3217 
3218 	return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
3219 	    uap->times, UIO_USERSPACE));
3220 }
3221 
3222 int
3223 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
3224     struct timeval *tptr, enum uio_seg tptrseg)
3225 {
3226 
3227 	return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg));
3228 }
3229 
3230 int
3231 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
3232     struct timeval *tptr, enum uio_seg tptrseg)
3233 {
3234 	struct nameidata nd;
3235 	struct timespec ts[2];
3236 	int error, vfslocked;
3237 
3238 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
3239 		return (error);
3240 	NDINIT_AT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path,
3241 	    fd, td);
3242 
3243 	if ((error = namei(&nd)) != 0)
3244 		return (error);
3245 	vfslocked = NDHASGIANT(&nd);
3246 	NDFREE(&nd, NDF_ONLY_PNBUF);
3247 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
3248 	vrele(nd.ni_vp);
3249 	VFS_UNLOCK_GIANT(vfslocked);
3250 	return (error);
3251 }
3252 
3253 /*
3254  * Set the access and modification times of a file.
3255  */
3256 #ifndef _SYS_SYSPROTO_H_
3257 struct lutimes_args {
3258 	char	*path;
3259 	struct	timeval *tptr;
3260 };
3261 #endif
3262 int
3263 lutimes(td, uap)
3264 	struct thread *td;
3265 	register struct lutimes_args /* {
3266 		char *path;
3267 		struct timeval *tptr;
3268 	} */ *uap;
3269 {
3270 
3271 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
3272 	    UIO_USERSPACE));
3273 }
3274 
3275 int
3276 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
3277     struct timeval *tptr, enum uio_seg tptrseg)
3278 {
3279 	struct timespec ts[2];
3280 	int error;
3281 	struct nameidata nd;
3282 	int vfslocked;
3283 
3284 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
3285 		return (error);
3286 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3287 	if ((error = namei(&nd)) != 0)
3288 		return (error);
3289 	vfslocked = NDHASGIANT(&nd);
3290 	NDFREE(&nd, NDF_ONLY_PNBUF);
3291 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
3292 	vrele(nd.ni_vp);
3293 	VFS_UNLOCK_GIANT(vfslocked);
3294 	return (error);
3295 }
3296 
3297 /*
3298  * Set the access and modification times of a file.
3299  */
3300 #ifndef _SYS_SYSPROTO_H_
3301 struct futimes_args {
3302 	int	fd;
3303 	struct	timeval *tptr;
3304 };
3305 #endif
3306 int
3307 futimes(td, uap)
3308 	struct thread *td;
3309 	register struct futimes_args /* {
3310 		int  fd;
3311 		struct timeval *tptr;
3312 	} */ *uap;
3313 {
3314 
3315 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
3316 }
3317 
3318 int
3319 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
3320     enum uio_seg tptrseg)
3321 {
3322 	struct timespec ts[2];
3323 	struct file *fp;
3324 	int vfslocked;
3325 	int error;
3326 
3327 	AUDIT_ARG(fd, fd);
3328 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
3329 		return (error);
3330 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
3331 		return (error);
3332 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
3333 #ifdef AUDIT
3334 	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
3335 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
3336 	VOP_UNLOCK(fp->f_vnode, 0);
3337 #endif
3338 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
3339 	VFS_UNLOCK_GIANT(vfslocked);
3340 	fdrop(fp, td);
3341 	return (error);
3342 }
3343 
3344 /*
3345  * Truncate a file given its path name.
3346  */
3347 #ifndef _SYS_SYSPROTO_H_
3348 struct truncate_args {
3349 	char	*path;
3350 	int	pad;
3351 	off_t	length;
3352 };
3353 #endif
3354 int
3355 truncate(td, uap)
3356 	struct thread *td;
3357 	register struct truncate_args /* {
3358 		char *path;
3359 		int pad;
3360 		off_t length;
3361 	} */ *uap;
3362 {
3363 
3364 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3365 }
3366 
3367 int
3368 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3369 {
3370 	struct mount *mp;
3371 	struct vnode *vp;
3372 	struct vattr vattr;
3373 	int error;
3374 	struct nameidata nd;
3375 	int vfslocked;
3376 
3377 	if (length < 0)
3378 		return(EINVAL);
3379 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3380 	if ((error = namei(&nd)) != 0)
3381 		return (error);
3382 	vfslocked = NDHASGIANT(&nd);
3383 	vp = nd.ni_vp;
3384 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3385 		vrele(vp);
3386 		VFS_UNLOCK_GIANT(vfslocked);
3387 		return (error);
3388 	}
3389 	NDFREE(&nd, NDF_ONLY_PNBUF);
3390 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3391 	if (vp->v_type == VDIR)
3392 		error = EISDIR;
3393 #ifdef MAC
3394 	else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
3395 	}
3396 #endif
3397 	else if ((error = vn_writechk(vp)) == 0 &&
3398 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3399 		VATTR_NULL(&vattr);
3400 		vattr.va_size = length;
3401 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
3402 	}
3403 	vput(vp);
3404 	vn_finished_write(mp);
3405 	VFS_UNLOCK_GIANT(vfslocked);
3406 	return (error);
3407 }
3408 
#if defined(COMPAT_43)
/*
 * COMPAT_43 truncate: takes the length as a long.  The arguments are
 * repackaged into a struct truncate_args (path and length only; pad is
 * left unset) and handed to truncate().
 */
#ifndef _SYS_SYSPROTO_H_
struct otruncate_args {
	char	*path;
	long	length;
};
#endif
int
otruncate(struct thread *td, struct otruncate_args *uap)
{
	struct truncate_args nuap;

	nuap.path = uap->path;
	nuap.length = uap->length;
	return (truncate(td, &nuap));
}
#endif /* COMPAT_43 */
3438 
3439 /* Versions with the pad argument */
3440 int
3441 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
3442 {
3443 	struct truncate_args ouap;
3444 
3445 	ouap.path = uap->path;
3446 	ouap.length = uap->length;
3447 	return (truncate(td, &ouap));
3448 }
3449 
3450 int
3451 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
3452 {
3453 	struct ftruncate_args ouap;
3454 
3455 	ouap.fd = uap->fd;
3456 	ouap.length = uap->length;
3457 	return (ftruncate(td, &ouap));
3458 }
3459 
3460 /*
3461  * Sync an open file.
3462  */
3463 #ifndef _SYS_SYSPROTO_H_
3464 struct fsync_args {
3465 	int	fd;
3466 };
3467 #endif
3468 int
3469 fsync(td, uap)
3470 	struct thread *td;
3471 	struct fsync_args /* {
3472 		int fd;
3473 	} */ *uap;
3474 {
3475 	struct vnode *vp;
3476 	struct mount *mp;
3477 	struct file *fp;
3478 	int vfslocked;
3479 	int error;
3480 
3481 	AUDIT_ARG(fd, uap->fd);
3482 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3483 		return (error);
3484 	vp = fp->f_vnode;
3485 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3486 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3487 		goto drop;
3488 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3489 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3490 	if (vp->v_object != NULL) {
3491 		VM_OBJECT_LOCK(vp->v_object);
3492 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3493 		VM_OBJECT_UNLOCK(vp->v_object);
3494 	}
3495 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3496 
3497 	VOP_UNLOCK(vp, 0);
3498 	vn_finished_write(mp);
3499 drop:
3500 	VFS_UNLOCK_GIANT(vfslocked);
3501 	fdrop(fp, td);
3502 	return (error);
3503 }
3504 
3505 /*
3506  * Rename files.  Source and destination must either both be directories, or
3507  * both not be directories.  If target is a directory, it must be empty.
3508  */
3509 #ifndef _SYS_SYSPROTO_H_
3510 struct rename_args {
3511 	char	*from;
3512 	char	*to;
3513 };
3514 #endif
3515 int
3516 rename(td, uap)
3517 	struct thread *td;
3518 	register struct rename_args /* {
3519 		char *from;
3520 		char *to;
3521 	} */ *uap;
3522 {
3523 
3524 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3525 }
3526 
3527 #ifndef _SYS_SYSPROTO_H_
3528 struct renameat_args {
3529 	int	oldfd;
3530 	char	*old;
3531 	int	newfd;
3532 	char	*new;
3533 };
3534 #endif
3535 int
3536 renameat(struct thread *td, struct renameat_args *uap)
3537 {
3538 
3539 	return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
3540 	    UIO_USERSPACE));
3541 }
3542 
3543 int
3544 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3545 {
3546 
3547 	return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg));
3548 }
3549 
/*
 * Rename `old' (looked up relative to oldfd) to `new' (looked up relative
 * to newfd); both paths live in the address space given by pathseg.
 *
 * Returns 0 on success, and also 0 when source and target resolve to the
 * same vnode (tracked internally with error == -1).  Otherwise returns an
 * errno: ENOTDIR/EISDIR when exactly one of source and existing target
 * is a directory, EINVAL when the source vnode is the target's parent
 * directory or when the target lookup fails with EISDIR for a directory
 * source, or the error from namei(), vn_start_write(), the MAC checks or
 * VOP_RENAME().
 */
int
kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
    enum uio_seg pathseg)
{
	struct mount *mp = NULL;
	struct vnode *tvp, *fvp, *tdvp;
	struct nameidata fromnd, tond;
	int tvfslocked;
	int fvfslocked;
	int error;

	bwillwrite();
	/*
	 * With MAC the source lookup returns parent and leaf locked
	 * (LOCKPARENT | LOCKLEAF) for the policy check below; without MAC
	 * only WANTPARENT is requested.
	 */
#ifdef MAC
	NDINIT_AT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
	    AUDITVNODE1, pathseg, old, oldfd, td);
#else
	NDINIT_AT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
	    AUDITVNODE1, pathseg, old, oldfd, td);
#endif

	if ((error = namei(&fromnd)) != 0)
		return (error);
	fvfslocked = NDHASGIANT(&fromnd);
	/* Stays 0 unless the target lookup succeeds; used at out1. */
	tvfslocked = 0;
#ifdef MAC
	error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
	    fromnd.ni_vp, &fromnd.ni_cnd);
	/* Unlock the source vnodes; avoid unlocking twice if they coincide. */
	VOP_UNLOCK(fromnd.ni_dvp, 0);
	if (fromnd.ni_dvp != fromnd.ni_vp)
		VOP_UNLOCK(fromnd.ni_vp, 0);
#endif
	fvp = fromnd.ni_vp;
	/* Without MAC, error is still 0 here from the successful namei(). */
	if (error == 0)
		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
	if (error != 0) {
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}
	NDINIT_AT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
	    MPSAFE | AUDITVNODE2, pathseg, new, newfd, td);
	if (fromnd.ni_vp->v_type == VDIR)
		tond.ni_cnd.cn_flags |= WILLBEDIR;
	if ((error = namei(&tond)) != 0) {
		/* Translate error code for rename("dir1", "dir2/."). */
		if (error == EISDIR && fvp->v_type == VDIR)
			error = EINVAL;
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		vn_finished_write(mp);
		goto out1;
	}
	tvfslocked = NDHASGIANT(&tond);
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;
	/* An existing target must be of the same kind as the source. */
	if (tvp != NULL) {
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
	}
	/* The source may not be the directory that will contain the target. */
	if (fvp == tdvp) {
		error = EINVAL;
		goto out;
	}
	/*
	 * If the source is the same as the destination (that is, if they
	 * are links to the same vnode), then there is nothing to do.
	 */
	if (fvp == tvp)
		error = -1;
#ifdef MAC
	else
		error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
#endif
out:
	if (!error) {
		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
	} else {
		/*
		 * VOP_RENAME() is not called on this path, so the vnodes
		 * from both lookups are released here.  NOTE(review):
		 * presumably VOP_RENAME() disposes of them itself on the
		 * other path -- confirm against VOP_RENAME(9).
		 */
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
		if (tvp)
			vput(tvp);
		/* When tdvp == tvp the vput() above already unlocked it. */
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
	}
	vrele(tond.ni_startdir);
	vn_finished_write(mp);
out1:
	if (fromnd.ni_startdir)
		vrele(fromnd.ni_startdir);
	VFS_UNLOCK_GIANT(fvfslocked);
	VFS_UNLOCK_GIANT(tvfslocked);
	/* -1 is the internal "same vnode, nothing to do" marker. */
	if (error == -1)
		return (0);
	return (error);
}
3660 
3661 /*
3662  * Make a directory file.
3663  */
3664 #ifndef _SYS_SYSPROTO_H_
3665 struct mkdir_args {
3666 	char	*path;
3667 	int	mode;
3668 };
3669 #endif
3670 int
3671 mkdir(td, uap)
3672 	struct thread *td;
3673 	register struct mkdir_args /* {
3674 		char *path;
3675 		int mode;
3676 	} */ *uap;
3677 {
3678 
3679 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3680 }
3681 
3682 #ifndef _SYS_SYSPROTO_H_
3683 struct mkdirat_args {
3684 	int	fd;
3685 	char	*path;
3686 	mode_t	mode;
3687 };
3688 #endif
3689 int
3690 mkdirat(struct thread *td, struct mkdirat_args *uap)
3691 {
3692 
3693 	return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
3694 }
3695 
3696 int
3697 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3698 {
3699 
3700 	return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode));
3701 }
3702 
3703 int
3704 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
3705     int mode)
3706 {
3707 	struct mount *mp;
3708 	struct vnode *vp;
3709 	struct vattr vattr;
3710 	int error;
3711 	struct nameidata nd;
3712 	int vfslocked;
3713 
3714 	AUDIT_ARG(mode, mode);
3715 restart:
3716 	bwillwrite();
3717 	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3718 	    segflg, path, fd, td);
3719 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3720 	if ((error = namei(&nd)) != 0)
3721 		return (error);
3722 	vfslocked = NDHASGIANT(&nd);
3723 	vp = nd.ni_vp;
3724 	if (vp != NULL) {
3725 		NDFREE(&nd, NDF_ONLY_PNBUF);
3726 		/*
3727 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3728 		 * the strange behaviour of leaving the vnode unlocked
3729 		 * if the target is the same vnode as the parent.
3730 		 */
3731 		if (vp == nd.ni_dvp)
3732 			vrele(nd.ni_dvp);
3733 		else
3734 			vput(nd.ni_dvp);
3735 		vrele(vp);
3736 		VFS_UNLOCK_GIANT(vfslocked);
3737 		return (EEXIST);
3738 	}
3739 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3740 		NDFREE(&nd, NDF_ONLY_PNBUF);
3741 		vput(nd.ni_dvp);
3742 		VFS_UNLOCK_GIANT(vfslocked);
3743 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3744 			return (error);
3745 		goto restart;
3746 	}
3747 	VATTR_NULL(&vattr);
3748 	vattr.va_type = VDIR;
3749 	FILEDESC_SLOCK(td->td_proc->p_fd);
3750 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3751 	FILEDESC_SUNLOCK(td->td_proc->p_fd);
3752 #ifdef MAC
3753 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3754 	    &vattr);
3755 	if (error)
3756 		goto out;
3757 #endif
3758 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3759 #ifdef MAC
3760 out:
3761 #endif
3762 	NDFREE(&nd, NDF_ONLY_PNBUF);
3763 	vput(nd.ni_dvp);
3764 	if (!error)
3765 		vput(nd.ni_vp);
3766 	vn_finished_write(mp);
3767 	VFS_UNLOCK_GIANT(vfslocked);
3768 	return (error);
3769 }
3770 
3771 /*
3772  * Remove a directory file.
3773  */
3774 #ifndef _SYS_SYSPROTO_H_
3775 struct rmdir_args {
3776 	char	*path;
3777 };
3778 #endif
3779 int
3780 rmdir(td, uap)
3781 	struct thread *td;
3782 	struct rmdir_args /* {
3783 		char *path;
3784 	} */ *uap;
3785 {
3786 
3787 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3788 }
3789 
3790 int
3791 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3792 {
3793 
3794 	return (kern_rmdirat(td, AT_FDCWD, path, pathseg));
3795 }
3796 
3797 int
3798 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
3799 {
3800 	struct mount *mp;
3801 	struct vnode *vp;
3802 	int error;
3803 	struct nameidata nd;
3804 	int vfslocked;
3805 
3806 restart:
3807 	bwillwrite();
3808 	NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3809 	    pathseg, path, fd, td);
3810 	if ((error = namei(&nd)) != 0)
3811 		return (error);
3812 	vfslocked = NDHASGIANT(&nd);
3813 	vp = nd.ni_vp;
3814 	if (vp->v_type != VDIR) {
3815 		error = ENOTDIR;
3816 		goto out;
3817 	}
3818 	/*
3819 	 * No rmdir "." please.
3820 	 */
3821 	if (nd.ni_dvp == vp) {
3822 		error = EINVAL;
3823 		goto out;
3824 	}
3825 	/*
3826 	 * The root of a mounted filesystem cannot be deleted.
3827 	 */
3828 	if (vp->v_vflag & VV_ROOT) {
3829 		error = EBUSY;
3830 		goto out;
3831 	}
3832 #ifdef MAC
3833 	error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
3834 	    &nd.ni_cnd);
3835 	if (error)
3836 		goto out;
3837 #endif
3838 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3839 		NDFREE(&nd, NDF_ONLY_PNBUF);
3840 		vput(vp);
3841 		if (nd.ni_dvp == vp)
3842 			vrele(nd.ni_dvp);
3843 		else
3844 			vput(nd.ni_dvp);
3845 		VFS_UNLOCK_GIANT(vfslocked);
3846 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3847 			return (error);
3848 		goto restart;
3849 	}
3850 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3851 	vn_finished_write(mp);
3852 out:
3853 	NDFREE(&nd, NDF_ONLY_PNBUF);
3854 	vput(vp);
3855 	if (nd.ni_dvp == vp)
3856 		vrele(nd.ni_dvp);
3857 	else
3858 		vput(nd.ni_dvp);
3859 	VFS_UNLOCK_GIANT(vfslocked);
3860 	return (error);
3861 }
3862 
#ifdef COMPAT_43
/*
 * Read a block of directory entries in a filesystem independent format.
 *
 * COMPAT_43 variant.  Unless the direct path is taken (big-endian builds,
 * mount with mnt_maxsymlinklen <= 0), entries are read into a temporary
 * kernel buffer, the d_type/d_namlen bytes of every record are rewritten
 * to the layout the old interface expects, and the result is then copied
 * out to the caller.  The starting offset is copied out through
 * uap->basep and the number of bytes produced is stored in
 * td->td_retval[0].
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
{
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *ent, *end;
	struct vnode *vp, *uppervp;
	struct file *fp;
	caddr_t kbuf;
	long loff;
	int error, eofflag, nread, vfslocked;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
	if (error != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		error = EBADF;
		goto drop;
	}
	vp = fp->f_vnode;

unionread:
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	if (vp->v_type != VDIR) {
		error = EINVAL;
		goto ungiant;
	}

	/* Describe the caller's buffer. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = uap->count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;

	vn_lock(vp, LK_SHARED | LK_RETRY);
	loff = auio.uio_offset = fp->f_offset;
#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, vp);
	if (error != 0)
		goto unlock;
#endif
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			/* Read straight into the caller's buffer. */
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/* Read into a kernel bounce buffer of the same size. */
		kbuf = malloc(uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = kbuf;
		kiov.iov_len = uap->count;
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
		    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			nread = uap->count - kuio.uio_resid;
			end = (struct dirent *)&kbuf[nread];
			ent = (struct dirent *)kbuf;
			while (ent < end) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					ent->d_type = ent->d_namlen;
					ent->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					ent->d_type = 0;
#				endif
				if (ent->d_reclen > 0) {
					ent = (struct dirent *)
					    ((char *)ent + ent->d_reclen);
					continue;
				}
				/* A zero-length record would never advance. */
				error = EIO;
				break;
			}
			/* Only copy out when every record was walked. */
			if (ent >= end)
				error = uiomove(kbuf, nread, &auio);
		}
		free(kbuf, M_TEMP);
	}
	if (error != 0)
		goto unlock;

	/*
	 * Nothing was produced at the root of a union mount: switch the
	 * file over to the covered vnode and read again from offset 0.
	 */
	if (uap->count == auio.uio_resid &&
	    (vp->v_vflag & VV_ROOT) &&
	    (vp->v_mount->mnt_flag & MNT_UNION)) {
		uppervp = vp;
		vp = vp->v_mount->mnt_vnodecovered;
		VREF(vp);
		fp->f_vnode = vp;
		fp->f_data = vp;
		fp->f_offset = 0;
		vput(uppervp);
		VFS_UNLOCK_GIANT(vfslocked);
		goto unionread;
	}

	VOP_UNLOCK(vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);
	error = copyout(&loff, uap->basep, sizeof(long));
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	return (error);

	/* Error unwinding, innermost resource first. */
unlock:
	VOP_UNLOCK(vp, 0);
ungiant:
	VFS_UNLOCK_GIANT(vfslocked);
drop:
	fdrop(fp, td);
	return (error);
}
#endif /* COMPAT_43 */
4008 
4009 /*
4010  * Read a block of directory entries in a filesystem independent format.
4011  */
4012 #ifndef _SYS_SYSPROTO_H_
4013 struct getdirentries_args {
4014 	int	fd;
4015 	char	*buf;
4016 	u_int	count;
4017 	long	*basep;
4018 };
4019 #endif
4020 int
4021 getdirentries(td, uap)
4022 	struct thread *td;
4023 	register struct getdirentries_args /* {
4024 		int fd;
4025 		char *buf;
4026 		u_int count;
4027 		long *basep;
4028 	} */ *uap;
4029 {
4030 	long base;
4031 	int error;
4032 
4033 	error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base);
4034 	if (error)
4035 		return (error);
4036 	if (uap->basep != NULL)
4037 		error = copyout(&base, uap->basep, sizeof(long));
4038 	return (error);
4039 }
4040 
4041 int
4042 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
4043     long *basep)
4044 {
4045 	struct vnode *vp;
4046 	struct file *fp;
4047 	struct uio auio;
4048 	struct iovec aiov;
4049 	int vfslocked;
4050 	long loff;
4051 	int error, eofflag;
4052 
4053 	AUDIT_ARG(fd, fd);
4054 	if (count > INT_MAX)
4055 		return (EINVAL);
4056 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
4057 		return (error);
4058 	if ((fp->f_flag & FREAD) == 0) {
4059 		fdrop(fp, td);
4060 		return (EBADF);
4061 	}
4062 	vp = fp->f_vnode;
4063 unionread:
4064 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
4065 	if (vp->v_type != VDIR) {
4066 		VFS_UNLOCK_GIANT(vfslocked);
4067 		error = EINVAL;
4068 		goto fail;
4069 	}
4070 	aiov.iov_base = buf;
4071 	aiov.iov_len = count;
4072 	auio.uio_iov = &aiov;
4073 	auio.uio_iovcnt = 1;
4074 	auio.uio_rw = UIO_READ;
4075 	auio.uio_segflg = UIO_USERSPACE;
4076 	auio.uio_td = td;
4077 	auio.uio_resid = count;
4078 	vn_lock(vp, LK_SHARED | LK_RETRY);
4079 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
4080 	loff = auio.uio_offset = fp->f_offset;
4081 #ifdef MAC
4082 	error = mac_vnode_check_readdir(td->td_ucred, vp);
4083 	if (error == 0)
4084 #endif
4085 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
4086 		    NULL);
4087 	fp->f_offset = auio.uio_offset;
4088 	if (error) {
4089 		VOP_UNLOCK(vp, 0);
4090 		VFS_UNLOCK_GIANT(vfslocked);
4091 		goto fail;
4092 	}
4093 	if (count == auio.uio_resid &&
4094 	    (vp->v_vflag & VV_ROOT) &&
4095 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
4096 		struct vnode *tvp = vp;
4097 		vp = vp->v_mount->mnt_vnodecovered;
4098 		VREF(vp);
4099 		fp->f_vnode = vp;
4100 		fp->f_data = vp;
4101 		fp->f_offset = 0;
4102 		vput(tvp);
4103 		VFS_UNLOCK_GIANT(vfslocked);
4104 		goto unionread;
4105 	}
4106 	VOP_UNLOCK(vp, 0);
4107 	VFS_UNLOCK_GIANT(vfslocked);
4108 	*basep = loff;
4109 	td->td_retval[0] = count - auio.uio_resid;
4110 fail:
4111 	fdrop(fp, td);
4112 	return (error);
4113 }
4114 
4115 #ifndef _SYS_SYSPROTO_H_
4116 struct getdents_args {
4117 	int fd;
4118 	char *buf;
4119 	size_t count;
4120 };
4121 #endif
4122 int
4123 getdents(td, uap)
4124 	struct thread *td;
4125 	register struct getdents_args /* {
4126 		int fd;
4127 		char *buf;
4128 		u_int count;
4129 	} */ *uap;
4130 {
4131 	struct getdirentries_args ap;
4132 	ap.fd = uap->fd;
4133 	ap.buf = uap->buf;
4134 	ap.count = uap->count;
4135 	ap.basep = NULL;
4136 	return (getdirentries(td, &ap));
4137 }
4138 
4139 /*
4140  * Set the mode mask for creation of filesystem nodes.
4141  */
4142 #ifndef _SYS_SYSPROTO_H_
4143 struct umask_args {
4144 	int	newmask;
4145 };
4146 #endif
4147 int
4148 umask(td, uap)
4149 	struct thread *td;
4150 	struct umask_args /* {
4151 		int newmask;
4152 	} */ *uap;
4153 {
4154 	register struct filedesc *fdp;
4155 
4156 	FILEDESC_XLOCK(td->td_proc->p_fd);
4157 	fdp = td->td_proc->p_fd;
4158 	td->td_retval[0] = fdp->fd_cmask;
4159 	fdp->fd_cmask = uap->newmask & ALLPERMS;
4160 	FILEDESC_XUNLOCK(td->td_proc->p_fd);
4161 	return (0);
4162 }
4163 
4164 /*
4165  * Void all references to file by ripping underlying filesystem away from
4166  * vnode.
4167  */
4168 #ifndef _SYS_SYSPROTO_H_
4169 struct revoke_args {
4170 	char	*path;
4171 };
4172 #endif
4173 int
4174 revoke(td, uap)
4175 	struct thread *td;
4176 	register struct revoke_args /* {
4177 		char *path;
4178 	} */ *uap;
4179 {
4180 	struct vnode *vp;
4181 	struct vattr vattr;
4182 	int error;
4183 	struct nameidata nd;
4184 	int vfslocked;
4185 
4186 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4187 	    UIO_USERSPACE, uap->path, td);
4188 	if ((error = namei(&nd)) != 0)
4189 		return (error);
4190 	vfslocked = NDHASGIANT(&nd);
4191 	vp = nd.ni_vp;
4192 	NDFREE(&nd, NDF_ONLY_PNBUF);
4193 	if (vp->v_type != VCHR) {
4194 		error = EINVAL;
4195 		goto out;
4196 	}
4197 #ifdef MAC
4198 	error = mac_vnode_check_revoke(td->td_ucred, vp);
4199 	if (error)
4200 		goto out;
4201 #endif
4202 	error = VOP_GETATTR(vp, &vattr, td->td_ucred);
4203 	if (error)
4204 		goto out;
4205 	if (td->td_ucred->cr_uid != vattr.va_uid) {
4206 		error = priv_check(td, PRIV_VFS_ADMIN);
4207 		if (error)
4208 			goto out;
4209 	}
4210 	if (vcount(vp) > 1)
4211 		VOP_REVOKE(vp, REVOKEALL);
4212 out:
4213 	vput(vp);
4214 	VFS_UNLOCK_GIANT(vfslocked);
4215 	return (error);
4216 }
4217 
4218 /*
4219  * Convert a user file descriptor to a kernel file entry.
4220  * A reference on the file entry is held upon returning.
4221  */
4222 int
4223 getvnode(fdp, fd, fpp)
4224 	struct filedesc *fdp;
4225 	int fd;
4226 	struct file **fpp;
4227 {
4228 	int error;
4229 	struct file *fp;
4230 
4231 	error = 0;
4232 	fp = NULL;
4233 	if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL)
4234 		error = EBADF;
4235 	else if (fp->f_vnode == NULL) {
4236 		error = EINVAL;
4237 		fdrop(fp, curthread);
4238 	}
4239 	*fpp = fp;
4240 	return (error);
4241 }
4242 
4243 /*
4244  * Get an (NFS) file handle.
4245  */
4246 #ifndef _SYS_SYSPROTO_H_
4247 struct lgetfh_args {
4248 	char	*fname;
4249 	fhandle_t *fhp;
4250 };
4251 #endif
4252 int
4253 lgetfh(td, uap)
4254 	struct thread *td;
4255 	register struct lgetfh_args *uap;
4256 {
4257 	struct nameidata nd;
4258 	fhandle_t fh;
4259 	register struct vnode *vp;
4260 	int vfslocked;
4261 	int error;
4262 
4263 	error = priv_check(td, PRIV_VFS_GETFH);
4264 	if (error)
4265 		return (error);
4266 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4267 	    UIO_USERSPACE, uap->fname, td);
4268 	error = namei(&nd);
4269 	if (error)
4270 		return (error);
4271 	vfslocked = NDHASGIANT(&nd);
4272 	NDFREE(&nd, NDF_ONLY_PNBUF);
4273 	vp = nd.ni_vp;
4274 	bzero(&fh, sizeof(fh));
4275 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4276 	error = VOP_VPTOFH(vp, &fh.fh_fid);
4277 	vput(vp);
4278 	VFS_UNLOCK_GIANT(vfslocked);
4279 	if (error)
4280 		return (error);
4281 	error = copyout(&fh, uap->fhp, sizeof (fh));
4282 	return (error);
4283 }
4284 
4285 #ifndef _SYS_SYSPROTO_H_
4286 struct getfh_args {
4287 	char	*fname;
4288 	fhandle_t *fhp;
4289 };
4290 #endif
4291 int
4292 getfh(td, uap)
4293 	struct thread *td;
4294 	register struct getfh_args *uap;
4295 {
4296 	struct nameidata nd;
4297 	fhandle_t fh;
4298 	register struct vnode *vp;
4299 	int vfslocked;
4300 	int error;
4301 
4302 	error = priv_check(td, PRIV_VFS_GETFH);
4303 	if (error)
4304 		return (error);
4305 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4306 	    UIO_USERSPACE, uap->fname, td);
4307 	error = namei(&nd);
4308 	if (error)
4309 		return (error);
4310 	vfslocked = NDHASGIANT(&nd);
4311 	NDFREE(&nd, NDF_ONLY_PNBUF);
4312 	vp = nd.ni_vp;
4313 	bzero(&fh, sizeof(fh));
4314 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4315 	error = VOP_VPTOFH(vp, &fh.fh_fid);
4316 	vput(vp);
4317 	VFS_UNLOCK_GIANT(vfslocked);
4318 	if (error)
4319 		return (error);
4320 	error = copyout(&fh, uap->fhp, sizeof (fh));
4321 	return (error);
4322 }
4323 
4324 /*
4325  * syscall for the rpc.lockd to use to translate a NFS file handle into an
4326  * open descriptor.
4327  *
4328  * warning: do not remove the priv_check() call or this becomes one giant
4329  * security hole.
4330  */
4331 #ifndef _SYS_SYSPROTO_H_
4332 struct fhopen_args {
4333 	const struct fhandle *u_fhp;
4334 	int flags;
4335 };
4336 #endif
4337 int
4338 fhopen(td, uap)
4339 	struct thread *td;
4340 	struct fhopen_args /* {
4341 		const struct fhandle *u_fhp;
4342 		int flags;
4343 	} */ *uap;
4344 {
4345 	struct proc *p = td->td_proc;
4346 	struct mount *mp;
4347 	struct vnode *vp;
4348 	struct fhandle fhp;
4349 	struct vattr vat;
4350 	struct vattr *vap = &vat;
4351 	struct flock lf;
4352 	struct file *fp;
4353 	register struct filedesc *fdp = p->p_fd;
4354 	int fmode, error, type;
4355 	accmode_t accmode;
4356 	struct file *nfp;
4357 	int vfslocked;
4358 	int indx;
4359 
4360 	error = priv_check(td, PRIV_VFS_FHOPEN);
4361 	if (error)
4362 		return (error);
4363 	fmode = FFLAGS(uap->flags);
4364 	/* why not allow a non-read/write open for our lockd? */
4365 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4366 		return (EINVAL);
4367 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4368 	if (error)
4369 		return(error);
4370 	/* find the mount point */
4371 	mp = vfs_busyfs(&fhp.fh_fsid);
4372 	if (mp == NULL)
4373 		return (ESTALE);
4374 	vfslocked = VFS_LOCK_GIANT(mp);
4375 	/* now give me my vnode, it gets returned to me locked */
4376 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4377 	vfs_unbusy(mp);
4378 	if (error)
4379 		goto out;
4380 	/*
4381 	 * from now on we have to make sure not
4382 	 * to forget about the vnode
4383 	 * any error that causes an abort must vput(vp)
4384 	 * just set error = err and 'goto bad;'.
4385 	 */
4386 
4387 	/*
4388 	 * from vn_open
4389 	 */
4390 	if (vp->v_type == VLNK) {
4391 		error = EMLINK;
4392 		goto bad;
4393 	}
4394 	if (vp->v_type == VSOCK) {
4395 		error = EOPNOTSUPP;
4396 		goto bad;
4397 	}
4398 	accmode = 0;
4399 	if (fmode & (FWRITE | O_TRUNC)) {
4400 		if (vp->v_type == VDIR) {
4401 			error = EISDIR;
4402 			goto bad;
4403 		}
4404 		error = vn_writechk(vp);
4405 		if (error)
4406 			goto bad;
4407 		accmode |= VWRITE;
4408 	}
4409 	if (fmode & FREAD)
4410 		accmode |= VREAD;
4411 	if (fmode & O_APPEND)
4412 		accmode |= VAPPEND;
4413 #ifdef MAC
4414 	error = mac_vnode_check_open(td->td_ucred, vp, accmode);
4415 	if (error)
4416 		goto bad;
4417 #endif
4418 	if (accmode) {
4419 		error = VOP_ACCESS(vp, accmode, td->td_ucred, td);
4420 		if (error)
4421 			goto bad;
4422 	}
4423 	if (fmode & O_TRUNC) {
4424 		VOP_UNLOCK(vp, 0);				/* XXX */
4425 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4426 			vrele(vp);
4427 			goto out;
4428 		}
4429 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */
4430 #ifdef MAC
4431 		/*
4432 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4433 		 * should be right.
4434 		 */
4435 		error = mac_vnode_check_write(td->td_ucred, td->td_ucred, vp);
4436 		if (error == 0) {
4437 #endif
4438 			VATTR_NULL(vap);
4439 			vap->va_size = 0;
4440 			error = VOP_SETATTR(vp, vap, td->td_ucred);
4441 #ifdef MAC
4442 		}
4443 #endif
4444 		vn_finished_write(mp);
4445 		if (error)
4446 			goto bad;
4447 	}
4448 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, NULL);
4449 	if (error)
4450 		goto bad;
4451 
4452 	if (fmode & FWRITE)
4453 		vp->v_writecount++;
4454 
4455 	/*
4456 	 * end of vn_open code
4457 	 */
4458 
4459 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4460 		if (fmode & FWRITE)
4461 			vp->v_writecount--;
4462 		goto bad;
4463 	}
4464 	/* An extra reference on `nfp' has been held for us by falloc(). */
4465 	fp = nfp;
4466 	nfp->f_vnode = vp;
4467 	finit(nfp, fmode & FMASK, DTYPE_VNODE, vp, &vnops);
4468 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4469 		lf.l_whence = SEEK_SET;
4470 		lf.l_start = 0;
4471 		lf.l_len = 0;
4472 		if (fmode & O_EXLOCK)
4473 			lf.l_type = F_WRLCK;
4474 		else
4475 			lf.l_type = F_RDLCK;
4476 		type = F_FLOCK;
4477 		if ((fmode & FNONBLOCK) == 0)
4478 			type |= F_WAIT;
4479 		VOP_UNLOCK(vp, 0);
4480 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4481 			    type)) != 0) {
4482 			/*
4483 			 * The lock request failed.  Normally close the
4484 			 * descriptor but handle the case where someone might
4485 			 * have dup()d or close()d it when we weren't looking.
4486 			 */
4487 			fdclose(fdp, fp, indx, td);
4488 
4489 			/*
4490 			 * release our private reference
4491 			 */
4492 			fdrop(fp, td);
4493 			goto out;
4494 		}
4495 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4496 		atomic_set_int(&fp->f_flag, FHASLOCK);
4497 	}
4498 
4499 	VOP_UNLOCK(vp, 0);
4500 	fdrop(fp, td);
4501 	vfs_rel(mp);
4502 	VFS_UNLOCK_GIANT(vfslocked);
4503 	td->td_retval[0] = indx;
4504 	return (0);
4505 
4506 bad:
4507 	vput(vp);
4508 out:
4509 	VFS_UNLOCK_GIANT(vfslocked);
4510 	return (error);
4511 }
4512 
4513 /*
4514  * Stat an (NFS) file handle.
4515  */
4516 #ifndef _SYS_SYSPROTO_H_
4517 struct fhstat_args {
4518 	struct fhandle *u_fhp;
4519 	struct stat *sb;
4520 };
4521 #endif
4522 int
4523 fhstat(td, uap)
4524 	struct thread *td;
4525 	register struct fhstat_args /* {
4526 		struct fhandle *u_fhp;
4527 		struct stat *sb;
4528 	} */ *uap;
4529 {
4530 	struct stat sb;
4531 	fhandle_t fh;
4532 	struct mount *mp;
4533 	struct vnode *vp;
4534 	int vfslocked;
4535 	int error;
4536 
4537 	error = priv_check(td, PRIV_VFS_FHSTAT);
4538 	if (error)
4539 		return (error);
4540 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4541 	if (error)
4542 		return (error);
4543 	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
4544 		return (ESTALE);
4545 	vfslocked = VFS_LOCK_GIANT(mp);
4546 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4547 	vfs_unbusy(mp);
4548 	if (error) {
4549 		VFS_UNLOCK_GIANT(vfslocked);
4550 		return (error);
4551 	}
4552 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4553 	vput(vp);
4554 	VFS_UNLOCK_GIANT(vfslocked);
4555 	if (error)
4556 		return (error);
4557 	error = copyout(&sb, uap->sb, sizeof(sb));
4558 	return (error);
4559 }
4560 
4561 /*
4562  * Implement fstatfs() for (NFS) file handles.
4563  */
4564 #ifndef _SYS_SYSPROTO_H_
4565 struct fhstatfs_args {
4566 	struct fhandle *u_fhp;
4567 	struct statfs *buf;
4568 };
4569 #endif
4570 int
4571 fhstatfs(td, uap)
4572 	struct thread *td;
4573 	struct fhstatfs_args /* {
4574 		struct fhandle *u_fhp;
4575 		struct statfs *buf;
4576 	} */ *uap;
4577 {
4578 	struct statfs sf;
4579 	fhandle_t fh;
4580 	int error;
4581 
4582 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4583 	if (error)
4584 		return (error);
4585 	error = kern_fhstatfs(td, fh, &sf);
4586 	if (error)
4587 		return (error);
4588 	return (copyout(&sf, uap->buf, sizeof(sf)));
4589 }
4590 
4591 int
4592 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4593 {
4594 	struct statfs *sp;
4595 	struct mount *mp;
4596 	struct vnode *vp;
4597 	int vfslocked;
4598 	int error;
4599 
4600 	error = priv_check(td, PRIV_VFS_FHSTATFS);
4601 	if (error)
4602 		return (error);
4603 	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
4604 		return (ESTALE);
4605 	vfslocked = VFS_LOCK_GIANT(mp);
4606 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4607 	if (error) {
4608 		vfs_unbusy(mp);
4609 		VFS_UNLOCK_GIANT(vfslocked);
4610 		return (error);
4611 	}
4612 	vput(vp);
4613 	error = prison_canseemount(td->td_ucred, mp);
4614 	if (error)
4615 		goto out;
4616 #ifdef MAC
4617 	error = mac_mount_check_stat(td->td_ucred, mp);
4618 	if (error)
4619 		goto out;
4620 #endif
4621 	/*
4622 	 * Set these in case the underlying filesystem fails to do so.
4623 	 */
4624 	sp = &mp->mnt_stat;
4625 	sp->f_version = STATFS_VERSION;
4626 	sp->f_namemax = NAME_MAX;
4627 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4628 	error = VFS_STATFS(mp, sp);
4629 	if (error == 0)
4630 		*buf = *sp;
4631 out:
4632 	vfs_unbusy(mp);
4633 	VFS_UNLOCK_GIANT(vfslocked);
4634 	return (error);
4635 }
4636