xref: /freebsd/sys/kern/vfs_syscalls.c (revision 4ed925457ab06e83238a5db33e89ccc94b99a713)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_kdtrace.h"
42 #include "opt_ktrace.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/bio.h>
47 #include <sys/buf.h>
48 #include <sys/sysent.h>
49 #include <sys/malloc.h>
50 #include <sys/mount.h>
51 #include <sys/mutex.h>
52 #include <sys/sysproto.h>
53 #include <sys/namei.h>
54 #include <sys/filedesc.h>
55 #include <sys/kernel.h>
56 #include <sys/fcntl.h>
57 #include <sys/file.h>
58 #include <sys/filio.h>
59 #include <sys/limits.h>
60 #include <sys/linker.h>
61 #include <sys/sdt.h>
62 #include <sys/stat.h>
63 #include <sys/sx.h>
64 #include <sys/unistd.h>
65 #include <sys/vnode.h>
66 #include <sys/priv.h>
67 #include <sys/proc.h>
68 #include <sys/dirent.h>
69 #include <sys/jail.h>
70 #include <sys/syscallsubr.h>
71 #include <sys/sysctl.h>
72 #ifdef KTRACE
73 #include <sys/ktrace.h>
74 #endif
75 
76 #include <machine/stdarg.h>
77 
78 #include <security/audit/audit.h>
79 #include <security/mac/mac_framework.h>
80 
81 #include <vm/vm.h>
82 #include <vm/vm_object.h>
83 #include <vm/vm_page.h>
84 #include <vm/uma.h>
85 
86 SDT_PROVIDER_DEFINE(vfs);
87 SDT_PROBE_DEFINE(vfs, , stat, mode);
88 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 0, "char *");
89 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 1, "int");
90 SDT_PROBE_DEFINE(vfs, , stat, reg);
91 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 0, "char *");
92 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 1, "int");
93 
94 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
95 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
96 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
97 static int setfmode(struct thread *td, struct vnode *, int);
98 static int setfflags(struct thread *td, struct vnode *, int);
99 static int setutimes(struct thread *td, struct vnode *,
100     const struct timespec *, int, int);
101 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
102     struct thread *td);
103 
104 /*
105  * The module initialization routine for POSIX asynchronous I/O will
106  * set this to the version of AIO that it implements.  (Zero means
107  * that it is not implemented.)  This value is used here by pathconf()
108  * and in kern_descrip.c by fpathconf().
109  */
110 int async_io_version;
111 
112 #ifdef DEBUG
113 static int syncprt = 0;
114 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
115 #endif
116 
117 /*
118  * Sync each mounted filesystem.
119  */
120 #ifndef _SYS_SYSPROTO_H_
121 struct sync_args {
122 	int     dummy;
123 };
124 #endif
125 /* ARGSUSED */
126 int
127 sync(td, uap)
128 	struct thread *td;
129 	struct sync_args *uap;
130 {
131 	struct mount *mp, *nmp;
132 	int vfslocked;
133 
134 	mtx_lock(&mountlist_mtx);
135 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
136 		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
137 			nmp = TAILQ_NEXT(mp, mnt_list);
138 			continue;
139 		}
140 		vfslocked = VFS_LOCK_GIANT(mp);
141 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
142 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
143 			MNT_ILOCK(mp);
144 			mp->mnt_noasync++;
145 			mp->mnt_kern_flag &= ~MNTK_ASYNC;
146 			MNT_IUNLOCK(mp);
147 			vfs_msync(mp, MNT_NOWAIT);
148 			VFS_SYNC(mp, MNT_NOWAIT);
149 			MNT_ILOCK(mp);
150 			mp->mnt_noasync--;
151 			if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
152 			    mp->mnt_noasync == 0)
153 				mp->mnt_kern_flag |= MNTK_ASYNC;
154 			MNT_IUNLOCK(mp);
155 			vn_finished_write(mp);
156 		}
157 		VFS_UNLOCK_GIANT(vfslocked);
158 		mtx_lock(&mountlist_mtx);
159 		nmp = TAILQ_NEXT(mp, mnt_list);
160 		vfs_unbusy(mp);
161 	}
162 	mtx_unlock(&mountlist_mtx);
163 	return (0);
164 }
165 
166 /*
167  * Change filesystem quotas.
168  */
169 #ifndef _SYS_SYSPROTO_H_
170 struct quotactl_args {
171 	char *path;
172 	int cmd;
173 	int uid;
174 	caddr_t arg;
175 };
176 #endif
177 int
178 quotactl(td, uap)
179 	struct thread *td;
180 	register struct quotactl_args /* {
181 		char *path;
182 		int cmd;
183 		int uid;
184 		caddr_t arg;
185 	} */ *uap;
186 {
187 	struct mount *mp;
188 	int vfslocked;
189 	int error;
190 	struct nameidata nd;
191 
192 	AUDIT_ARG_CMD(uap->cmd);
193 	AUDIT_ARG_UID(uap->uid);
194 	if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
195 		return (EPERM);
196 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
197 	   UIO_USERSPACE, uap->path, td);
198 	if ((error = namei(&nd)) != 0)
199 		return (error);
200 	vfslocked = NDHASGIANT(&nd);
201 	NDFREE(&nd, NDF_ONLY_PNBUF);
202 	mp = nd.ni_vp->v_mount;
203 	vfs_ref(mp);
204 	vput(nd.ni_vp);
205 	error = vfs_busy(mp, 0);
206 	vfs_rel(mp);
207 	if (error) {
208 		VFS_UNLOCK_GIANT(vfslocked);
209 		return (error);
210 	}
211 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
212 	vfs_unbusy(mp);
213 	VFS_UNLOCK_GIANT(vfslocked);
214 	return (error);
215 }
216 
217 /*
218  * Used by statfs conversion routines to scale the block size up if
219  * necessary so that all of the block counts are <= 'max_size'.  Note
220  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
221  * value of 'n'.
222  */
223 void
224 statfs_scale_blocks(struct statfs *sf, long max_size)
225 {
226 	uint64_t count;
227 	int shift;
228 
229 	KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
230 
231 	/*
232 	 * Attempt to scale the block counts to give a more accurate
233 	 * overview to userland of the ratio of free space to used
234 	 * space.  To do this, find the largest block count and compute
235 	 * a divisor that lets it fit into a signed integer <= max_size.
236 	 */
237 	if (sf->f_bavail < 0)
238 		count = -sf->f_bavail;
239 	else
240 		count = sf->f_bavail;
241 	count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
242 	if (count <= max_size)
243 		return;
244 
245 	count >>= flsl(max_size);
246 	shift = 0;
247 	while (count > 0) {
248 		shift++;
249 		count >>=1;
250 	}
251 
252 	sf->f_bsize <<= shift;
253 	sf->f_blocks >>= shift;
254 	sf->f_bfree >>= shift;
255 	sf->f_bavail >>= shift;
256 }
257 
258 /*
259  * Get filesystem statistics.
260  */
261 #ifndef _SYS_SYSPROTO_H_
262 struct statfs_args {
263 	char *path;
264 	struct statfs *buf;
265 };
266 #endif
267 int
268 statfs(td, uap)
269 	struct thread *td;
270 	register struct statfs_args /* {
271 		char *path;
272 		struct statfs *buf;
273 	} */ *uap;
274 {
275 	struct statfs sf;
276 	int error;
277 
278 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
279 	if (error == 0)
280 		error = copyout(&sf, uap->buf, sizeof(sf));
281 	return (error);
282 }
283 
284 int
285 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
286     struct statfs *buf)
287 {
288 	struct mount *mp;
289 	struct statfs *sp, sb;
290 	int vfslocked;
291 	int error;
292 	struct nameidata nd;
293 
294 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
295 	    AUDITVNODE1, pathseg, path, td);
296 	error = namei(&nd);
297 	if (error)
298 		return (error);
299 	vfslocked = NDHASGIANT(&nd);
300 	mp = nd.ni_vp->v_mount;
301 	vfs_ref(mp);
302 	NDFREE(&nd, NDF_ONLY_PNBUF);
303 	vput(nd.ni_vp);
304 	error = vfs_busy(mp, 0);
305 	vfs_rel(mp);
306 	if (error) {
307 		VFS_UNLOCK_GIANT(vfslocked);
308 		return (error);
309 	}
310 #ifdef MAC
311 	error = mac_mount_check_stat(td->td_ucred, mp);
312 	if (error)
313 		goto out;
314 #endif
315 	/*
316 	 * Set these in case the underlying filesystem fails to do so.
317 	 */
318 	sp = &mp->mnt_stat;
319 	sp->f_version = STATFS_VERSION;
320 	sp->f_namemax = NAME_MAX;
321 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
322 	error = VFS_STATFS(mp, sp);
323 	if (error)
324 		goto out;
325 	if (priv_check(td, PRIV_VFS_GENERATION)) {
326 		bcopy(sp, &sb, sizeof(sb));
327 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
328 		prison_enforce_statfs(td->td_ucred, mp, &sb);
329 		sp = &sb;
330 	}
331 	*buf = *sp;
332 out:
333 	vfs_unbusy(mp);
334 	VFS_UNLOCK_GIANT(vfslocked);
335 	return (error);
336 }
337 
338 /*
339  * Get filesystem statistics.
340  */
341 #ifndef _SYS_SYSPROTO_H_
342 struct fstatfs_args {
343 	int fd;
344 	struct statfs *buf;
345 };
346 #endif
347 int
348 fstatfs(td, uap)
349 	struct thread *td;
350 	register struct fstatfs_args /* {
351 		int fd;
352 		struct statfs *buf;
353 	} */ *uap;
354 {
355 	struct statfs sf;
356 	int error;
357 
358 	error = kern_fstatfs(td, uap->fd, &sf);
359 	if (error == 0)
360 		error = copyout(&sf, uap->buf, sizeof(sf));
361 	return (error);
362 }
363 
364 int
365 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
366 {
367 	struct file *fp;
368 	struct mount *mp;
369 	struct statfs *sp, sb;
370 	int vfslocked;
371 	struct vnode *vp;
372 	int error;
373 
374 	AUDIT_ARG_FD(fd);
375 	error = getvnode(td->td_proc->p_fd, fd, &fp);
376 	if (error)
377 		return (error);
378 	vp = fp->f_vnode;
379 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
380 	vn_lock(vp, LK_SHARED | LK_RETRY);
381 #ifdef AUDIT
382 	AUDIT_ARG_VNODE1(vp);
383 #endif
384 	mp = vp->v_mount;
385 	if (mp)
386 		vfs_ref(mp);
387 	VOP_UNLOCK(vp, 0);
388 	fdrop(fp, td);
389 	if (mp == NULL) {
390 		error = EBADF;
391 		goto out;
392 	}
393 	error = vfs_busy(mp, 0);
394 	vfs_rel(mp);
395 	if (error) {
396 		VFS_UNLOCK_GIANT(vfslocked);
397 		return (error);
398 	}
399 #ifdef MAC
400 	error = mac_mount_check_stat(td->td_ucred, mp);
401 	if (error)
402 		goto out;
403 #endif
404 	/*
405 	 * Set these in case the underlying filesystem fails to do so.
406 	 */
407 	sp = &mp->mnt_stat;
408 	sp->f_version = STATFS_VERSION;
409 	sp->f_namemax = NAME_MAX;
410 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
411 	error = VFS_STATFS(mp, sp);
412 	if (error)
413 		goto out;
414 	if (priv_check(td, PRIV_VFS_GENERATION)) {
415 		bcopy(sp, &sb, sizeof(sb));
416 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
417 		prison_enforce_statfs(td->td_ucred, mp, &sb);
418 		sp = &sb;
419 	}
420 	*buf = *sp;
421 out:
422 	if (mp)
423 		vfs_unbusy(mp);
424 	VFS_UNLOCK_GIANT(vfslocked);
425 	return (error);
426 }
427 
428 /*
429  * Get statistics on all filesystems.
430  */
431 #ifndef _SYS_SYSPROTO_H_
432 struct getfsstat_args {
433 	struct statfs *buf;
434 	long bufsize;
435 	int flags;
436 };
437 #endif
438 int
439 getfsstat(td, uap)
440 	struct thread *td;
441 	register struct getfsstat_args /* {
442 		struct statfs *buf;
443 		long bufsize;
444 		int flags;
445 	} */ *uap;
446 {
447 
448 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
449 	    uap->flags));
450 }
451 
452 /*
453  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
454  * 	The caller is responsible for freeing memory which will be allocated
455  *	in '*buf'.
456  */
457 int
458 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
459     enum uio_seg bufseg, int flags)
460 {
461 	struct mount *mp, *nmp;
462 	struct statfs *sfsp, *sp, sb;
463 	size_t count, maxcount;
464 	int vfslocked;
465 	int error;
466 
467 	maxcount = bufsize / sizeof(struct statfs);
468 	if (bufsize == 0)
469 		sfsp = NULL;
470 	else if (bufseg == UIO_USERSPACE)
471 		sfsp = *buf;
472 	else /* if (bufseg == UIO_SYSSPACE) */ {
473 		count = 0;
474 		mtx_lock(&mountlist_mtx);
475 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
476 			count++;
477 		}
478 		mtx_unlock(&mountlist_mtx);
479 		if (maxcount > count)
480 			maxcount = count;
481 		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
482 		    M_WAITOK);
483 	}
484 	count = 0;
485 	mtx_lock(&mountlist_mtx);
486 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
487 		if (prison_canseemount(td->td_ucred, mp) != 0) {
488 			nmp = TAILQ_NEXT(mp, mnt_list);
489 			continue;
490 		}
491 #ifdef MAC
492 		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
493 			nmp = TAILQ_NEXT(mp, mnt_list);
494 			continue;
495 		}
496 #endif
497 		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
498 			nmp = TAILQ_NEXT(mp, mnt_list);
499 			continue;
500 		}
501 		vfslocked = VFS_LOCK_GIANT(mp);
502 		if (sfsp && count < maxcount) {
503 			sp = &mp->mnt_stat;
504 			/*
505 			 * Set these in case the underlying filesystem
506 			 * fails to do so.
507 			 */
508 			sp->f_version = STATFS_VERSION;
509 			sp->f_namemax = NAME_MAX;
510 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
511 			/*
512 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
513 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
514 			 * overrides MNT_WAIT.
515 			 */
516 			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
517 			    (flags & MNT_WAIT)) &&
518 			    (error = VFS_STATFS(mp, sp))) {
519 				VFS_UNLOCK_GIANT(vfslocked);
520 				mtx_lock(&mountlist_mtx);
521 				nmp = TAILQ_NEXT(mp, mnt_list);
522 				vfs_unbusy(mp);
523 				continue;
524 			}
525 			if (priv_check(td, PRIV_VFS_GENERATION)) {
526 				bcopy(sp, &sb, sizeof(sb));
527 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
528 				prison_enforce_statfs(td->td_ucred, mp, &sb);
529 				sp = &sb;
530 			}
531 			if (bufseg == UIO_SYSSPACE)
532 				bcopy(sp, sfsp, sizeof(*sp));
533 			else /* if (bufseg == UIO_USERSPACE) */ {
534 				error = copyout(sp, sfsp, sizeof(*sp));
535 				if (error) {
536 					vfs_unbusy(mp);
537 					VFS_UNLOCK_GIANT(vfslocked);
538 					return (error);
539 				}
540 			}
541 			sfsp++;
542 		}
543 		VFS_UNLOCK_GIANT(vfslocked);
544 		count++;
545 		mtx_lock(&mountlist_mtx);
546 		nmp = TAILQ_NEXT(mp, mnt_list);
547 		vfs_unbusy(mp);
548 	}
549 	mtx_unlock(&mountlist_mtx);
550 	if (sfsp && count > maxcount)
551 		td->td_retval[0] = maxcount;
552 	else
553 		td->td_retval[0] = count;
554 	return (0);
555 }
556 
557 #ifdef COMPAT_FREEBSD4
558 /*
559  * Get old format filesystem statistics.
560  */
561 static void cvtstatfs(struct statfs *, struct ostatfs *);
562 
563 #ifndef _SYS_SYSPROTO_H_
564 struct freebsd4_statfs_args {
565 	char *path;
566 	struct ostatfs *buf;
567 };
568 #endif
569 int
570 freebsd4_statfs(td, uap)
571 	struct thread *td;
572 	struct freebsd4_statfs_args /* {
573 		char *path;
574 		struct ostatfs *buf;
575 	} */ *uap;
576 {
577 	struct ostatfs osb;
578 	struct statfs sf;
579 	int error;
580 
581 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
582 	if (error)
583 		return (error);
584 	cvtstatfs(&sf, &osb);
585 	return (copyout(&osb, uap->buf, sizeof(osb)));
586 }
587 
588 /*
589  * Get filesystem statistics.
590  */
591 #ifndef _SYS_SYSPROTO_H_
592 struct freebsd4_fstatfs_args {
593 	int fd;
594 	struct ostatfs *buf;
595 };
596 #endif
597 int
598 freebsd4_fstatfs(td, uap)
599 	struct thread *td;
600 	struct freebsd4_fstatfs_args /* {
601 		int fd;
602 		struct ostatfs *buf;
603 	} */ *uap;
604 {
605 	struct ostatfs osb;
606 	struct statfs sf;
607 	int error;
608 
609 	error = kern_fstatfs(td, uap->fd, &sf);
610 	if (error)
611 		return (error);
612 	cvtstatfs(&sf, &osb);
613 	return (copyout(&osb, uap->buf, sizeof(osb)));
614 }
615 
616 /*
617  * Get statistics on all filesystems.
618  */
619 #ifndef _SYS_SYSPROTO_H_
620 struct freebsd4_getfsstat_args {
621 	struct ostatfs *buf;
622 	long bufsize;
623 	int flags;
624 };
625 #endif
626 int
627 freebsd4_getfsstat(td, uap)
628 	struct thread *td;
629 	register struct freebsd4_getfsstat_args /* {
630 		struct ostatfs *buf;
631 		long bufsize;
632 		int flags;
633 	} */ *uap;
634 {
635 	struct statfs *buf, *sp;
636 	struct ostatfs osb;
637 	size_t count, size;
638 	int error;
639 
640 	count = uap->bufsize / sizeof(struct ostatfs);
641 	size = count * sizeof(struct statfs);
642 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
643 	if (size > 0) {
644 		count = td->td_retval[0];
645 		sp = buf;
646 		while (count > 0 && error == 0) {
647 			cvtstatfs(sp, &osb);
648 			error = copyout(&osb, uap->buf, sizeof(osb));
649 			sp++;
650 			uap->buf++;
651 			count--;
652 		}
653 		free(buf, M_TEMP);
654 	}
655 	return (error);
656 }
657 
658 /*
659  * Implement fstatfs() for (NFS) file handles.
660  */
661 #ifndef _SYS_SYSPROTO_H_
662 struct freebsd4_fhstatfs_args {
663 	struct fhandle *u_fhp;
664 	struct ostatfs *buf;
665 };
666 #endif
667 int
668 freebsd4_fhstatfs(td, uap)
669 	struct thread *td;
670 	struct freebsd4_fhstatfs_args /* {
671 		struct fhandle *u_fhp;
672 		struct ostatfs *buf;
673 	} */ *uap;
674 {
675 	struct ostatfs osb;
676 	struct statfs sf;
677 	fhandle_t fh;
678 	int error;
679 
680 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
681 	if (error)
682 		return (error);
683 	error = kern_fhstatfs(td, fh, &sf);
684 	if (error)
685 		return (error);
686 	cvtstatfs(&sf, &osb);
687 	return (copyout(&osb, uap->buf, sizeof(osb)));
688 }
689 
690 /*
691  * Convert a new format statfs structure to an old format statfs structure.
692  */
693 static void
694 cvtstatfs(nsp, osp)
695 	struct statfs *nsp;
696 	struct ostatfs *osp;
697 {
698 
699 	statfs_scale_blocks(nsp, LONG_MAX);
700 	bzero(osp, sizeof(*osp));
701 	osp->f_bsize = nsp->f_bsize;
702 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
703 	osp->f_blocks = nsp->f_blocks;
704 	osp->f_bfree = nsp->f_bfree;
705 	osp->f_bavail = nsp->f_bavail;
706 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
707 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
708 	osp->f_owner = nsp->f_owner;
709 	osp->f_type = nsp->f_type;
710 	osp->f_flags = nsp->f_flags;
711 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
712 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
713 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
714 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
715 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
716 	    MIN(MFSNAMELEN, OMFSNAMELEN));
717 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
718 	    MIN(MNAMELEN, OMNAMELEN));
719 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
720 	    MIN(MNAMELEN, OMNAMELEN));
721 	osp->f_fsid = nsp->f_fsid;
722 }
723 #endif /* COMPAT_FREEBSD4 */
724 
725 /*
726  * Change current working directory to a given file descriptor.
727  */
728 #ifndef _SYS_SYSPROTO_H_
729 struct fchdir_args {
730 	int	fd;
731 };
732 #endif
733 int
734 fchdir(td, uap)
735 	struct thread *td;
736 	struct fchdir_args /* {
737 		int fd;
738 	} */ *uap;
739 {
740 	register struct filedesc *fdp = td->td_proc->p_fd;
741 	struct vnode *vp, *tdp, *vpold;
742 	struct mount *mp;
743 	struct file *fp;
744 	int vfslocked;
745 	int error;
746 
747 	AUDIT_ARG_FD(uap->fd);
748 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
749 		return (error);
750 	vp = fp->f_vnode;
751 	VREF(vp);
752 	fdrop(fp, td);
753 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
754 	vn_lock(vp, LK_SHARED | LK_RETRY);
755 	AUDIT_ARG_VNODE1(vp);
756 	error = change_dir(vp, td);
757 	while (!error && (mp = vp->v_mountedhere) != NULL) {
758 		int tvfslocked;
759 		if (vfs_busy(mp, 0))
760 			continue;
761 		tvfslocked = VFS_LOCK_GIANT(mp);
762 		error = VFS_ROOT(mp, LK_SHARED, &tdp);
763 		vfs_unbusy(mp);
764 		if (error) {
765 			VFS_UNLOCK_GIANT(tvfslocked);
766 			break;
767 		}
768 		vput(vp);
769 		VFS_UNLOCK_GIANT(vfslocked);
770 		vp = tdp;
771 		vfslocked = tvfslocked;
772 	}
773 	if (error) {
774 		vput(vp);
775 		VFS_UNLOCK_GIANT(vfslocked);
776 		return (error);
777 	}
778 	VOP_UNLOCK(vp, 0);
779 	VFS_UNLOCK_GIANT(vfslocked);
780 	FILEDESC_XLOCK(fdp);
781 	vpold = fdp->fd_cdir;
782 	fdp->fd_cdir = vp;
783 	FILEDESC_XUNLOCK(fdp);
784 	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
785 	vrele(vpold);
786 	VFS_UNLOCK_GIANT(vfslocked);
787 	return (0);
788 }
789 
790 /*
791  * Change current working directory (``.'').
792  */
793 #ifndef _SYS_SYSPROTO_H_
794 struct chdir_args {
795 	char	*path;
796 };
797 #endif
798 int
799 chdir(td, uap)
800 	struct thread *td;
801 	struct chdir_args /* {
802 		char *path;
803 	} */ *uap;
804 {
805 
806 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
807 }
808 
809 int
810 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
811 {
812 	register struct filedesc *fdp = td->td_proc->p_fd;
813 	int error;
814 	struct nameidata nd;
815 	struct vnode *vp;
816 	int vfslocked;
817 
818 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 |
819 	    MPSAFE, pathseg, path, td);
820 	if ((error = namei(&nd)) != 0)
821 		return (error);
822 	vfslocked = NDHASGIANT(&nd);
823 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
824 		vput(nd.ni_vp);
825 		VFS_UNLOCK_GIANT(vfslocked);
826 		NDFREE(&nd, NDF_ONLY_PNBUF);
827 		return (error);
828 	}
829 	VOP_UNLOCK(nd.ni_vp, 0);
830 	VFS_UNLOCK_GIANT(vfslocked);
831 	NDFREE(&nd, NDF_ONLY_PNBUF);
832 	FILEDESC_XLOCK(fdp);
833 	vp = fdp->fd_cdir;
834 	fdp->fd_cdir = nd.ni_vp;
835 	FILEDESC_XUNLOCK(fdp);
836 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
837 	vrele(vp);
838 	VFS_UNLOCK_GIANT(vfslocked);
839 	return (0);
840 }
841 
842 /*
843  * Helper function for raised chroot(2) security function:  Refuse if
844  * any filedescriptors are open directories.
845  */
846 static int
847 chroot_refuse_vdir_fds(fdp)
848 	struct filedesc *fdp;
849 {
850 	struct vnode *vp;
851 	struct file *fp;
852 	int fd;
853 
854 	FILEDESC_LOCK_ASSERT(fdp);
855 
856 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
857 		fp = fget_locked(fdp, fd);
858 		if (fp == NULL)
859 			continue;
860 		if (fp->f_type == DTYPE_VNODE) {
861 			vp = fp->f_vnode;
862 			if (vp->v_type == VDIR)
863 				return (EPERM);
864 		}
865 	}
866 	return (0);
867 }
868 
869 /*
870  * This sysctl determines if we will allow a process to chroot(2) if it
871  * has a directory open:
872  *	0: disallowed for all processes.
873  *	1: allowed for processes that were not already chroot(2)'ed.
874  *	2: allowed for all processes.
875  */
876 
877 static int chroot_allow_open_directories = 1;
878 
879 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
880      &chroot_allow_open_directories, 0, "");
881 
882 /*
883  * Change notion of root (``/'') directory.
884  */
885 #ifndef _SYS_SYSPROTO_H_
886 struct chroot_args {
887 	char	*path;
888 };
889 #endif
890 int
891 chroot(td, uap)
892 	struct thread *td;
893 	struct chroot_args /* {
894 		char *path;
895 	} */ *uap;
896 {
897 	int error;
898 	struct nameidata nd;
899 	int vfslocked;
900 
901 	error = priv_check(td, PRIV_VFS_CHROOT);
902 	if (error)
903 		return (error);
904 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
905 	    AUDITVNODE1, UIO_USERSPACE, uap->path, td);
906 	error = namei(&nd);
907 	if (error)
908 		goto error;
909 	vfslocked = NDHASGIANT(&nd);
910 	if ((error = change_dir(nd.ni_vp, td)) != 0)
911 		goto e_vunlock;
912 #ifdef MAC
913 	if ((error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp)))
914 		goto e_vunlock;
915 #endif
916 	VOP_UNLOCK(nd.ni_vp, 0);
917 	error = change_root(nd.ni_vp, td);
918 	vrele(nd.ni_vp);
919 	VFS_UNLOCK_GIANT(vfslocked);
920 	NDFREE(&nd, NDF_ONLY_PNBUF);
921 	return (error);
922 e_vunlock:
923 	vput(nd.ni_vp);
924 	VFS_UNLOCK_GIANT(vfslocked);
925 error:
926 	NDFREE(&nd, NDF_ONLY_PNBUF);
927 	return (error);
928 }
929 
930 /*
931  * Common routine for chroot and chdir.  Callers must provide a locked vnode
932  * instance.
933  */
934 int
935 change_dir(vp, td)
936 	struct vnode *vp;
937 	struct thread *td;
938 {
939 	int error;
940 
941 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
942 	if (vp->v_type != VDIR)
943 		return (ENOTDIR);
944 #ifdef MAC
945 	error = mac_vnode_check_chdir(td->td_ucred, vp);
946 	if (error)
947 		return (error);
948 #endif
949 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
950 	return (error);
951 }
952 
953 /*
954  * Common routine for kern_chroot() and jail_attach().  The caller is
955  * responsible for invoking priv_check() and mac_vnode_check_chroot() to
956  * authorize this operation.
957  */
958 int
959 change_root(vp, td)
960 	struct vnode *vp;
961 	struct thread *td;
962 {
963 	struct filedesc *fdp;
964 	struct vnode *oldvp;
965 	int vfslocked;
966 	int error;
967 
968 	VFS_ASSERT_GIANT(vp->v_mount);
969 	fdp = td->td_proc->p_fd;
970 	FILEDESC_XLOCK(fdp);
971 	if (chroot_allow_open_directories == 0 ||
972 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
973 		error = chroot_refuse_vdir_fds(fdp);
974 		if (error) {
975 			FILEDESC_XUNLOCK(fdp);
976 			return (error);
977 		}
978 	}
979 	oldvp = fdp->fd_rdir;
980 	fdp->fd_rdir = vp;
981 	VREF(fdp->fd_rdir);
982 	if (!fdp->fd_jdir) {
983 		fdp->fd_jdir = vp;
984 		VREF(fdp->fd_jdir);
985 	}
986 	FILEDESC_XUNLOCK(fdp);
987 	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
988 	vrele(oldvp);
989 	VFS_UNLOCK_GIANT(vfslocked);
990 	return (0);
991 }
992 
993 /*
994  * Check permissions, allocate an open file structure, and call the device
995  * open routine if any.
996  */
997 #ifndef _SYS_SYSPROTO_H_
998 struct open_args {
999 	char	*path;
1000 	int	flags;
1001 	int	mode;
1002 };
1003 #endif
1004 int
1005 open(td, uap)
1006 	struct thread *td;
1007 	register struct open_args /* {
1008 		char *path;
1009 		int flags;
1010 		int mode;
1011 	} */ *uap;
1012 {
1013 
1014 	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
1015 }
1016 
1017 #ifndef _SYS_SYSPROTO_H_
1018 struct openat_args {
1019 	int	fd;
1020 	char	*path;
1021 	int	flag;
1022 	int	mode;
1023 };
1024 #endif
1025 int
1026 openat(struct thread *td, struct openat_args *uap)
1027 {
1028 
1029 	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
1030 	    uap->mode));
1031 }
1032 
1033 int
1034 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
1035     int mode)
1036 {
1037 
1038 	return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
1039 }
1040 
1041 int
1042 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1043     int flags, int mode)
1044 {
1045 	struct proc *p = td->td_proc;
1046 	struct filedesc *fdp = p->p_fd;
1047 	struct file *fp;
1048 	struct vnode *vp;
1049 	struct vattr vat;
1050 	struct mount *mp;
1051 	int cmode;
1052 	struct file *nfp;
1053 	int type, indx, error;
1054 	struct flock lf;
1055 	struct nameidata nd;
1056 	int vfslocked;
1057 
1058 	AUDIT_ARG_FFLAGS(flags);
1059 	AUDIT_ARG_MODE(mode);
1060 	/* XXX: audit dirfd */
1061 	/*
1062 	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR may
1063 	 * be specified.
1064 	 */
1065 	if (flags & O_EXEC) {
1066 		if (flags & O_ACCMODE)
1067 			return (EINVAL);
1068 	} else if ((flags & O_ACCMODE) == O_ACCMODE)
1069 		return (EINVAL);
1070 	else
1071 		flags = FFLAGS(flags);
1072 
1073 	error = falloc(td, &nfp, &indx);
1074 	if (error)
1075 		return (error);
1076 	/* An extra reference on `nfp' has been held for us by falloc(). */
1077 	fp = nfp;
1078 	/* Set the flags early so the finit in devfs can pick them up. */
1079 	fp->f_flag = flags & FMASK;
1080 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1081 	NDINIT_AT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, fd,
1082 	    td);
1083 	td->td_dupfd = -1;		/* XXX check for fdopen */
1084 	error = vn_open(&nd, &flags, cmode, fp);
1085 	if (error) {
1086 		/*
1087 		 * If the vn_open replaced the method vector, something
1088 		 * wonderous happened deep below and we just pass it up
1089 		 * pretending we know what we do.
1090 		 */
1091 		if (error == ENXIO && fp->f_ops != &badfileops) {
1092 			fdrop(fp, td);
1093 			td->td_retval[0] = indx;
1094 			return (0);
1095 		}
1096 
1097 		/*
1098 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1099 		 * responsible for dropping the old contents of ofiles[indx]
1100 		 * if it succeeds.
1101 		 */
1102 		if ((error == ENODEV || error == ENXIO) &&
1103 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1104 		    (error =
1105 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1106 			td->td_retval[0] = indx;
1107 			fdrop(fp, td);
1108 			return (0);
1109 		}
1110 		/*
1111 		 * Clean up the descriptor, but only if another thread hadn't
1112 		 * replaced or closed it.
1113 		 */
1114 		fdclose(fdp, fp, indx, td);
1115 		fdrop(fp, td);
1116 
1117 		if (error == ERESTART)
1118 			error = EINTR;
1119 		return (error);
1120 	}
1121 	td->td_dupfd = 0;
1122 	vfslocked = NDHASGIANT(&nd);
1123 	NDFREE(&nd, NDF_ONLY_PNBUF);
1124 	vp = nd.ni_vp;
1125 
1126 	fp->f_vnode = vp;	/* XXX Does devfs need this? */
1127 	/*
1128 	 * If the file wasn't claimed by devfs bind it to the normal
1129 	 * vnode operations here.
1130 	 */
1131 	if (fp->f_ops == &badfileops) {
1132 		KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
1133 		fp->f_seqcount = 1;
1134 		finit(fp, flags & FMASK, DTYPE_VNODE, vp, &vnops);
1135 	}
1136 
1137 	VOP_UNLOCK(vp, 0);
1138 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1139 		lf.l_whence = SEEK_SET;
1140 		lf.l_start = 0;
1141 		lf.l_len = 0;
1142 		if (flags & O_EXLOCK)
1143 			lf.l_type = F_WRLCK;
1144 		else
1145 			lf.l_type = F_RDLCK;
1146 		type = F_FLOCK;
1147 		if ((flags & FNONBLOCK) == 0)
1148 			type |= F_WAIT;
1149 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1150 			    type)) != 0)
1151 			goto bad;
1152 		atomic_set_int(&fp->f_flag, FHASLOCK);
1153 	}
1154 	if (flags & O_TRUNC) {
1155 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1156 			goto bad;
1157 		VATTR_NULL(&vat);
1158 		vat.va_size = 0;
1159 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1160 #ifdef MAC
1161 		error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
1162 		if (error == 0)
1163 #endif
1164 			error = VOP_SETATTR(vp, &vat, td->td_ucred);
1165 		VOP_UNLOCK(vp, 0);
1166 		vn_finished_write(mp);
1167 		if (error)
1168 			goto bad;
1169 	}
1170 	VFS_UNLOCK_GIANT(vfslocked);
1171 	/*
1172 	 * Release our private reference, leaving the one associated with
1173 	 * the descriptor table intact.
1174 	 */
1175 	fdrop(fp, td);
1176 	td->td_retval[0] = indx;
1177 	return (0);
1178 bad:
1179 	VFS_UNLOCK_GIANT(vfslocked);
1180 	fdclose(fdp, fp, indx, td);
1181 	fdrop(fp, td);
1182 	return (error);
1183 }
1184 
#ifdef COMPAT_43
/*
 * Old creat(2) compatibility call: create a file.  Equivalent to opening
 * the user-space path with O_WRONLY | O_CREAT | O_TRUNC and the given
 * mode.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	int error;

	error = kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode);
	return (error);
}
#endif /* COMPAT_43 */
1208 
1209 /*
1210  * Create a special file.
1211  */
1212 #ifndef _SYS_SYSPROTO_H_
1213 struct mknod_args {
1214 	char	*path;
1215 	int	mode;
1216 	int	dev;
1217 };
1218 #endif
1219 int
1220 mknod(td, uap)
1221 	struct thread *td;
1222 	register struct mknod_args /* {
1223 		char *path;
1224 		int mode;
1225 		int dev;
1226 	} */ *uap;
1227 {
1228 
1229 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1230 }
1231 
1232 #ifndef _SYS_SYSPROTO_H_
1233 struct mknodat_args {
1234 	int	fd;
1235 	char	*path;
1236 	mode_t	mode;
1237 	dev_t	dev;
1238 };
1239 #endif
1240 int
1241 mknodat(struct thread *td, struct mknodat_args *uap)
1242 {
1243 
1244 	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
1245 	    uap->dev));
1246 }
1247 
1248 int
1249 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1250     int dev)
1251 {
1252 
1253 	return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev));
1254 }
1255 
1256 int
1257 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1258     int mode, int dev)
1259 {
1260 	struct vnode *vp;
1261 	struct mount *mp;
1262 	struct vattr vattr;
1263 	int error;
1264 	int whiteout = 0;
1265 	struct nameidata nd;
1266 	int vfslocked;
1267 
1268 	AUDIT_ARG_MODE(mode);
1269 	AUDIT_ARG_DEV(dev);
1270 	switch (mode & S_IFMT) {
1271 	case S_IFCHR:
1272 	case S_IFBLK:
1273 		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
1274 		break;
1275 	case S_IFMT:
1276 		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
1277 		break;
1278 	case S_IFWHT:
1279 		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
1280 		break;
1281 	case S_IFIFO:
1282 		if (dev == 0)
1283 			return (kern_mkfifoat(td, fd, path, pathseg, mode));
1284 		/* FALLTHROUGH */
1285 	default:
1286 		error = EINVAL;
1287 		break;
1288 	}
1289 	if (error)
1290 		return (error);
1291 restart:
1292 	bwillwrite();
1293 	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1294 	    pathseg, path, fd, td);
1295 	if ((error = namei(&nd)) != 0)
1296 		return (error);
1297 	vfslocked = NDHASGIANT(&nd);
1298 	vp = nd.ni_vp;
1299 	if (vp != NULL) {
1300 		NDFREE(&nd, NDF_ONLY_PNBUF);
1301 		if (vp == nd.ni_dvp)
1302 			vrele(nd.ni_dvp);
1303 		else
1304 			vput(nd.ni_dvp);
1305 		vrele(vp);
1306 		VFS_UNLOCK_GIANT(vfslocked);
1307 		return (EEXIST);
1308 	} else {
1309 		VATTR_NULL(&vattr);
1310 		vattr.va_mode = (mode & ALLPERMS) &
1311 		    ~td->td_proc->p_fd->fd_cmask;
1312 		vattr.va_rdev = dev;
1313 		whiteout = 0;
1314 
1315 		switch (mode & S_IFMT) {
1316 		case S_IFMT:	/* used by badsect to flag bad sectors */
1317 			vattr.va_type = VBAD;
1318 			break;
1319 		case S_IFCHR:
1320 			vattr.va_type = VCHR;
1321 			break;
1322 		case S_IFBLK:
1323 			vattr.va_type = VBLK;
1324 			break;
1325 		case S_IFWHT:
1326 			whiteout = 1;
1327 			break;
1328 		default:
1329 			panic("kern_mknod: invalid mode");
1330 		}
1331 	}
1332 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1333 		NDFREE(&nd, NDF_ONLY_PNBUF);
1334 		vput(nd.ni_dvp);
1335 		VFS_UNLOCK_GIANT(vfslocked);
1336 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1337 			return (error);
1338 		goto restart;
1339 	}
1340 #ifdef MAC
1341 	if (error == 0 && !whiteout)
1342 		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
1343 		    &nd.ni_cnd, &vattr);
1344 #endif
1345 	if (!error) {
1346 		if (whiteout)
1347 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1348 		else {
1349 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1350 						&nd.ni_cnd, &vattr);
1351 			if (error == 0)
1352 				vput(nd.ni_vp);
1353 		}
1354 	}
1355 	NDFREE(&nd, NDF_ONLY_PNBUF);
1356 	vput(nd.ni_dvp);
1357 	vn_finished_write(mp);
1358 	VFS_UNLOCK_GIANT(vfslocked);
1359 	return (error);
1360 }
1361 
1362 /*
1363  * Create a named pipe.
1364  */
1365 #ifndef _SYS_SYSPROTO_H_
1366 struct mkfifo_args {
1367 	char	*path;
1368 	int	mode;
1369 };
1370 #endif
1371 int
1372 mkfifo(td, uap)
1373 	struct thread *td;
1374 	register struct mkfifo_args /* {
1375 		char *path;
1376 		int mode;
1377 	} */ *uap;
1378 {
1379 
1380 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1381 }
1382 
1383 #ifndef _SYS_SYSPROTO_H_
1384 struct mkfifoat_args {
1385 	int	fd;
1386 	char	*path;
1387 	mode_t	mode;
1388 };
1389 #endif
1390 int
1391 mkfifoat(struct thread *td, struct mkfifoat_args *uap)
1392 {
1393 
1394 	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
1395 	    uap->mode));
1396 }
1397 
1398 int
1399 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1400 {
1401 
1402 	return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode));
1403 }
1404 
1405 int
1406 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1407     int mode)
1408 {
1409 	struct mount *mp;
1410 	struct vattr vattr;
1411 	int error;
1412 	struct nameidata nd;
1413 	int vfslocked;
1414 
1415 	AUDIT_ARG_MODE(mode);
1416 restart:
1417 	bwillwrite();
1418 	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1419 	    pathseg, path, fd, td);
1420 	if ((error = namei(&nd)) != 0)
1421 		return (error);
1422 	vfslocked = NDHASGIANT(&nd);
1423 	if (nd.ni_vp != NULL) {
1424 		NDFREE(&nd, NDF_ONLY_PNBUF);
1425 		if (nd.ni_vp == nd.ni_dvp)
1426 			vrele(nd.ni_dvp);
1427 		else
1428 			vput(nd.ni_dvp);
1429 		vrele(nd.ni_vp);
1430 		VFS_UNLOCK_GIANT(vfslocked);
1431 		return (EEXIST);
1432 	}
1433 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1434 		NDFREE(&nd, NDF_ONLY_PNBUF);
1435 		vput(nd.ni_dvp);
1436 		VFS_UNLOCK_GIANT(vfslocked);
1437 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1438 			return (error);
1439 		goto restart;
1440 	}
1441 	VATTR_NULL(&vattr);
1442 	vattr.va_type = VFIFO;
1443 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1444 #ifdef MAC
1445 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1446 	    &vattr);
1447 	if (error)
1448 		goto out;
1449 #endif
1450 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1451 	if (error == 0)
1452 		vput(nd.ni_vp);
1453 #ifdef MAC
1454 out:
1455 #endif
1456 	vput(nd.ni_dvp);
1457 	vn_finished_write(mp);
1458 	VFS_UNLOCK_GIANT(vfslocked);
1459 	NDFREE(&nd, NDF_ONLY_PNBUF);
1460 	return (error);
1461 }
1462 
1463 /*
1464  * Make a hard file link.
1465  */
1466 #ifndef _SYS_SYSPROTO_H_
1467 struct link_args {
1468 	char	*path;
1469 	char	*link;
1470 };
1471 #endif
1472 int
1473 link(td, uap)
1474 	struct thread *td;
1475 	register struct link_args /* {
1476 		char *path;
1477 		char *link;
1478 	} */ *uap;
1479 {
1480 
1481 	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1482 }
1483 
1484 #ifndef _SYS_SYSPROTO_H_
1485 struct linkat_args {
1486 	int	fd1;
1487 	char	*path1;
1488 	int	fd2;
1489 	char	*path2;
1490 	int	flag;
1491 };
1492 #endif
1493 int
1494 linkat(struct thread *td, struct linkat_args *uap)
1495 {
1496 	int flag;
1497 
1498 	flag = uap->flag;
1499 	if (flag & ~AT_SYMLINK_FOLLOW)
1500 		return (EINVAL);
1501 
1502 	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
1503 	    UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
1504 }
1505 
1506 int hardlink_check_uid = 0;
1507 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1508     &hardlink_check_uid, 0,
1509     "Unprivileged processes cannot create hard links to files owned by other "
1510     "users");
1511 static int hardlink_check_gid = 0;
1512 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1513     &hardlink_check_gid, 0,
1514     "Unprivileged processes cannot create hard links to files owned by other "
1515     "groups");
1516 
1517 static int
1518 can_hardlink(struct vnode *vp, struct ucred *cred)
1519 {
1520 	struct vattr va;
1521 	int error;
1522 
1523 	if (!hardlink_check_uid && !hardlink_check_gid)
1524 		return (0);
1525 
1526 	error = VOP_GETATTR(vp, &va, cred);
1527 	if (error != 0)
1528 		return (error);
1529 
1530 	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
1531 		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
1532 		if (error)
1533 			return (error);
1534 	}
1535 
1536 	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
1537 		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
1538 		if (error)
1539 			return (error);
1540 	}
1541 
1542 	return (0);
1543 }
1544 
1545 int
1546 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1547 {
1548 
1549 	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW));
1550 }
1551 
1552 int
1553 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
1554     enum uio_seg segflg, int follow)
1555 {
1556 	struct vnode *vp;
1557 	struct mount *mp;
1558 	struct nameidata nd;
1559 	int vfslocked;
1560 	int lvfslocked;
1561 	int error;
1562 
1563 	bwillwrite();
1564 	NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, segflg, path1,
1565 	    fd1, td);
1566 
1567 	if ((error = namei(&nd)) != 0)
1568 		return (error);
1569 	vfslocked = NDHASGIANT(&nd);
1570 	NDFREE(&nd, NDF_ONLY_PNBUF);
1571 	vp = nd.ni_vp;
1572 	if (vp->v_type == VDIR) {
1573 		vrele(vp);
1574 		VFS_UNLOCK_GIANT(vfslocked);
1575 		return (EPERM);		/* POSIX */
1576 	}
1577 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1578 		vrele(vp);
1579 		VFS_UNLOCK_GIANT(vfslocked);
1580 		return (error);
1581 	}
1582 	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1583 	    segflg, path2, fd2, td);
1584 	if ((error = namei(&nd)) == 0) {
1585 		lvfslocked = NDHASGIANT(&nd);
1586 		if (nd.ni_vp != NULL) {
1587 			if (nd.ni_dvp == nd.ni_vp)
1588 				vrele(nd.ni_dvp);
1589 			else
1590 				vput(nd.ni_dvp);
1591 			vrele(nd.ni_vp);
1592 			error = EEXIST;
1593 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY))
1594 		    == 0) {
1595 			error = can_hardlink(vp, td->td_ucred);
1596 			if (error == 0)
1597 #ifdef MAC
1598 				error = mac_vnode_check_link(td->td_ucred,
1599 				    nd.ni_dvp, vp, &nd.ni_cnd);
1600 			if (error == 0)
1601 #endif
1602 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1603 			VOP_UNLOCK(vp, 0);
1604 			vput(nd.ni_dvp);
1605 		}
1606 		NDFREE(&nd, NDF_ONLY_PNBUF);
1607 		VFS_UNLOCK_GIANT(lvfslocked);
1608 	}
1609 	vrele(vp);
1610 	vn_finished_write(mp);
1611 	VFS_UNLOCK_GIANT(vfslocked);
1612 	return (error);
1613 }
1614 
1615 /*
1616  * Make a symbolic link.
1617  */
1618 #ifndef _SYS_SYSPROTO_H_
1619 struct symlink_args {
1620 	char	*path;
1621 	char	*link;
1622 };
1623 #endif
1624 int
1625 symlink(td, uap)
1626 	struct thread *td;
1627 	register struct symlink_args /* {
1628 		char *path;
1629 		char *link;
1630 	} */ *uap;
1631 {
1632 
1633 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1634 }
1635 
1636 #ifndef _SYS_SYSPROTO_H_
1637 struct symlinkat_args {
1638 	char	*path;
1639 	int	fd;
1640 	char	*path2;
1641 };
1642 #endif
1643 int
1644 symlinkat(struct thread *td, struct symlinkat_args *uap)
1645 {
1646 
1647 	return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
1648 	    UIO_USERSPACE));
1649 }
1650 
1651 int
1652 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1653 {
1654 
1655 	return (kern_symlinkat(td, path, AT_FDCWD, link, segflg));
1656 }
1657 
1658 int
1659 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
1660     enum uio_seg segflg)
1661 {
1662 	struct mount *mp;
1663 	struct vattr vattr;
1664 	char *syspath;
1665 	int error;
1666 	struct nameidata nd;
1667 	int vfslocked;
1668 
1669 	if (segflg == UIO_SYSSPACE) {
1670 		syspath = path1;
1671 	} else {
1672 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1673 		if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
1674 			goto out;
1675 	}
1676 	AUDIT_ARG_TEXT(syspath);
1677 restart:
1678 	bwillwrite();
1679 	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1680 	    segflg, path2, fd, td);
1681 	if ((error = namei(&nd)) != 0)
1682 		goto out;
1683 	vfslocked = NDHASGIANT(&nd);
1684 	if (nd.ni_vp) {
1685 		NDFREE(&nd, NDF_ONLY_PNBUF);
1686 		if (nd.ni_vp == nd.ni_dvp)
1687 			vrele(nd.ni_dvp);
1688 		else
1689 			vput(nd.ni_dvp);
1690 		vrele(nd.ni_vp);
1691 		VFS_UNLOCK_GIANT(vfslocked);
1692 		error = EEXIST;
1693 		goto out;
1694 	}
1695 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1696 		NDFREE(&nd, NDF_ONLY_PNBUF);
1697 		vput(nd.ni_dvp);
1698 		VFS_UNLOCK_GIANT(vfslocked);
1699 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1700 			goto out;
1701 		goto restart;
1702 	}
1703 	VATTR_NULL(&vattr);
1704 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1705 #ifdef MAC
1706 	vattr.va_type = VLNK;
1707 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1708 	    &vattr);
1709 	if (error)
1710 		goto out2;
1711 #endif
1712 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1713 	if (error == 0)
1714 		vput(nd.ni_vp);
1715 #ifdef MAC
1716 out2:
1717 #endif
1718 	NDFREE(&nd, NDF_ONLY_PNBUF);
1719 	vput(nd.ni_dvp);
1720 	vn_finished_write(mp);
1721 	VFS_UNLOCK_GIANT(vfslocked);
1722 out:
1723 	if (segflg != UIO_SYSSPACE)
1724 		uma_zfree(namei_zone, syspath);
1725 	return (error);
1726 }
1727 
1728 /*
1729  * Delete a whiteout from the filesystem.
1730  */
1731 int
1732 undelete(td, uap)
1733 	struct thread *td;
1734 	register struct undelete_args /* {
1735 		char *path;
1736 	} */ *uap;
1737 {
1738 	int error;
1739 	struct mount *mp;
1740 	struct nameidata nd;
1741 	int vfslocked;
1742 
1743 restart:
1744 	bwillwrite();
1745 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1746 	    UIO_USERSPACE, uap->path, td);
1747 	error = namei(&nd);
1748 	if (error)
1749 		return (error);
1750 	vfslocked = NDHASGIANT(&nd);
1751 
1752 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1753 		NDFREE(&nd, NDF_ONLY_PNBUF);
1754 		if (nd.ni_vp == nd.ni_dvp)
1755 			vrele(nd.ni_dvp);
1756 		else
1757 			vput(nd.ni_dvp);
1758 		if (nd.ni_vp)
1759 			vrele(nd.ni_vp);
1760 		VFS_UNLOCK_GIANT(vfslocked);
1761 		return (EEXIST);
1762 	}
1763 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1764 		NDFREE(&nd, NDF_ONLY_PNBUF);
1765 		vput(nd.ni_dvp);
1766 		VFS_UNLOCK_GIANT(vfslocked);
1767 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1768 			return (error);
1769 		goto restart;
1770 	}
1771 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1772 	NDFREE(&nd, NDF_ONLY_PNBUF);
1773 	vput(nd.ni_dvp);
1774 	vn_finished_write(mp);
1775 	VFS_UNLOCK_GIANT(vfslocked);
1776 	return (error);
1777 }
1778 
1779 /*
1780  * Delete a name from the filesystem.
1781  */
1782 #ifndef _SYS_SYSPROTO_H_
1783 struct unlink_args {
1784 	char	*path;
1785 };
1786 #endif
1787 int
1788 unlink(td, uap)
1789 	struct thread *td;
1790 	struct unlink_args /* {
1791 		char *path;
1792 	} */ *uap;
1793 {
1794 
1795 	return (kern_unlink(td, uap->path, UIO_USERSPACE));
1796 }
1797 
1798 #ifndef _SYS_SYSPROTO_H_
1799 struct unlinkat_args {
1800 	int	fd;
1801 	char	*path;
1802 	int	flag;
1803 };
1804 #endif
1805 int
1806 unlinkat(struct thread *td, struct unlinkat_args *uap)
1807 {
1808 	int flag = uap->flag;
1809 	int fd = uap->fd;
1810 	char *path = uap->path;
1811 
1812 	if (flag & ~AT_REMOVEDIR)
1813 		return (EINVAL);
1814 
1815 	if (flag & AT_REMOVEDIR)
1816 		return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
1817 	else
1818 		return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
1819 }
1820 
1821 int
1822 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1823 {
1824 
1825 	return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0));
1826 }
1827 
1828 int
1829 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1830     ino_t oldinum)
1831 {
1832 	struct mount *mp;
1833 	struct vnode *vp;
1834 	int error;
1835 	struct nameidata nd;
1836 	struct stat sb;
1837 	int vfslocked;
1838 
1839 restart:
1840 	bwillwrite();
1841 	NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1842 	    pathseg, path, fd, td);
1843 	if ((error = namei(&nd)) != 0)
1844 		return (error == EINVAL ? EPERM : error);
1845 	vfslocked = NDHASGIANT(&nd);
1846 	vp = nd.ni_vp;
1847 	if (vp->v_type == VDIR && oldinum == 0) {
1848 		error = EPERM;		/* POSIX */
1849 	} else if (oldinum != 0 &&
1850 		  ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
1851 		  sb.st_ino != oldinum) {
1852 			error = EIDRM;	/* Identifier removed */
1853 	} else {
1854 		/*
1855 		 * The root of a mounted filesystem cannot be deleted.
1856 		 *
1857 		 * XXX: can this only be a VDIR case?
1858 		 */
1859 		if (vp->v_vflag & VV_ROOT)
1860 			error = EBUSY;
1861 	}
1862 	if (error == 0) {
1863 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1864 			NDFREE(&nd, NDF_ONLY_PNBUF);
1865 			vput(nd.ni_dvp);
1866 			if (vp == nd.ni_dvp)
1867 				vrele(vp);
1868 			else
1869 				vput(vp);
1870 			VFS_UNLOCK_GIANT(vfslocked);
1871 			if ((error = vn_start_write(NULL, &mp,
1872 			    V_XSLEEP | PCATCH)) != 0)
1873 				return (error);
1874 			goto restart;
1875 		}
1876 #ifdef MAC
1877 		error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
1878 		    &nd.ni_cnd);
1879 		if (error)
1880 			goto out;
1881 #endif
1882 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1883 #ifdef MAC
1884 out:
1885 #endif
1886 		vn_finished_write(mp);
1887 	}
1888 	NDFREE(&nd, NDF_ONLY_PNBUF);
1889 	vput(nd.ni_dvp);
1890 	if (vp == nd.ni_dvp)
1891 		vrele(vp);
1892 	else
1893 		vput(vp);
1894 	VFS_UNLOCK_GIANT(vfslocked);
1895 	return (error);
1896 }
1897 
1898 /*
1899  * Reposition read/write file offset.
1900  */
1901 #ifndef _SYS_SYSPROTO_H_
1902 struct lseek_args {
1903 	int	fd;
1904 	int	pad;
1905 	off_t	offset;
1906 	int	whence;
1907 };
1908 #endif
1909 int
1910 lseek(td, uap)
1911 	struct thread *td;
1912 	register struct lseek_args /* {
1913 		int fd;
1914 		int pad;
1915 		off_t offset;
1916 		int whence;
1917 	} */ *uap;
1918 {
1919 	struct ucred *cred = td->td_ucred;
1920 	struct file *fp;
1921 	struct vnode *vp;
1922 	struct vattr vattr;
1923 	off_t offset;
1924 	int error, noneg;
1925 	int vfslocked;
1926 
1927 	AUDIT_ARG_FD(uap->fd);
1928 	if ((error = fget(td, uap->fd, &fp)) != 0)
1929 		return (error);
1930 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1931 		fdrop(fp, td);
1932 		return (ESPIPE);
1933 	}
1934 	vp = fp->f_vnode;
1935 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1936 	noneg = (vp->v_type != VCHR);
1937 	offset = uap->offset;
1938 	switch (uap->whence) {
1939 	case L_INCR:
1940 		if (noneg &&
1941 		    (fp->f_offset < 0 ||
1942 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1943 			error = EOVERFLOW;
1944 			break;
1945 		}
1946 		offset += fp->f_offset;
1947 		break;
1948 	case L_XTND:
1949 		vn_lock(vp, LK_SHARED | LK_RETRY);
1950 		error = VOP_GETATTR(vp, &vattr, cred);
1951 		VOP_UNLOCK(vp, 0);
1952 		if (error)
1953 			break;
1954 		if (noneg &&
1955 		    (vattr.va_size > OFF_MAX ||
1956 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1957 			error = EOVERFLOW;
1958 			break;
1959 		}
1960 		offset += vattr.va_size;
1961 		break;
1962 	case L_SET:
1963 		break;
1964 	case SEEK_DATA:
1965 		error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td);
1966 		break;
1967 	case SEEK_HOLE:
1968 		error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td);
1969 		break;
1970 	default:
1971 		error = EINVAL;
1972 	}
1973 	if (error == 0 && noneg && offset < 0)
1974 		error = EINVAL;
1975 	if (error != 0)
1976 		goto drop;
1977 	fp->f_offset = offset;
1978 	*(off_t *)(td->td_retval) = fp->f_offset;
1979 drop:
1980 	fdrop(fp, td);
1981 	VFS_UNLOCK_GIANT(vfslocked);
1982 	return (error);
1983 }
1984 
1985 #if defined(COMPAT_43)
1986 /*
1987  * Reposition read/write file offset.
1988  */
1989 #ifndef _SYS_SYSPROTO_H_
1990 struct olseek_args {
1991 	int	fd;
1992 	long	offset;
1993 	int	whence;
1994 };
1995 #endif
1996 int
1997 olseek(td, uap)
1998 	struct thread *td;
1999 	register struct olseek_args /* {
2000 		int fd;
2001 		long offset;
2002 		int whence;
2003 	} */ *uap;
2004 {
2005 	struct lseek_args /* {
2006 		int fd;
2007 		int pad;
2008 		off_t offset;
2009 		int whence;
2010 	} */ nuap;
2011 
2012 	nuap.fd = uap->fd;
2013 	nuap.offset = uap->offset;
2014 	nuap.whence = uap->whence;
2015 	return (lseek(td, &nuap));
2016 }
2017 #endif /* COMPAT_43 */
2018 
2019 /* Version with the 'pad' argument */
2020 int
2021 freebsd6_lseek(td, uap)
2022 	struct thread *td;
2023 	register struct freebsd6_lseek_args *uap;
2024 {
2025 	struct lseek_args ouap;
2026 
2027 	ouap.fd = uap->fd;
2028 	ouap.offset = uap->offset;
2029 	ouap.whence = uap->whence;
2030 	return (lseek(td, &ouap));
2031 }
2032 
2033 /*
2034  * Check access permissions using passed credentials.
2035  */
2036 static int
2037 vn_access(vp, user_flags, cred, td)
2038 	struct vnode	*vp;
2039 	int		user_flags;
2040 	struct ucred	*cred;
2041 	struct thread	*td;
2042 {
2043 	int error;
2044 	accmode_t accmode;
2045 
2046 	/* Flags == 0 means only check for existence. */
2047 	error = 0;
2048 	if (user_flags) {
2049 		accmode = 0;
2050 		if (user_flags & R_OK)
2051 			accmode |= VREAD;
2052 		if (user_flags & W_OK)
2053 			accmode |= VWRITE;
2054 		if (user_flags & X_OK)
2055 			accmode |= VEXEC;
2056 #ifdef MAC
2057 		error = mac_vnode_check_access(cred, vp, accmode);
2058 		if (error)
2059 			return (error);
2060 #endif
2061 		if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
2062 			error = VOP_ACCESS(vp, accmode, cred, td);
2063 	}
2064 	return (error);
2065 }
2066 
2067 /*
2068  * Check access permissions using "real" credentials.
2069  */
2070 #ifndef _SYS_SYSPROTO_H_
2071 struct access_args {
2072 	char	*path;
2073 	int	flags;
2074 };
2075 #endif
2076 int
2077 access(td, uap)
2078 	struct thread *td;
2079 	register struct access_args /* {
2080 		char *path;
2081 		int flags;
2082 	} */ *uap;
2083 {
2084 
2085 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
2086 }
2087 
2088 #ifndef _SYS_SYSPROTO_H_
2089 struct faccessat_args {
2090 	int	dirfd;
2091 	char	*path;
2092 	int	mode;
2093 	int	flag;
2094 }
2095 #endif
2096 int
2097 faccessat(struct thread *td, struct faccessat_args *uap)
2098 {
2099 
2100 	if (uap->flag & ~AT_EACCESS)
2101 		return (EINVAL);
2102 	return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
2103 	    uap->mode));
2104 }
2105 
2106 int
2107 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2108 {
2109 
2110 	return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, mode));
2111 }
2112 
2113 int
2114 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
2115     int flags, int mode)
2116 {
2117 	struct ucred *cred, *tmpcred;
2118 	struct vnode *vp;
2119 	struct nameidata nd;
2120 	int vfslocked;
2121 	int error;
2122 
2123 	/*
2124 	 * Create and modify a temporary credential instead of one that
2125 	 * is potentially shared.  This could also mess up socket
2126 	 * buffer accounting which can run in an interrupt context.
2127 	 */
2128 	if (!(flags & AT_EACCESS)) {
2129 		cred = td->td_ucred;
2130 		tmpcred = crdup(cred);
2131 		tmpcred->cr_uid = cred->cr_ruid;
2132 		tmpcred->cr_groups[0] = cred->cr_rgid;
2133 		td->td_ucred = tmpcred;
2134 	} else
2135 		cred = tmpcred = td->td_ucred;
2136 	AUDIT_ARG_VALUE(mode);
2137 	NDINIT_AT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
2138 	    AUDITVNODE1, pathseg, path, fd, td);
2139 	if ((error = namei(&nd)) != 0)
2140 		goto out1;
2141 	vfslocked = NDHASGIANT(&nd);
2142 	vp = nd.ni_vp;
2143 
2144 	error = vn_access(vp, mode, tmpcred, td);
2145 	NDFREE(&nd, NDF_ONLY_PNBUF);
2146 	vput(vp);
2147 	VFS_UNLOCK_GIANT(vfslocked);
2148 out1:
2149 	if (!(flags & AT_EACCESS)) {
2150 		td->td_ucred = cred;
2151 		crfree(tmpcred);
2152 	}
2153 	return (error);
2154 }
2155 
2156 /*
2157  * Check access permissions using "effective" credentials.
2158  */
2159 #ifndef _SYS_SYSPROTO_H_
2160 struct eaccess_args {
2161 	char	*path;
2162 	int	flags;
2163 };
2164 #endif
2165 int
2166 eaccess(td, uap)
2167 	struct thread *td;
2168 	register struct eaccess_args /* {
2169 		char *path;
2170 		int flags;
2171 	} */ *uap;
2172 {
2173 
2174 	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
2175 }
2176 
2177 int
2178 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
2179 {
2180 
2181 	return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, flags));
2182 }
2183 
2184 #if defined(COMPAT_43)
2185 /*
2186  * Get file status; this version follows links.
2187  */
2188 #ifndef _SYS_SYSPROTO_H_
2189 struct ostat_args {
2190 	char	*path;
2191 	struct ostat *ub;
2192 };
2193 #endif
2194 int
2195 ostat(td, uap)
2196 	struct thread *td;
2197 	register struct ostat_args /* {
2198 		char *path;
2199 		struct ostat *ub;
2200 	} */ *uap;
2201 {
2202 	struct stat sb;
2203 	struct ostat osb;
2204 	int error;
2205 
2206 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2207 	if (error)
2208 		return (error);
2209 	cvtstat(&sb, &osb);
2210 	error = copyout(&osb, uap->ub, sizeof (osb));
2211 	return (error);
2212 }
2213 
2214 /*
2215  * Get file status; this version does not follow links.
2216  */
2217 #ifndef _SYS_SYSPROTO_H_
2218 struct olstat_args {
2219 	char	*path;
2220 	struct ostat *ub;
2221 };
2222 #endif
2223 int
2224 olstat(td, uap)
2225 	struct thread *td;
2226 	register struct olstat_args /* {
2227 		char *path;
2228 		struct ostat *ub;
2229 	} */ *uap;
2230 {
2231 	struct stat sb;
2232 	struct ostat osb;
2233 	int error;
2234 
2235 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2236 	if (error)
2237 		return (error);
2238 	cvtstat(&sb, &osb);
2239 	error = copyout(&osb, uap->ub, sizeof (osb));
2240 	return (error);
2241 }
2242 
2243 /*
2244  * Convert from an old to a new stat structure.
2245  */
2246 void
2247 cvtstat(st, ost)
2248 	struct stat *st;
2249 	struct ostat *ost;
2250 {
2251 
2252 	ost->st_dev = st->st_dev;
2253 	ost->st_ino = st->st_ino;
2254 	ost->st_mode = st->st_mode;
2255 	ost->st_nlink = st->st_nlink;
2256 	ost->st_uid = st->st_uid;
2257 	ost->st_gid = st->st_gid;
2258 	ost->st_rdev = st->st_rdev;
2259 	if (st->st_size < (quad_t)1 << 32)
2260 		ost->st_size = st->st_size;
2261 	else
2262 		ost->st_size = -2;
2263 	ost->st_atime = st->st_atime;
2264 	ost->st_mtime = st->st_mtime;
2265 	ost->st_ctime = st->st_ctime;
2266 	ost->st_blksize = st->st_blksize;
2267 	ost->st_blocks = st->st_blocks;
2268 	ost->st_flags = st->st_flags;
2269 	ost->st_gen = st->st_gen;
2270 }
2271 #endif /* COMPAT_43 */
2272 
2273 /*
2274  * Get file status; this version follows links.
2275  */
2276 #ifndef _SYS_SYSPROTO_H_
2277 struct stat_args {
2278 	char	*path;
2279 	struct stat *ub;
2280 };
2281 #endif
2282 int
2283 stat(td, uap)
2284 	struct thread *td;
2285 	register struct stat_args /* {
2286 		char *path;
2287 		struct stat *ub;
2288 	} */ *uap;
2289 {
2290 	struct stat sb;
2291 	int error;
2292 
2293 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2294 	if (error == 0)
2295 		error = copyout(&sb, uap->ub, sizeof (sb));
2296 	return (error);
2297 }
2298 
2299 #ifndef _SYS_SYSPROTO_H_
2300 struct fstatat_args {
2301 	int	fd;
2302 	char	*path;
2303 	struct stat	*buf;
2304 	int	flag;
2305 }
2306 #endif
2307 int
2308 fstatat(struct thread *td, struct fstatat_args *uap)
2309 {
2310 	struct stat sb;
2311 	int error;
2312 
2313 	error = kern_statat(td, uap->flag, uap->fd, uap->path,
2314 	    UIO_USERSPACE, &sb);
2315 	if (error == 0)
2316 		error = copyout(&sb, uap->buf, sizeof (sb));
2317 	return (error);
2318 }
2319 
2320 int
2321 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2322 {
2323 
2324 	return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp));
2325 }
2326 
2327 int
2328 kern_statat(struct thread *td, int flag, int fd, char *path,
2329     enum uio_seg pathseg, struct stat *sbp)
2330 {
2331 
2332 	return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL));
2333 }
2334 
2335 int
2336 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path,
2337     enum uio_seg pathseg, struct stat *sbp,
2338     void (*hook)(struct vnode *vp, struct stat *sbp))
2339 {
2340 	struct nameidata nd;
2341 	struct stat sb;
2342 	int error, vfslocked;
2343 
2344 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2345 		return (EINVAL);
2346 
2347 	NDINIT_AT(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
2348 	    FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | MPSAFE, pathseg,
2349 	    path, fd, td);
2350 
2351 	if ((error = namei(&nd)) != 0)
2352 		return (error);
2353 	vfslocked = NDHASGIANT(&nd);
2354 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2355 	if (!error) {
2356 		SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0);
2357 		if (S_ISREG(sb.st_mode))
2358 			SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0);
2359 		if (__predict_false(hook != NULL))
2360 			hook(nd.ni_vp, &sb);
2361 	}
2362 	NDFREE(&nd, NDF_ONLY_PNBUF);
2363 	vput(nd.ni_vp);
2364 	VFS_UNLOCK_GIANT(vfslocked);
2365 	if (error)
2366 		return (error);
2367 	*sbp = sb;
2368 #ifdef KTRACE
2369 	if (KTRPOINT(td, KTR_STRUCT))
2370 		ktrstat(&sb);
2371 #endif
2372 	return (0);
2373 }
2374 
2375 /*
2376  * Get file status; this version does not follow links.
2377  */
2378 #ifndef _SYS_SYSPROTO_H_
2379 struct lstat_args {
2380 	char	*path;
2381 	struct stat *ub;
2382 };
2383 #endif
2384 int
2385 lstat(td, uap)
2386 	struct thread *td;
2387 	register struct lstat_args /* {
2388 		char *path;
2389 		struct stat *ub;
2390 	} */ *uap;
2391 {
2392 	struct stat sb;
2393 	int error;
2394 
2395 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2396 	if (error == 0)
2397 		error = copyout(&sb, uap->ub, sizeof (sb));
2398 	return (error);
2399 }
2400 
2401 int
2402 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2403 {
2404 
2405 	return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg,
2406 	    sbp));
2407 }
2408 
2409 /*
2410  * Implementation of the NetBSD [l]stat() functions.
2411  */
2412 void
2413 cvtnstat(sb, nsb)
2414 	struct stat *sb;
2415 	struct nstat *nsb;
2416 {
2417 	bzero(nsb, sizeof *nsb);
2418 	nsb->st_dev = sb->st_dev;
2419 	nsb->st_ino = sb->st_ino;
2420 	nsb->st_mode = sb->st_mode;
2421 	nsb->st_nlink = sb->st_nlink;
2422 	nsb->st_uid = sb->st_uid;
2423 	nsb->st_gid = sb->st_gid;
2424 	nsb->st_rdev = sb->st_rdev;
2425 	nsb->st_atimespec = sb->st_atimespec;
2426 	nsb->st_mtimespec = sb->st_mtimespec;
2427 	nsb->st_ctimespec = sb->st_ctimespec;
2428 	nsb->st_size = sb->st_size;
2429 	nsb->st_blocks = sb->st_blocks;
2430 	nsb->st_blksize = sb->st_blksize;
2431 	nsb->st_flags = sb->st_flags;
2432 	nsb->st_gen = sb->st_gen;
2433 	nsb->st_birthtimespec = sb->st_birthtimespec;
2434 }
2435 
2436 #ifndef _SYS_SYSPROTO_H_
2437 struct nstat_args {
2438 	char	*path;
2439 	struct nstat *ub;
2440 };
2441 #endif
2442 int
2443 nstat(td, uap)
2444 	struct thread *td;
2445 	register struct nstat_args /* {
2446 		char *path;
2447 		struct nstat *ub;
2448 	} */ *uap;
2449 {
2450 	struct stat sb;
2451 	struct nstat nsb;
2452 	int error;
2453 
2454 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2455 	if (error)
2456 		return (error);
2457 	cvtnstat(&sb, &nsb);
2458 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2459 	return (error);
2460 }
2461 
2462 /*
2463  * NetBSD lstat.  Get file status; this version does not follow links.
2464  */
2465 #ifndef _SYS_SYSPROTO_H_
2466 struct lstat_args {
2467 	char	*path;
2468 	struct stat *ub;
2469 };
2470 #endif
2471 int
2472 nlstat(td, uap)
2473 	struct thread *td;
2474 	register struct nlstat_args /* {
2475 		char *path;
2476 		struct nstat *ub;
2477 	} */ *uap;
2478 {
2479 	struct stat sb;
2480 	struct nstat nsb;
2481 	int error;
2482 
2483 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2484 	if (error)
2485 		return (error);
2486 	cvtnstat(&sb, &nsb);
2487 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2488 	return (error);
2489 }
2490 
2491 /*
2492  * Get configurable pathname variables.
2493  */
2494 #ifndef _SYS_SYSPROTO_H_
2495 struct pathconf_args {
2496 	char	*path;
2497 	int	name;
2498 };
2499 #endif
2500 int
2501 pathconf(td, uap)
2502 	struct thread *td;
2503 	register struct pathconf_args /* {
2504 		char *path;
2505 		int name;
2506 	} */ *uap;
2507 {
2508 
2509 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW));
2510 }
2511 
2512 #ifndef _SYS_SYSPROTO_H_
2513 struct lpathconf_args {
2514 	char	*path;
2515 	int	name;
2516 };
2517 #endif
2518 int
2519 lpathconf(td, uap)
2520 	struct thread *td;
2521 	register struct lpathconf_args /* {
2522 		char *path;
2523 		int name;
2524 	} */ *uap;
2525 {
2526 
2527 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, NOFOLLOW));
2528 }
2529 
2530 int
2531 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
2532     u_long flags)
2533 {
2534 	struct nameidata nd;
2535 	int error, vfslocked;
2536 
2537 	NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1 |
2538 	    flags, pathseg, path, td);
2539 	if ((error = namei(&nd)) != 0)
2540 		return (error);
2541 	vfslocked = NDHASGIANT(&nd);
2542 	NDFREE(&nd, NDF_ONLY_PNBUF);
2543 
2544 	/* If asynchronous I/O is available, it works for all files. */
2545 	if (name == _PC_ASYNC_IO)
2546 		td->td_retval[0] = async_io_version;
2547 	else
2548 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2549 	vput(nd.ni_vp);
2550 	VFS_UNLOCK_GIANT(vfslocked);
2551 	return (error);
2552 }
2553 
2554 /*
2555  * Return target name of a symbolic link.
2556  */
2557 #ifndef _SYS_SYSPROTO_H_
2558 struct readlink_args {
2559 	char	*path;
2560 	char	*buf;
2561 	size_t	count;
2562 };
2563 #endif
2564 int
2565 readlink(td, uap)
2566 	struct thread *td;
2567 	register struct readlink_args /* {
2568 		char *path;
2569 		char *buf;
2570 		size_t count;
2571 	} */ *uap;
2572 {
2573 
2574 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2575 	    UIO_USERSPACE, uap->count));
2576 }
2577 #ifndef _SYS_SYSPROTO_H_
2578 struct readlinkat_args {
2579 	int	fd;
2580 	char	*path;
2581 	char	*buf;
2582 	size_t	bufsize;
2583 };
2584 #endif
2585 int
2586 readlinkat(struct thread *td, struct readlinkat_args *uap)
2587 {
2588 
2589 	return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
2590 	    uap->buf, UIO_USERSPACE, uap->bufsize));
2591 }
2592 
2593 int
2594 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2595     enum uio_seg bufseg, size_t count)
2596 {
2597 
2598 	return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg,
2599 	    count));
2600 }
2601 
2602 int
2603 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
2604     char *buf, enum uio_seg bufseg, size_t count)
2605 {
2606 	struct vnode *vp;
2607 	struct iovec aiov;
2608 	struct uio auio;
2609 	int error;
2610 	struct nameidata nd;
2611 	int vfslocked;
2612 
2613 	if (count > INT_MAX)
2614 		return (EINVAL);
2615 
2616 	NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
2617 	    AUDITVNODE1, pathseg, path, fd, td);
2618 
2619 	if ((error = namei(&nd)) != 0)
2620 		return (error);
2621 	NDFREE(&nd, NDF_ONLY_PNBUF);
2622 	vfslocked = NDHASGIANT(&nd);
2623 	vp = nd.ni_vp;
2624 #ifdef MAC
2625 	error = mac_vnode_check_readlink(td->td_ucred, vp);
2626 	if (error) {
2627 		vput(vp);
2628 		VFS_UNLOCK_GIANT(vfslocked);
2629 		return (error);
2630 	}
2631 #endif
2632 	if (vp->v_type != VLNK)
2633 		error = EINVAL;
2634 	else {
2635 		aiov.iov_base = buf;
2636 		aiov.iov_len = count;
2637 		auio.uio_iov = &aiov;
2638 		auio.uio_iovcnt = 1;
2639 		auio.uio_offset = 0;
2640 		auio.uio_rw = UIO_READ;
2641 		auio.uio_segflg = bufseg;
2642 		auio.uio_td = td;
2643 		auio.uio_resid = count;
2644 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2645 	}
2646 	vput(vp);
2647 	VFS_UNLOCK_GIANT(vfslocked);
2648 	td->td_retval[0] = count - auio.uio_resid;
2649 	return (error);
2650 }
2651 
2652 /*
2653  * Common implementation code for chflags() and fchflags().
2654  */
2655 static int
2656 setfflags(td, vp, flags)
2657 	struct thread *td;
2658 	struct vnode *vp;
2659 	int flags;
2660 {
2661 	int error;
2662 	struct mount *mp;
2663 	struct vattr vattr;
2664 
2665 	/*
2666 	 * Prevent non-root users from setting flags on devices.  When
2667 	 * a device is reused, users can retain ownership of the device
2668 	 * if they are allowed to set flags and programs assume that
2669 	 * chown can't fail when done as root.
2670 	 */
2671 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2672 		error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
2673 		if (error)
2674 			return (error);
2675 	}
2676 
2677 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2678 		return (error);
2679 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2680 	VATTR_NULL(&vattr);
2681 	vattr.va_flags = flags;
2682 #ifdef MAC
2683 	error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
2684 	if (error == 0)
2685 #endif
2686 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
2687 	VOP_UNLOCK(vp, 0);
2688 	vn_finished_write(mp);
2689 	return (error);
2690 }
2691 
2692 /*
2693  * Change flags of a file given a path name.
2694  */
2695 #ifndef _SYS_SYSPROTO_H_
2696 struct chflags_args {
2697 	char	*path;
2698 	int	flags;
2699 };
2700 #endif
2701 int
2702 chflags(td, uap)
2703 	struct thread *td;
2704 	register struct chflags_args /* {
2705 		char *path;
2706 		int flags;
2707 	} */ *uap;
2708 {
2709 	int error;
2710 	struct nameidata nd;
2711 	int vfslocked;
2712 
2713 	AUDIT_ARG_FFLAGS(uap->flags);
2714 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2715 	    uap->path, td);
2716 	if ((error = namei(&nd)) != 0)
2717 		return (error);
2718 	NDFREE(&nd, NDF_ONLY_PNBUF);
2719 	vfslocked = NDHASGIANT(&nd);
2720 	error = setfflags(td, nd.ni_vp, uap->flags);
2721 	vrele(nd.ni_vp);
2722 	VFS_UNLOCK_GIANT(vfslocked);
2723 	return (error);
2724 }
2725 
2726 /*
2727  * Same as chflags() but doesn't follow symlinks.
2728  */
2729 int
2730 lchflags(td, uap)
2731 	struct thread *td;
2732 	register struct lchflags_args /* {
2733 		char *path;
2734 		int flags;
2735 	} */ *uap;
2736 {
2737 	int error;
2738 	struct nameidata nd;
2739 	int vfslocked;
2740 
2741 	AUDIT_ARG_FFLAGS(uap->flags);
2742 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2743 	    uap->path, td);
2744 	if ((error = namei(&nd)) != 0)
2745 		return (error);
2746 	vfslocked = NDHASGIANT(&nd);
2747 	NDFREE(&nd, NDF_ONLY_PNBUF);
2748 	error = setfflags(td, nd.ni_vp, uap->flags);
2749 	vrele(nd.ni_vp);
2750 	VFS_UNLOCK_GIANT(vfslocked);
2751 	return (error);
2752 }
2753 
2754 /*
2755  * Change flags of a file given a file descriptor.
2756  */
2757 #ifndef _SYS_SYSPROTO_H_
2758 struct fchflags_args {
2759 	int	fd;
2760 	int	flags;
2761 };
2762 #endif
2763 int
2764 fchflags(td, uap)
2765 	struct thread *td;
2766 	register struct fchflags_args /* {
2767 		int fd;
2768 		int flags;
2769 	} */ *uap;
2770 {
2771 	struct file *fp;
2772 	int vfslocked;
2773 	int error;
2774 
2775 	AUDIT_ARG_FD(uap->fd);
2776 	AUDIT_ARG_FFLAGS(uap->flags);
2777 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2778 		return (error);
2779 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2780 #ifdef AUDIT
2781 	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
2782 	AUDIT_ARG_VNODE1(fp->f_vnode);
2783 	VOP_UNLOCK(fp->f_vnode, 0);
2784 #endif
2785 	error = setfflags(td, fp->f_vnode, uap->flags);
2786 	VFS_UNLOCK_GIANT(vfslocked);
2787 	fdrop(fp, td);
2788 	return (error);
2789 }
2790 
2791 /*
2792  * Common implementation code for chmod(), lchmod() and fchmod().
2793  */
2794 static int
2795 setfmode(td, vp, mode)
2796 	struct thread *td;
2797 	struct vnode *vp;
2798 	int mode;
2799 {
2800 	int error;
2801 	struct mount *mp;
2802 	struct vattr vattr;
2803 
2804 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2805 		return (error);
2806 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2807 	VATTR_NULL(&vattr);
2808 	vattr.va_mode = mode & ALLPERMS;
2809 #ifdef MAC
2810 	error = mac_vnode_check_setmode(td->td_ucred, vp, vattr.va_mode);
2811 	if (error == 0)
2812 #endif
2813 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
2814 	VOP_UNLOCK(vp, 0);
2815 	vn_finished_write(mp);
2816 	return (error);
2817 }
2818 
2819 /*
2820  * Change mode of a file given path name.
2821  */
2822 #ifndef _SYS_SYSPROTO_H_
2823 struct chmod_args {
2824 	char	*path;
2825 	int	mode;
2826 };
2827 #endif
2828 int
2829 chmod(td, uap)
2830 	struct thread *td;
2831 	register struct chmod_args /* {
2832 		char *path;
2833 		int mode;
2834 	} */ *uap;
2835 {
2836 
2837 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2838 }
2839 
2840 #ifndef _SYS_SYSPROTO_H_
2841 struct fchmodat_args {
2842 	int	dirfd;
2843 	char	*path;
2844 	mode_t	mode;
2845 	int	flag;
2846 }
2847 #endif
2848 int
2849 fchmodat(struct thread *td, struct fchmodat_args *uap)
2850 {
2851 	int flag = uap->flag;
2852 	int fd = uap->fd;
2853 	char *path = uap->path;
2854 	mode_t mode = uap->mode;
2855 
2856 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2857 		return (EINVAL);
2858 
2859 	return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
2860 }
2861 
2862 int
2863 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2864 {
2865 
2866 	return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0));
2867 }
2868 
2869 /*
2870  * Change mode of a file given path name (don't follow links.)
2871  */
2872 #ifndef _SYS_SYSPROTO_H_
2873 struct lchmod_args {
2874 	char	*path;
2875 	int	mode;
2876 };
2877 #endif
2878 int
2879 lchmod(td, uap)
2880 	struct thread *td;
2881 	register struct lchmod_args /* {
2882 		char *path;
2883 		int mode;
2884 	} */ *uap;
2885 {
2886 
2887 	return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
2888 	    uap->mode, AT_SYMLINK_NOFOLLOW));
2889 }
2890 
2891 
2892 int
2893 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
2894     mode_t mode, int flag)
2895 {
2896 	int error;
2897 	struct nameidata nd;
2898 	int vfslocked;
2899 	int follow;
2900 
2901 	AUDIT_ARG_MODE(mode);
2902 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2903 	NDINIT_AT(&nd, LOOKUP,  follow | MPSAFE | AUDITVNODE1, pathseg, path,
2904 	    fd, td);
2905 	if ((error = namei(&nd)) != 0)
2906 		return (error);
2907 	vfslocked = NDHASGIANT(&nd);
2908 	NDFREE(&nd, NDF_ONLY_PNBUF);
2909 	error = setfmode(td, nd.ni_vp, mode);
2910 	vrele(nd.ni_vp);
2911 	VFS_UNLOCK_GIANT(vfslocked);
2912 	return (error);
2913 }
2914 
2915 /*
2916  * Change mode of a file given a file descriptor.
2917  */
2918 #ifndef _SYS_SYSPROTO_H_
2919 struct fchmod_args {
2920 	int	fd;
2921 	int	mode;
2922 };
2923 #endif
2924 int
2925 fchmod(td, uap)
2926 	struct thread *td;
2927 	register struct fchmod_args /* {
2928 		int fd;
2929 		int mode;
2930 	} */ *uap;
2931 {
2932 	struct file *fp;
2933 	int vfslocked;
2934 	int error;
2935 
2936 	AUDIT_ARG_FD(uap->fd);
2937 	AUDIT_ARG_MODE(uap->mode);
2938 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2939 		return (error);
2940 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2941 #ifdef AUDIT
2942 	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
2943 	AUDIT_ARG_VNODE1(fp->f_vnode);
2944 	VOP_UNLOCK(fp->f_vnode, 0);
2945 #endif
2946 	error = setfmode(td, fp->f_vnode, uap->mode);
2947 	VFS_UNLOCK_GIANT(vfslocked);
2948 	fdrop(fp, td);
2949 	return (error);
2950 }
2951 
2952 /*
2953  * Common implementation for chown(), lchown(), and fchown()
2954  */
2955 static int
2956 setfown(td, vp, uid, gid)
2957 	struct thread *td;
2958 	struct vnode *vp;
2959 	uid_t uid;
2960 	gid_t gid;
2961 {
2962 	int error;
2963 	struct mount *mp;
2964 	struct vattr vattr;
2965 
2966 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2967 		return (error);
2968 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2969 	VATTR_NULL(&vattr);
2970 	vattr.va_uid = uid;
2971 	vattr.va_gid = gid;
2972 #ifdef MAC
2973 	error = mac_vnode_check_setowner(td->td_ucred, vp, vattr.va_uid,
2974 	    vattr.va_gid);
2975 	if (error == 0)
2976 #endif
2977 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
2978 	VOP_UNLOCK(vp, 0);
2979 	vn_finished_write(mp);
2980 	return (error);
2981 }
2982 
2983 /*
2984  * Set ownership given a path name.
2985  */
2986 #ifndef _SYS_SYSPROTO_H_
2987 struct chown_args {
2988 	char	*path;
2989 	int	uid;
2990 	int	gid;
2991 };
2992 #endif
2993 int
2994 chown(td, uap)
2995 	struct thread *td;
2996 	register struct chown_args /* {
2997 		char *path;
2998 		int uid;
2999 		int gid;
3000 	} */ *uap;
3001 {
3002 
3003 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
3004 }
3005 
3006 #ifndef _SYS_SYSPROTO_H_
3007 struct fchownat_args {
3008 	int fd;
3009 	const char * path;
3010 	uid_t uid;
3011 	gid_t gid;
3012 	int flag;
3013 };
3014 #endif
3015 int
3016 fchownat(struct thread *td, struct fchownat_args *uap)
3017 {
3018 	int flag;
3019 
3020 	flag = uap->flag;
3021 	if (flag & ~AT_SYMLINK_NOFOLLOW)
3022 		return (EINVAL);
3023 
3024 	return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
3025 	    uap->gid, uap->flag));
3026 }
3027 
3028 int
3029 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
3030     int gid)
3031 {
3032 
3033 	return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0));
3034 }
3035 
3036 int
3037 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
3038     int uid, int gid, int flag)
3039 {
3040 	struct nameidata nd;
3041 	int error, vfslocked, follow;
3042 
3043 	AUDIT_ARG_OWNER(uid, gid);
3044 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
3045 	NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg, path,
3046 	    fd, td);
3047 
3048 	if ((error = namei(&nd)) != 0)
3049 		return (error);
3050 	vfslocked = NDHASGIANT(&nd);
3051 	NDFREE(&nd, NDF_ONLY_PNBUF);
3052 	error = setfown(td, nd.ni_vp, uid, gid);
3053 	vrele(nd.ni_vp);
3054 	VFS_UNLOCK_GIANT(vfslocked);
3055 	return (error);
3056 }
3057 
3058 /*
3059  * Set ownership given a path name, do not cross symlinks.
3060  */
3061 #ifndef _SYS_SYSPROTO_H_
3062 struct lchown_args {
3063 	char	*path;
3064 	int	uid;
3065 	int	gid;
3066 };
3067 #endif
3068 int
3069 lchown(td, uap)
3070 	struct thread *td;
3071 	register struct lchown_args /* {
3072 		char *path;
3073 		int uid;
3074 		int gid;
3075 	} */ *uap;
3076 {
3077 
3078 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
3079 }
3080 
3081 int
3082 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
3083     int gid)
3084 {
3085 
3086 	return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid,
3087 	    AT_SYMLINK_NOFOLLOW));
3088 }
3089 
3090 /*
3091  * Set ownership given a file descriptor.
3092  */
3093 #ifndef _SYS_SYSPROTO_H_
3094 struct fchown_args {
3095 	int	fd;
3096 	int	uid;
3097 	int	gid;
3098 };
3099 #endif
3100 int
3101 fchown(td, uap)
3102 	struct thread *td;
3103 	register struct fchown_args /* {
3104 		int fd;
3105 		int uid;
3106 		int gid;
3107 	} */ *uap;
3108 {
3109 	struct file *fp;
3110 	int vfslocked;
3111 	int error;
3112 
3113 	AUDIT_ARG_FD(uap->fd);
3114 	AUDIT_ARG_OWNER(uap->uid, uap->gid);
3115 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3116 		return (error);
3117 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
3118 #ifdef AUDIT
3119 	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
3120 	AUDIT_ARG_VNODE1(fp->f_vnode);
3121 	VOP_UNLOCK(fp->f_vnode, 0);
3122 #endif
3123 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
3124 	VFS_UNLOCK_GIANT(vfslocked);
3125 	fdrop(fp, td);
3126 	return (error);
3127 }
3128 
3129 /*
3130  * Common implementation code for utimes(), lutimes(), and futimes().
3131  */
3132 static int
3133 getutimes(usrtvp, tvpseg, tsp)
3134 	const struct timeval *usrtvp;
3135 	enum uio_seg tvpseg;
3136 	struct timespec *tsp;
3137 {
3138 	struct timeval tv[2];
3139 	const struct timeval *tvp;
3140 	int error;
3141 
3142 	if (usrtvp == NULL) {
3143 		vfs_timestamp(&tsp[0]);
3144 		tsp[1] = tsp[0];
3145 	} else {
3146 		if (tvpseg == UIO_SYSSPACE) {
3147 			tvp = usrtvp;
3148 		} else {
3149 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
3150 				return (error);
3151 			tvp = tv;
3152 		}
3153 
3154 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
3155 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
3156 			return (EINVAL);
3157 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
3158 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
3159 	}
3160 	return (0);
3161 }
3162 
3163 /*
3164  * Common implementation code for utimes(), lutimes(), and futimes().
3165  */
3166 static int
3167 setutimes(td, vp, ts, numtimes, nullflag)
3168 	struct thread *td;
3169 	struct vnode *vp;
3170 	const struct timespec *ts;
3171 	int numtimes;
3172 	int nullflag;
3173 {
3174 	int error, setbirthtime;
3175 	struct mount *mp;
3176 	struct vattr vattr;
3177 
3178 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3179 		return (error);
3180 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3181 	setbirthtime = 0;
3182 	if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
3183 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
3184 		setbirthtime = 1;
3185 	VATTR_NULL(&vattr);
3186 	vattr.va_atime = ts[0];
3187 	vattr.va_mtime = ts[1];
3188 	if (setbirthtime)
3189 		vattr.va_birthtime = ts[1];
3190 	if (numtimes > 2)
3191 		vattr.va_birthtime = ts[2];
3192 	if (nullflag)
3193 		vattr.va_vaflags |= VA_UTIMES_NULL;
3194 #ifdef MAC
3195 	error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
3196 	    vattr.va_mtime);
3197 #endif
3198 	if (error == 0)
3199 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
3200 	VOP_UNLOCK(vp, 0);
3201 	vn_finished_write(mp);
3202 	return (error);
3203 }
3204 
3205 /*
3206  * Set the access and modification times of a file.
3207  */
3208 #ifndef _SYS_SYSPROTO_H_
3209 struct utimes_args {
3210 	char	*path;
3211 	struct	timeval *tptr;
3212 };
3213 #endif
3214 int
3215 utimes(td, uap)
3216 	struct thread *td;
3217 	register struct utimes_args /* {
3218 		char *path;
3219 		struct timeval *tptr;
3220 	} */ *uap;
3221 {
3222 
3223 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
3224 	    UIO_USERSPACE));
3225 }
3226 
3227 #ifndef _SYS_SYSPROTO_H_
3228 struct futimesat_args {
3229 	int fd;
3230 	const char * path;
3231 	const struct timeval * times;
3232 };
3233 #endif
3234 int
3235 futimesat(struct thread *td, struct futimesat_args *uap)
3236 {
3237 
3238 	return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
3239 	    uap->times, UIO_USERSPACE));
3240 }
3241 
3242 int
3243 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
3244     struct timeval *tptr, enum uio_seg tptrseg)
3245 {
3246 
3247 	return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg));
3248 }
3249 
3250 int
3251 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
3252     struct timeval *tptr, enum uio_seg tptrseg)
3253 {
3254 	struct nameidata nd;
3255 	struct timespec ts[2];
3256 	int error, vfslocked;
3257 
3258 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
3259 		return (error);
3260 	NDINIT_AT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path,
3261 	    fd, td);
3262 
3263 	if ((error = namei(&nd)) != 0)
3264 		return (error);
3265 	vfslocked = NDHASGIANT(&nd);
3266 	NDFREE(&nd, NDF_ONLY_PNBUF);
3267 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
3268 	vrele(nd.ni_vp);
3269 	VFS_UNLOCK_GIANT(vfslocked);
3270 	return (error);
3271 }
3272 
3273 /*
3274  * Set the access and modification times of a file.
3275  */
3276 #ifndef _SYS_SYSPROTO_H_
3277 struct lutimes_args {
3278 	char	*path;
3279 	struct	timeval *tptr;
3280 };
3281 #endif
3282 int
3283 lutimes(td, uap)
3284 	struct thread *td;
3285 	register struct lutimes_args /* {
3286 		char *path;
3287 		struct timeval *tptr;
3288 	} */ *uap;
3289 {
3290 
3291 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
3292 	    UIO_USERSPACE));
3293 }
3294 
3295 int
3296 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
3297     struct timeval *tptr, enum uio_seg tptrseg)
3298 {
3299 	struct timespec ts[2];
3300 	int error;
3301 	struct nameidata nd;
3302 	int vfslocked;
3303 
3304 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
3305 		return (error);
3306 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3307 	if ((error = namei(&nd)) != 0)
3308 		return (error);
3309 	vfslocked = NDHASGIANT(&nd);
3310 	NDFREE(&nd, NDF_ONLY_PNBUF);
3311 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
3312 	vrele(nd.ni_vp);
3313 	VFS_UNLOCK_GIANT(vfslocked);
3314 	return (error);
3315 }
3316 
3317 /*
3318  * Set the access and modification times of a file.
3319  */
3320 #ifndef _SYS_SYSPROTO_H_
3321 struct futimes_args {
3322 	int	fd;
3323 	struct	timeval *tptr;
3324 };
3325 #endif
3326 int
3327 futimes(td, uap)
3328 	struct thread *td;
3329 	register struct futimes_args /* {
3330 		int  fd;
3331 		struct timeval *tptr;
3332 	} */ *uap;
3333 {
3334 
3335 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
3336 }
3337 
3338 int
3339 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
3340     enum uio_seg tptrseg)
3341 {
3342 	struct timespec ts[2];
3343 	struct file *fp;
3344 	int vfslocked;
3345 	int error;
3346 
3347 	AUDIT_ARG_FD(fd);
3348 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
3349 		return (error);
3350 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
3351 		return (error);
3352 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
3353 #ifdef AUDIT
3354 	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
3355 	AUDIT_ARG_VNODE1(fp->f_vnode);
3356 	VOP_UNLOCK(fp->f_vnode, 0);
3357 #endif
3358 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
3359 	VFS_UNLOCK_GIANT(vfslocked);
3360 	fdrop(fp, td);
3361 	return (error);
3362 }
3363 
3364 /*
3365  * Truncate a file given its path name.
3366  */
3367 #ifndef _SYS_SYSPROTO_H_
3368 struct truncate_args {
3369 	char	*path;
3370 	int	pad;
3371 	off_t	length;
3372 };
3373 #endif
3374 int
3375 truncate(td, uap)
3376 	struct thread *td;
3377 	register struct truncate_args /* {
3378 		char *path;
3379 		int pad;
3380 		off_t length;
3381 	} */ *uap;
3382 {
3383 
3384 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3385 }
3386 
3387 int
3388 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3389 {
3390 	struct mount *mp;
3391 	struct vnode *vp;
3392 	struct vattr vattr;
3393 	int error;
3394 	struct nameidata nd;
3395 	int vfslocked;
3396 
3397 	if (length < 0)
3398 		return(EINVAL);
3399 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3400 	if ((error = namei(&nd)) != 0)
3401 		return (error);
3402 	vfslocked = NDHASGIANT(&nd);
3403 	vp = nd.ni_vp;
3404 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3405 		vrele(vp);
3406 		VFS_UNLOCK_GIANT(vfslocked);
3407 		return (error);
3408 	}
3409 	NDFREE(&nd, NDF_ONLY_PNBUF);
3410 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3411 	if (vp->v_type == VDIR)
3412 		error = EISDIR;
3413 #ifdef MAC
3414 	else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
3415 	}
3416 #endif
3417 	else if ((error = vn_writechk(vp)) == 0 &&
3418 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3419 		VATTR_NULL(&vattr);
3420 		vattr.va_size = length;
3421 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
3422 	}
3423 	vput(vp);
3424 	vn_finished_write(mp);
3425 	VFS_UNLOCK_GIANT(vfslocked);
3426 	return (error);
3427 }
3428 
#if defined(COMPAT_43)
/*
 * Truncate a file given its path name.
 *
 * COMPAT_43 entry point taking a `long' length: the arguments are
 * repacked into a truncate_args (path and length only) and passed to
 * truncate().
 */
#ifndef _SYS_SYSPROTO_H_
struct otruncate_args {
	char	*path;
	long	length;
};
#endif
int
otruncate(struct thread *td, struct otruncate_args *uap)
{
	struct truncate_args nuap;
	int error;

	nuap.path = uap->path;
	nuap.length = uap->length;
	error = truncate(td, &nuap);
	return (error);
}
#endif /* COMPAT_43 */
3458 
3459 /* Versions with the pad argument */
3460 int
3461 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
3462 {
3463 	struct truncate_args ouap;
3464 
3465 	ouap.path = uap->path;
3466 	ouap.length = uap->length;
3467 	return (truncate(td, &ouap));
3468 }
3469 
3470 int
3471 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
3472 {
3473 	struct ftruncate_args ouap;
3474 
3475 	ouap.fd = uap->fd;
3476 	ouap.length = uap->length;
3477 	return (ftruncate(td, &ouap));
3478 }
3479 
3480 /*
3481  * Sync an open file.
3482  */
3483 #ifndef _SYS_SYSPROTO_H_
3484 struct fsync_args {
3485 	int	fd;
3486 };
3487 #endif
3488 int
3489 fsync(td, uap)
3490 	struct thread *td;
3491 	struct fsync_args /* {
3492 		int fd;
3493 	} */ *uap;
3494 {
3495 	struct vnode *vp;
3496 	struct mount *mp;
3497 	struct file *fp;
3498 	int vfslocked;
3499 	int error, lock_flags;
3500 
3501 	AUDIT_ARG_FD(uap->fd);
3502 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3503 		return (error);
3504 	vp = fp->f_vnode;
3505 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3506 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3507 		goto drop;
3508 	if (MNT_SHARED_WRITES(mp) ||
3509 	    ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
3510 		lock_flags = LK_SHARED;
3511 	} else {
3512 		lock_flags = LK_EXCLUSIVE;
3513 	}
3514 	vn_lock(vp, lock_flags | LK_RETRY);
3515 	AUDIT_ARG_VNODE1(vp);
3516 	if (vp->v_object != NULL) {
3517 		VM_OBJECT_LOCK(vp->v_object);
3518 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3519 		VM_OBJECT_UNLOCK(vp->v_object);
3520 	}
3521 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3522 
3523 	VOP_UNLOCK(vp, 0);
3524 	vn_finished_write(mp);
3525 drop:
3526 	VFS_UNLOCK_GIANT(vfslocked);
3527 	fdrop(fp, td);
3528 	return (error);
3529 }
3530 
3531 /*
3532  * Rename files.  Source and destination must either both be directories, or
3533  * both not be directories.  If target is a directory, it must be empty.
3534  */
3535 #ifndef _SYS_SYSPROTO_H_
3536 struct rename_args {
3537 	char	*from;
3538 	char	*to;
3539 };
3540 #endif
3541 int
3542 rename(td, uap)
3543 	struct thread *td;
3544 	register struct rename_args /* {
3545 		char *from;
3546 		char *to;
3547 	} */ *uap;
3548 {
3549 
3550 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3551 }
3552 
3553 #ifndef _SYS_SYSPROTO_H_
3554 struct renameat_args {
3555 	int	oldfd;
3556 	char	*old;
3557 	int	newfd;
3558 	char	*new;
3559 };
3560 #endif
3561 int
3562 renameat(struct thread *td, struct renameat_args *uap)
3563 {
3564 
3565 	return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
3566 	    UIO_USERSPACE));
3567 }
3568 
3569 int
3570 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3571 {
3572 
3573 	return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg));
3574 }
3575 
3576 int
3577 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
3578     enum uio_seg pathseg)
3579 {
3580 	struct mount *mp = NULL;
3581 	struct vnode *tvp, *fvp, *tdvp;
3582 	struct nameidata fromnd, tond;
3583 	int tvfslocked;
3584 	int fvfslocked;
3585 	int error;
3586 
3587 	bwillwrite();
3588 #ifdef MAC
3589 	NDINIT_AT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3590 	    AUDITVNODE1, pathseg, old, oldfd, td);
3591 #else
3592 	NDINIT_AT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3593 	    AUDITVNODE1, pathseg, old, oldfd, td);
3594 #endif
3595 
3596 	if ((error = namei(&fromnd)) != 0)
3597 		return (error);
3598 	fvfslocked = NDHASGIANT(&fromnd);
3599 	tvfslocked = 0;
3600 #ifdef MAC
3601 	error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
3602 	    fromnd.ni_vp, &fromnd.ni_cnd);
3603 	VOP_UNLOCK(fromnd.ni_dvp, 0);
3604 	if (fromnd.ni_dvp != fromnd.ni_vp)
3605 		VOP_UNLOCK(fromnd.ni_vp, 0);
3606 #endif
3607 	fvp = fromnd.ni_vp;
3608 	if (error == 0)
3609 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3610 	if (error != 0) {
3611 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3612 		vrele(fromnd.ni_dvp);
3613 		vrele(fvp);
3614 		goto out1;
3615 	}
3616 	NDINIT_AT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3617 	    MPSAFE | AUDITVNODE2, pathseg, new, newfd, td);
3618 	if (fromnd.ni_vp->v_type == VDIR)
3619 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3620 	if ((error = namei(&tond)) != 0) {
3621 		/* Translate error code for rename("dir1", "dir2/."). */
3622 		if (error == EISDIR && fvp->v_type == VDIR)
3623 			error = EINVAL;
3624 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3625 		vrele(fromnd.ni_dvp);
3626 		vrele(fvp);
3627 		vn_finished_write(mp);
3628 		goto out1;
3629 	}
3630 	tvfslocked = NDHASGIANT(&tond);
3631 	tdvp = tond.ni_dvp;
3632 	tvp = tond.ni_vp;
3633 	if (tvp != NULL) {
3634 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3635 			error = ENOTDIR;
3636 			goto out;
3637 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3638 			error = EISDIR;
3639 			goto out;
3640 		}
3641 	}
3642 	if (fvp == tdvp) {
3643 		error = EINVAL;
3644 		goto out;
3645 	}
3646 	/*
3647 	 * If the source is the same as the destination (that is, if they
3648 	 * are links to the same vnode), then there is nothing to do.
3649 	 */
3650 	if (fvp == tvp)
3651 		error = -1;
3652 #ifdef MAC
3653 	else
3654 		error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
3655 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3656 #endif
3657 out:
3658 	if (!error) {
3659 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3660 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3661 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3662 		NDFREE(&tond, NDF_ONLY_PNBUF);
3663 	} else {
3664 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3665 		NDFREE(&tond, NDF_ONLY_PNBUF);
3666 		if (tvp)
3667 			vput(tvp);
3668 		if (tdvp == tvp)
3669 			vrele(tdvp);
3670 		else
3671 			vput(tdvp);
3672 		vrele(fromnd.ni_dvp);
3673 		vrele(fvp);
3674 	}
3675 	vrele(tond.ni_startdir);
3676 	vn_finished_write(mp);
3677 out1:
3678 	if (fromnd.ni_startdir)
3679 		vrele(fromnd.ni_startdir);
3680 	VFS_UNLOCK_GIANT(fvfslocked);
3681 	VFS_UNLOCK_GIANT(tvfslocked);
3682 	if (error == -1)
3683 		return (0);
3684 	return (error);
3685 }
3686 
3687 /*
3688  * Make a directory file.
3689  */
3690 #ifndef _SYS_SYSPROTO_H_
3691 struct mkdir_args {
3692 	char	*path;
3693 	int	mode;
3694 };
3695 #endif
3696 int
3697 mkdir(td, uap)
3698 	struct thread *td;
3699 	register struct mkdir_args /* {
3700 		char *path;
3701 		int mode;
3702 	} */ *uap;
3703 {
3704 
3705 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3706 }
3707 
3708 #ifndef _SYS_SYSPROTO_H_
3709 struct mkdirat_args {
3710 	int	fd;
3711 	char	*path;
3712 	mode_t	mode;
3713 };
3714 #endif
3715 int
3716 mkdirat(struct thread *td, struct mkdirat_args *uap)
3717 {
3718 
3719 	return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
3720 }
3721 
3722 int
3723 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3724 {
3725 
3726 	return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode));
3727 }
3728 
3729 int
3730 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
3731     int mode)
3732 {
3733 	struct mount *mp;
3734 	struct vnode *vp;
3735 	struct vattr vattr;
3736 	int error;
3737 	struct nameidata nd;
3738 	int vfslocked;
3739 
3740 	AUDIT_ARG_MODE(mode);
3741 restart:
3742 	bwillwrite();
3743 	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3744 	    segflg, path, fd, td);
3745 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3746 	if ((error = namei(&nd)) != 0)
3747 		return (error);
3748 	vfslocked = NDHASGIANT(&nd);
3749 	vp = nd.ni_vp;
3750 	if (vp != NULL) {
3751 		NDFREE(&nd, NDF_ONLY_PNBUF);
3752 		/*
3753 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3754 		 * the strange behaviour of leaving the vnode unlocked
3755 		 * if the target is the same vnode as the parent.
3756 		 */
3757 		if (vp == nd.ni_dvp)
3758 			vrele(nd.ni_dvp);
3759 		else
3760 			vput(nd.ni_dvp);
3761 		vrele(vp);
3762 		VFS_UNLOCK_GIANT(vfslocked);
3763 		return (EEXIST);
3764 	}
3765 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3766 		NDFREE(&nd, NDF_ONLY_PNBUF);
3767 		vput(nd.ni_dvp);
3768 		VFS_UNLOCK_GIANT(vfslocked);
3769 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3770 			return (error);
3771 		goto restart;
3772 	}
3773 	VATTR_NULL(&vattr);
3774 	vattr.va_type = VDIR;
3775 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3776 #ifdef MAC
3777 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3778 	    &vattr);
3779 	if (error)
3780 		goto out;
3781 #endif
3782 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3783 #ifdef MAC
3784 out:
3785 #endif
3786 	NDFREE(&nd, NDF_ONLY_PNBUF);
3787 	vput(nd.ni_dvp);
3788 	if (!error)
3789 		vput(nd.ni_vp);
3790 	vn_finished_write(mp);
3791 	VFS_UNLOCK_GIANT(vfslocked);
3792 	return (error);
3793 }
3794 
3795 /*
3796  * Remove a directory file.
3797  */
3798 #ifndef _SYS_SYSPROTO_H_
3799 struct rmdir_args {
3800 	char	*path;
3801 };
3802 #endif
3803 int
3804 rmdir(td, uap)
3805 	struct thread *td;
3806 	struct rmdir_args /* {
3807 		char *path;
3808 	} */ *uap;
3809 {
3810 
3811 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3812 }
3813 
3814 int
3815 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3816 {
3817 
3818 	return (kern_rmdirat(td, AT_FDCWD, path, pathseg));
3819 }
3820 
3821 int
3822 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
3823 {
3824 	struct mount *mp;
3825 	struct vnode *vp;
3826 	int error;
3827 	struct nameidata nd;
3828 	int vfslocked;
3829 
3830 restart:
3831 	bwillwrite();
3832 	NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3833 	    pathseg, path, fd, td);
3834 	if ((error = namei(&nd)) != 0)
3835 		return (error);
3836 	vfslocked = NDHASGIANT(&nd);
3837 	vp = nd.ni_vp;
3838 	if (vp->v_type != VDIR) {
3839 		error = ENOTDIR;
3840 		goto out;
3841 	}
3842 	/*
3843 	 * No rmdir "." please.
3844 	 */
3845 	if (nd.ni_dvp == vp) {
3846 		error = EINVAL;
3847 		goto out;
3848 	}
3849 	/*
3850 	 * The root of a mounted filesystem cannot be deleted.
3851 	 */
3852 	if (vp->v_vflag & VV_ROOT) {
3853 		error = EBUSY;
3854 		goto out;
3855 	}
3856 #ifdef MAC
3857 	error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
3858 	    &nd.ni_cnd);
3859 	if (error)
3860 		goto out;
3861 #endif
3862 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3863 		NDFREE(&nd, NDF_ONLY_PNBUF);
3864 		vput(vp);
3865 		if (nd.ni_dvp == vp)
3866 			vrele(nd.ni_dvp);
3867 		else
3868 			vput(nd.ni_dvp);
3869 		VFS_UNLOCK_GIANT(vfslocked);
3870 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3871 			return (error);
3872 		goto restart;
3873 	}
3874 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3875 	vn_finished_write(mp);
3876 out:
3877 	NDFREE(&nd, NDF_ONLY_PNBUF);
3878 	vput(vp);
3879 	if (nd.ni_dvp == vp)
3880 		vrele(nd.ni_dvp);
3881 	else
3882 		vput(nd.ni_dvp);
3883 	VFS_UNLOCK_GIANT(vfslocked);
3884 	return (error);
3885 }
3886 
#ifdef COMPAT_43
/*
 * Read a block of directory entries in a filesystem independent format.
 *
 * 4.3BSD compatibility version.  Unless the big-endian direct path below
 * applies, entries are read into a kernel buffer, the d_type/d_namlen
 * bytes are rearranged into the layout the old interface expects, and the
 * result is then copied out to the caller's buffer.  The offset the read
 * started at is copied out through uap->basep and the number of bytes
 * transferred is returned in td_retval[0].
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
{
	struct vnode *vp;
	struct file *fp;
	struct uio user_uio, kern_uio;
	struct iovec user_iov, kern_iov;
	struct dirent *ent, *end;
	caddr_t kbuf;
	int error, eofflag, nread, vfslocked;
	long startoff;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
	if (error != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		error = EBADF;
		goto drop;
	}
	vp = fp->f_vnode;
unionread:
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	if (vp->v_type != VDIR) {
		VFS_UNLOCK_GIANT(vfslocked);
		error = EINVAL;
		goto drop;
	}
	/* Describe the caller's buffer. */
	user_iov.iov_base = uap->buf;
	user_iov.iov_len = uap->count;
	user_uio.uio_iov = &user_iov;
	user_uio.uio_iovcnt = 1;
	user_uio.uio_rw = UIO_READ;
	user_uio.uio_segflg = UIO_USERSPACE;
	user_uio.uio_td = td;
	user_uio.uio_resid = uap->count;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	startoff = user_uio.uio_offset = fp->f_offset;
#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, vp);
	if (error)
		goto unlock;
#endif
#if BYTE_ORDER != LITTLE_ENDIAN
	if (vp->v_mount->mnt_maxsymlinklen <= 0) {
		/* Read straight into the caller's buffer, no fix-up. */
		error = VOP_READDIR(vp, &user_uio, fp->f_cred, &eofflag,
		    NULL, NULL);
		fp->f_offset = user_uio.uio_offset;
	} else
#endif
	{
		/* Read into a kernel bounce buffer so entries can be edited. */
		kbuf = malloc(uap->count, M_TEMP, M_WAITOK);
		kern_iov.iov_base = kbuf;
		kern_iov.iov_len = uap->count;
		kern_uio = user_uio;
		kern_uio.uio_iov = &kern_iov;
		kern_uio.uio_segflg = UIO_SYSSPACE;
		error = VOP_READDIR(vp, &kern_uio, fp->f_cred, &eofflag,
		    NULL, NULL);
		fp->f_offset = kern_uio.uio_offset;
		if (error == 0) {
			nread = uap->count - kern_uio.uio_resid;
			end = (struct dirent *)&kbuf[nread];
			for (ent = (struct dirent *)kbuf; ent < end;
			    ent = (struct dirent *)
			    ((char *)ent + ent->d_reclen)) {
#if BYTE_ORDER == LITTLE_ENDIAN
				/*
				 * The expected low byte of ent->d_namlen
				 * is our ent->d_type.  The high MBZ byte
				 * of ent->d_namlen is our ent->d_namlen.
				 */
				ent->d_type = ent->d_namlen;
				ent->d_namlen = 0;
#else
				/*
				 * The ent->d_type is the high byte of the
				 * expected ent->d_namlen, so must be
				 * zero'ed.
				 */
				ent->d_type = 0;
#endif
				/* A record that cannot advance is an error. */
				if (ent->d_reclen <= 0) {
					error = EIO;
					break;
				}
			}
			/* Copy out only if every record was walked. */
			if (ent >= end)
				error = uiomove(kbuf, nread, &user_uio);
		}
		free(kbuf, M_TEMP);
	}
	if (error)
		goto unlock;
	/*
	 * Nothing was transferred from the root of a MNT_UNION mount:
	 * switch the file over to the covered vnode and read from there.
	 */
	if (uap->count == user_uio.uio_resid &&
	    (vp->v_vflag & VV_ROOT) &&
	    (vp->v_mount->mnt_flag & MNT_UNION)) {
		struct vnode *upper = vp;

		vp = vp->v_mount->mnt_vnodecovered;
		VREF(vp);
		fp->f_vnode = vp;
		fp->f_data = vp;
		fp->f_offset = 0;
		vput(upper);
		VFS_UNLOCK_GIANT(vfslocked);
		goto unionread;
	}
	VOP_UNLOCK(vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);
	error = copyout(&startoff, uap->basep, sizeof(long));
	fdrop(fp, td);
	td->td_retval[0] = uap->count - user_uio.uio_resid;
	return (error);

unlock:
	VOP_UNLOCK(vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);
drop:
	fdrop(fp, td);
	return (error);
}
#endif /* COMPAT_43 */
4032 
4033 /*
4034  * Read a block of directory entries in a filesystem independent format.
4035  */
4036 #ifndef _SYS_SYSPROTO_H_
4037 struct getdirentries_args {
4038 	int	fd;
4039 	char	*buf;
4040 	u_int	count;
4041 	long	*basep;
4042 };
4043 #endif
4044 int
4045 getdirentries(td, uap)
4046 	struct thread *td;
4047 	register struct getdirentries_args /* {
4048 		int fd;
4049 		char *buf;
4050 		u_int count;
4051 		long *basep;
4052 	} */ *uap;
4053 {
4054 	long base;
4055 	int error;
4056 
4057 	error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base);
4058 	if (error)
4059 		return (error);
4060 	if (uap->basep != NULL)
4061 		error = copyout(&base, uap->basep, sizeof(long));
4062 	return (error);
4063 }
4064 
4065 int
4066 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
4067     long *basep)
4068 {
4069 	struct vnode *vp;
4070 	struct file *fp;
4071 	struct uio auio;
4072 	struct iovec aiov;
4073 	int vfslocked;
4074 	long loff;
4075 	int error, eofflag;
4076 
4077 	AUDIT_ARG_FD(fd);
4078 	if (count > INT_MAX)
4079 		return (EINVAL);
4080 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
4081 		return (error);
4082 	if ((fp->f_flag & FREAD) == 0) {
4083 		fdrop(fp, td);
4084 		return (EBADF);
4085 	}
4086 	vp = fp->f_vnode;
4087 unionread:
4088 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
4089 	if (vp->v_type != VDIR) {
4090 		VFS_UNLOCK_GIANT(vfslocked);
4091 		error = EINVAL;
4092 		goto fail;
4093 	}
4094 	aiov.iov_base = buf;
4095 	aiov.iov_len = count;
4096 	auio.uio_iov = &aiov;
4097 	auio.uio_iovcnt = 1;
4098 	auio.uio_rw = UIO_READ;
4099 	auio.uio_segflg = UIO_USERSPACE;
4100 	auio.uio_td = td;
4101 	auio.uio_resid = count;
4102 	vn_lock(vp, LK_SHARED | LK_RETRY);
4103 	AUDIT_ARG_VNODE1(vp);
4104 	loff = auio.uio_offset = fp->f_offset;
4105 #ifdef MAC
4106 	error = mac_vnode_check_readdir(td->td_ucred, vp);
4107 	if (error == 0)
4108 #endif
4109 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
4110 		    NULL);
4111 	fp->f_offset = auio.uio_offset;
4112 	if (error) {
4113 		VOP_UNLOCK(vp, 0);
4114 		VFS_UNLOCK_GIANT(vfslocked);
4115 		goto fail;
4116 	}
4117 	if (count == auio.uio_resid &&
4118 	    (vp->v_vflag & VV_ROOT) &&
4119 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
4120 		struct vnode *tvp = vp;
4121 		vp = vp->v_mount->mnt_vnodecovered;
4122 		VREF(vp);
4123 		fp->f_vnode = vp;
4124 		fp->f_data = vp;
4125 		fp->f_offset = 0;
4126 		vput(tvp);
4127 		VFS_UNLOCK_GIANT(vfslocked);
4128 		goto unionread;
4129 	}
4130 	VOP_UNLOCK(vp, 0);
4131 	VFS_UNLOCK_GIANT(vfslocked);
4132 	*basep = loff;
4133 	td->td_retval[0] = count - auio.uio_resid;
4134 fail:
4135 	fdrop(fp, td);
4136 	return (error);
4137 }
4138 
4139 #ifndef _SYS_SYSPROTO_H_
4140 struct getdents_args {
4141 	int fd;
4142 	char *buf;
4143 	size_t count;
4144 };
4145 #endif
4146 int
4147 getdents(td, uap)
4148 	struct thread *td;
4149 	register struct getdents_args /* {
4150 		int fd;
4151 		char *buf;
4152 		u_int count;
4153 	} */ *uap;
4154 {
4155 	struct getdirentries_args ap;
4156 	ap.fd = uap->fd;
4157 	ap.buf = uap->buf;
4158 	ap.count = uap->count;
4159 	ap.basep = NULL;
4160 	return (getdirentries(td, &ap));
4161 }
4162 
4163 /*
4164  * Set the mode mask for creation of filesystem nodes.
4165  */
4166 #ifndef _SYS_SYSPROTO_H_
4167 struct umask_args {
4168 	int	newmask;
4169 };
4170 #endif
4171 int
4172 umask(td, uap)
4173 	struct thread *td;
4174 	struct umask_args /* {
4175 		int newmask;
4176 	} */ *uap;
4177 {
4178 	register struct filedesc *fdp;
4179 
4180 	FILEDESC_XLOCK(td->td_proc->p_fd);
4181 	fdp = td->td_proc->p_fd;
4182 	td->td_retval[0] = fdp->fd_cmask;
4183 	fdp->fd_cmask = uap->newmask & ALLPERMS;
4184 	FILEDESC_XUNLOCK(td->td_proc->p_fd);
4185 	return (0);
4186 }
4187 
4188 /*
4189  * Void all references to file by ripping underlying filesystem away from
4190  * vnode.
4191  */
4192 #ifndef _SYS_SYSPROTO_H_
4193 struct revoke_args {
4194 	char	*path;
4195 };
4196 #endif
4197 int
4198 revoke(td, uap)
4199 	struct thread *td;
4200 	register struct revoke_args /* {
4201 		char *path;
4202 	} */ *uap;
4203 {
4204 	struct vnode *vp;
4205 	struct vattr vattr;
4206 	int error;
4207 	struct nameidata nd;
4208 	int vfslocked;
4209 
4210 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4211 	    UIO_USERSPACE, uap->path, td);
4212 	if ((error = namei(&nd)) != 0)
4213 		return (error);
4214 	vfslocked = NDHASGIANT(&nd);
4215 	vp = nd.ni_vp;
4216 	NDFREE(&nd, NDF_ONLY_PNBUF);
4217 	if (vp->v_type != VCHR) {
4218 		error = EINVAL;
4219 		goto out;
4220 	}
4221 #ifdef MAC
4222 	error = mac_vnode_check_revoke(td->td_ucred, vp);
4223 	if (error)
4224 		goto out;
4225 #endif
4226 	error = VOP_GETATTR(vp, &vattr, td->td_ucred);
4227 	if (error)
4228 		goto out;
4229 	if (td->td_ucred->cr_uid != vattr.va_uid) {
4230 		error = priv_check(td, PRIV_VFS_ADMIN);
4231 		if (error)
4232 			goto out;
4233 	}
4234 	if (vcount(vp) > 1)
4235 		VOP_REVOKE(vp, REVOKEALL);
4236 out:
4237 	vput(vp);
4238 	VFS_UNLOCK_GIANT(vfslocked);
4239 	return (error);
4240 }
4241 
4242 /*
4243  * Convert a user file descriptor to a kernel file entry.
4244  * A reference on the file entry is held upon returning.
4245  */
4246 int
4247 getvnode(fdp, fd, fpp)
4248 	struct filedesc *fdp;
4249 	int fd;
4250 	struct file **fpp;
4251 {
4252 	int error;
4253 	struct file *fp;
4254 
4255 	error = 0;
4256 	fp = NULL;
4257 	if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL)
4258 		error = EBADF;
4259 	else if (fp->f_vnode == NULL) {
4260 		error = EINVAL;
4261 		fdrop(fp, curthread);
4262 	}
4263 	*fpp = fp;
4264 	return (error);
4265 }
4266 
4267 /*
4268  * Get an (NFS) file handle.
4269  */
4270 #ifndef _SYS_SYSPROTO_H_
4271 struct lgetfh_args {
4272 	char	*fname;
4273 	fhandle_t *fhp;
4274 };
4275 #endif
4276 int
4277 lgetfh(td, uap)
4278 	struct thread *td;
4279 	register struct lgetfh_args *uap;
4280 {
4281 	struct nameidata nd;
4282 	fhandle_t fh;
4283 	register struct vnode *vp;
4284 	int vfslocked;
4285 	int error;
4286 
4287 	error = priv_check(td, PRIV_VFS_GETFH);
4288 	if (error)
4289 		return (error);
4290 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4291 	    UIO_USERSPACE, uap->fname, td);
4292 	error = namei(&nd);
4293 	if (error)
4294 		return (error);
4295 	vfslocked = NDHASGIANT(&nd);
4296 	NDFREE(&nd, NDF_ONLY_PNBUF);
4297 	vp = nd.ni_vp;
4298 	bzero(&fh, sizeof(fh));
4299 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4300 	error = VOP_VPTOFH(vp, &fh.fh_fid);
4301 	vput(vp);
4302 	VFS_UNLOCK_GIANT(vfslocked);
4303 	if (error)
4304 		return (error);
4305 	error = copyout(&fh, uap->fhp, sizeof (fh));
4306 	return (error);
4307 }
4308 
4309 #ifndef _SYS_SYSPROTO_H_
4310 struct getfh_args {
4311 	char	*fname;
4312 	fhandle_t *fhp;
4313 };
4314 #endif
4315 int
4316 getfh(td, uap)
4317 	struct thread *td;
4318 	register struct getfh_args *uap;
4319 {
4320 	struct nameidata nd;
4321 	fhandle_t fh;
4322 	register struct vnode *vp;
4323 	int vfslocked;
4324 	int error;
4325 
4326 	error = priv_check(td, PRIV_VFS_GETFH);
4327 	if (error)
4328 		return (error);
4329 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4330 	    UIO_USERSPACE, uap->fname, td);
4331 	error = namei(&nd);
4332 	if (error)
4333 		return (error);
4334 	vfslocked = NDHASGIANT(&nd);
4335 	NDFREE(&nd, NDF_ONLY_PNBUF);
4336 	vp = nd.ni_vp;
4337 	bzero(&fh, sizeof(fh));
4338 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4339 	error = VOP_VPTOFH(vp, &fh.fh_fid);
4340 	vput(vp);
4341 	VFS_UNLOCK_GIANT(vfslocked);
4342 	if (error)
4343 		return (error);
4344 	error = copyout(&fh, uap->fhp, sizeof (fh));
4345 	return (error);
4346 }
4347 
4348 /*
4349  * syscall for the rpc.lockd to use to translate a NFS file handle into an
4350  * open descriptor.
4351  *
4352  * warning: do not remove the priv_check() call or this becomes one giant
4353  * security hole.
4354  */
4355 #ifndef _SYS_SYSPROTO_H_
4356 struct fhopen_args {
4357 	const struct fhandle *u_fhp;
4358 	int flags;
4359 };
4360 #endif
4361 int
4362 fhopen(td, uap)
4363 	struct thread *td;
4364 	struct fhopen_args /* {
4365 		const struct fhandle *u_fhp;
4366 		int flags;
4367 	} */ *uap;
4368 {
4369 	struct proc *p = td->td_proc;
4370 	struct mount *mp;
4371 	struct vnode *vp;
4372 	struct fhandle fhp;
4373 	struct vattr vat;
4374 	struct vattr *vap = &vat;
4375 	struct flock lf;
4376 	struct file *fp;
4377 	register struct filedesc *fdp = p->p_fd;
4378 	int fmode, error, type;
4379 	accmode_t accmode;
4380 	struct file *nfp;
4381 	int vfslocked;
4382 	int indx;
4383 
4384 	error = priv_check(td, PRIV_VFS_FHOPEN);
4385 	if (error)
4386 		return (error);
4387 	fmode = FFLAGS(uap->flags);
4388 	/* why not allow a non-read/write open for our lockd? */
4389 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4390 		return (EINVAL);
4391 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4392 	if (error)
4393 		return(error);
4394 	/* find the mount point */
4395 	mp = vfs_busyfs(&fhp.fh_fsid);
4396 	if (mp == NULL)
4397 		return (ESTALE);
4398 	vfslocked = VFS_LOCK_GIANT(mp);
4399 	/* now give me my vnode, it gets returned to me locked */
4400 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4401 	vfs_unbusy(mp);
4402 	if (error)
4403 		goto out;
4404 	/*
4405 	 * from now on we have to make sure not
4406 	 * to forget about the vnode
4407 	 * any error that causes an abort must vput(vp)
4408 	 * just set error = err and 'goto bad;'.
4409 	 */
4410 
4411 	/*
4412 	 * from vn_open
4413 	 */
4414 	if (vp->v_type == VLNK) {
4415 		error = EMLINK;
4416 		goto bad;
4417 	}
4418 	if (vp->v_type == VSOCK) {
4419 		error = EOPNOTSUPP;
4420 		goto bad;
4421 	}
4422 	accmode = 0;
4423 	if (fmode & (FWRITE | O_TRUNC)) {
4424 		if (vp->v_type == VDIR) {
4425 			error = EISDIR;
4426 			goto bad;
4427 		}
4428 		error = vn_writechk(vp);
4429 		if (error)
4430 			goto bad;
4431 		accmode |= VWRITE;
4432 	}
4433 	if (fmode & FREAD)
4434 		accmode |= VREAD;
4435 	if ((fmode & O_APPEND) && (fmode & FWRITE))
4436 		accmode |= VAPPEND;
4437 #ifdef MAC
4438 	error = mac_vnode_check_open(td->td_ucred, vp, accmode);
4439 	if (error)
4440 		goto bad;
4441 #endif
4442 	if (accmode) {
4443 		error = VOP_ACCESS(vp, accmode, td->td_ucred, td);
4444 		if (error)
4445 			goto bad;
4446 	}
4447 	if (fmode & O_TRUNC) {
4448 		vfs_ref(mp);
4449 		VOP_UNLOCK(vp, 0);				/* XXX */
4450 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4451 			vrele(vp);
4452 			vfs_rel(mp);
4453 			goto out;
4454 		}
4455 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */
4456 		vfs_rel(mp);
4457 #ifdef MAC
4458 		/*
4459 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4460 		 * should be right.
4461 		 */
4462 		error = mac_vnode_check_write(td->td_ucred, td->td_ucred, vp);
4463 		if (error == 0) {
4464 #endif
4465 			VATTR_NULL(vap);
4466 			vap->va_size = 0;
4467 			error = VOP_SETATTR(vp, vap, td->td_ucred);
4468 #ifdef MAC
4469 		}
4470 #endif
4471 		vn_finished_write(mp);
4472 		if (error)
4473 			goto bad;
4474 	}
4475 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, NULL);
4476 	if (error)
4477 		goto bad;
4478 
4479 	if (fmode & FWRITE)
4480 		vp->v_writecount++;
4481 
4482 	/*
4483 	 * end of vn_open code
4484 	 */
4485 
4486 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4487 		if (fmode & FWRITE)
4488 			vp->v_writecount--;
4489 		goto bad;
4490 	}
4491 	/* An extra reference on `nfp' has been held for us by falloc(). */
4492 	fp = nfp;
4493 	nfp->f_vnode = vp;
4494 	finit(nfp, fmode & FMASK, DTYPE_VNODE, vp, &vnops);
4495 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4496 		lf.l_whence = SEEK_SET;
4497 		lf.l_start = 0;
4498 		lf.l_len = 0;
4499 		if (fmode & O_EXLOCK)
4500 			lf.l_type = F_WRLCK;
4501 		else
4502 			lf.l_type = F_RDLCK;
4503 		type = F_FLOCK;
4504 		if ((fmode & FNONBLOCK) == 0)
4505 			type |= F_WAIT;
4506 		VOP_UNLOCK(vp, 0);
4507 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4508 			    type)) != 0) {
4509 			/*
4510 			 * The lock request failed.  Normally close the
4511 			 * descriptor but handle the case where someone might
4512 			 * have dup()d or close()d it when we weren't looking.
4513 			 */
4514 			fdclose(fdp, fp, indx, td);
4515 
4516 			/*
4517 			 * release our private reference
4518 			 */
4519 			fdrop(fp, td);
4520 			goto out;
4521 		}
4522 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4523 		atomic_set_int(&fp->f_flag, FHASLOCK);
4524 	}
4525 
4526 	VOP_UNLOCK(vp, 0);
4527 	fdrop(fp, td);
4528 	VFS_UNLOCK_GIANT(vfslocked);
4529 	td->td_retval[0] = indx;
4530 	return (0);
4531 
4532 bad:
4533 	vput(vp);
4534 out:
4535 	VFS_UNLOCK_GIANT(vfslocked);
4536 	return (error);
4537 }
4538 
4539 /*
4540  * Stat an (NFS) file handle.
4541  */
4542 #ifndef _SYS_SYSPROTO_H_
4543 struct fhstat_args {
4544 	struct fhandle *u_fhp;
4545 	struct stat *sb;
4546 };
4547 #endif
4548 int
4549 fhstat(td, uap)
4550 	struct thread *td;
4551 	register struct fhstat_args /* {
4552 		struct fhandle *u_fhp;
4553 		struct stat *sb;
4554 	} */ *uap;
4555 {
4556 	struct stat sb;
4557 	fhandle_t fh;
4558 	struct mount *mp;
4559 	struct vnode *vp;
4560 	int vfslocked;
4561 	int error;
4562 
4563 	error = priv_check(td, PRIV_VFS_FHSTAT);
4564 	if (error)
4565 		return (error);
4566 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4567 	if (error)
4568 		return (error);
4569 	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
4570 		return (ESTALE);
4571 	vfslocked = VFS_LOCK_GIANT(mp);
4572 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4573 	vfs_unbusy(mp);
4574 	if (error) {
4575 		VFS_UNLOCK_GIANT(vfslocked);
4576 		return (error);
4577 	}
4578 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4579 	vput(vp);
4580 	VFS_UNLOCK_GIANT(vfslocked);
4581 	if (error)
4582 		return (error);
4583 	error = copyout(&sb, uap->sb, sizeof(sb));
4584 	return (error);
4585 }
4586 
4587 /*
4588  * Implement fstatfs() for (NFS) file handles.
4589  */
4590 #ifndef _SYS_SYSPROTO_H_
4591 struct fhstatfs_args {
4592 	struct fhandle *u_fhp;
4593 	struct statfs *buf;
4594 };
4595 #endif
4596 int
4597 fhstatfs(td, uap)
4598 	struct thread *td;
4599 	struct fhstatfs_args /* {
4600 		struct fhandle *u_fhp;
4601 		struct statfs *buf;
4602 	} */ *uap;
4603 {
4604 	struct statfs sf;
4605 	fhandle_t fh;
4606 	int error;
4607 
4608 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4609 	if (error)
4610 		return (error);
4611 	error = kern_fhstatfs(td, fh, &sf);
4612 	if (error)
4613 		return (error);
4614 	return (copyout(&sf, uap->buf, sizeof(sf)));
4615 }
4616 
4617 int
4618 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4619 {
4620 	struct statfs *sp;
4621 	struct mount *mp;
4622 	struct vnode *vp;
4623 	int vfslocked;
4624 	int error;
4625 
4626 	error = priv_check(td, PRIV_VFS_FHSTATFS);
4627 	if (error)
4628 		return (error);
4629 	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
4630 		return (ESTALE);
4631 	vfslocked = VFS_LOCK_GIANT(mp);
4632 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4633 	if (error) {
4634 		vfs_unbusy(mp);
4635 		VFS_UNLOCK_GIANT(vfslocked);
4636 		return (error);
4637 	}
4638 	vput(vp);
4639 	error = prison_canseemount(td->td_ucred, mp);
4640 	if (error)
4641 		goto out;
4642 #ifdef MAC
4643 	error = mac_mount_check_stat(td->td_ucred, mp);
4644 	if (error)
4645 		goto out;
4646 #endif
4647 	/*
4648 	 * Set these in case the underlying filesystem fails to do so.
4649 	 */
4650 	sp = &mp->mnt_stat;
4651 	sp->f_version = STATFS_VERSION;
4652 	sp->f_namemax = NAME_MAX;
4653 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4654 	error = VFS_STATFS(mp, sp);
4655 	if (error == 0)
4656 		*buf = *sp;
4657 out:
4658 	vfs_unbusy(mp);
4659 	VFS_UNLOCK_GIANT(vfslocked);
4660 	return (error);
4661 }
4662