xref: /freebsd/sys/kern/vfs_syscalls.c (revision 830940567b49bb0c08dfaed40418999e76616909)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_kdtrace.h"
42 #include "opt_ktrace.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/bio.h>
47 #include <sys/buf.h>
48 #include <sys/sysent.h>
49 #include <sys/malloc.h>
50 #include <sys/mount.h>
51 #include <sys/mutex.h>
52 #include <sys/sysproto.h>
53 #include <sys/namei.h>
54 #include <sys/filedesc.h>
55 #include <sys/kernel.h>
56 #include <sys/fcntl.h>
57 #include <sys/file.h>
58 #include <sys/filio.h>
59 #include <sys/limits.h>
60 #include <sys/linker.h>
61 #include <sys/sdt.h>
62 #include <sys/stat.h>
63 #include <sys/sx.h>
64 #include <sys/unistd.h>
65 #include <sys/vnode.h>
66 #include <sys/priv.h>
67 #include <sys/proc.h>
68 #include <sys/dirent.h>
69 #include <sys/jail.h>
70 #include <sys/syscallsubr.h>
71 #include <sys/sysctl.h>
72 #ifdef KTRACE
73 #include <sys/ktrace.h>
74 #endif
75 
76 #include <machine/stdarg.h>
77 
78 #include <security/audit/audit.h>
79 #include <security/mac/mac_framework.h>
80 
81 #include <vm/vm.h>
82 #include <vm/vm_object.h>
83 #include <vm/vm_page.h>
84 #include <vm/uma.h>
85 
86 SDT_PROVIDER_DEFINE(vfs);
87 SDT_PROBE_DEFINE(vfs, , stat, mode);
88 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 0, "char *");
89 SDT_PROBE_ARGTYPE(vfs, , stat, mode, 1, "int");
90 SDT_PROBE_DEFINE(vfs, , stat, reg);
91 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 0, "char *");
92 SDT_PROBE_ARGTYPE(vfs, , stat, reg, 1, "int");
93 
94 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
95 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
96 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
97 static int setfmode(struct thread *td, struct vnode *, int);
98 static int setfflags(struct thread *td, struct vnode *, int);
99 static int setutimes(struct thread *td, struct vnode *,
100     const struct timespec *, int, int);
101 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
102     struct thread *td);
103 
104 /*
105  * The module initialization routine for POSIX asynchronous I/O will
106  * set this to the version of AIO that it implements.  (Zero means
107  * that it is not implemented.)  This value is used here by pathconf()
108  * and in kern_descrip.c by fpathconf().
109  */
110 int async_io_version;
111 
112 #ifdef DEBUG
113 static int syncprt = 0;
114 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
115 #endif
116 
117 /*
118  * Sync each mounted filesystem.
119  */
120 #ifndef _SYS_SYSPROTO_H_
121 struct sync_args {
122 	int     dummy;
123 };
124 #endif
125 /* ARGSUSED */
126 int
127 sync(td, uap)
128 	struct thread *td;
129 	struct sync_args *uap;
130 {
131 	struct mount *mp, *nmp;
132 	int vfslocked;
133 
134 	mtx_lock(&mountlist_mtx);
135 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
136 		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
137 			nmp = TAILQ_NEXT(mp, mnt_list);
138 			continue;
139 		}
140 		vfslocked = VFS_LOCK_GIANT(mp);
141 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
142 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
143 			MNT_ILOCK(mp);
144 			mp->mnt_noasync++;
145 			mp->mnt_kern_flag &= ~MNTK_ASYNC;
146 			MNT_IUNLOCK(mp);
147 			vfs_msync(mp, MNT_NOWAIT);
148 			VFS_SYNC(mp, MNT_NOWAIT);
149 			MNT_ILOCK(mp);
150 			mp->mnt_noasync--;
151 			if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
152 			    mp->mnt_noasync == 0)
153 				mp->mnt_kern_flag |= MNTK_ASYNC;
154 			MNT_IUNLOCK(mp);
155 			vn_finished_write(mp);
156 		}
157 		VFS_UNLOCK_GIANT(vfslocked);
158 		mtx_lock(&mountlist_mtx);
159 		nmp = TAILQ_NEXT(mp, mnt_list);
160 		vfs_unbusy(mp);
161 	}
162 	mtx_unlock(&mountlist_mtx);
163 	return (0);
164 }
165 
166 /*
167  * Change filesystem quotas.
168  */
169 #ifndef _SYS_SYSPROTO_H_
170 struct quotactl_args {
171 	char *path;
172 	int cmd;
173 	int uid;
174 	caddr_t arg;
175 };
176 #endif
177 int
178 quotactl(td, uap)
179 	struct thread *td;
180 	register struct quotactl_args /* {
181 		char *path;
182 		int cmd;
183 		int uid;
184 		caddr_t arg;
185 	} */ *uap;
186 {
187 	struct mount *mp;
188 	int vfslocked;
189 	int error;
190 	struct nameidata nd;
191 
192 	AUDIT_ARG_CMD(uap->cmd);
193 	AUDIT_ARG_UID(uap->uid);
194 	if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
195 		return (EPERM);
196 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
197 	   UIO_USERSPACE, uap->path, td);
198 	if ((error = namei(&nd)) != 0)
199 		return (error);
200 	vfslocked = NDHASGIANT(&nd);
201 	NDFREE(&nd, NDF_ONLY_PNBUF);
202 	mp = nd.ni_vp->v_mount;
203 	vfs_ref(mp);
204 	vput(nd.ni_vp);
205 	error = vfs_busy(mp, 0);
206 	vfs_rel(mp);
207 	if (error) {
208 		VFS_UNLOCK_GIANT(vfslocked);
209 		return (error);
210 	}
211 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
212 	vfs_unbusy(mp);
213 	VFS_UNLOCK_GIANT(vfslocked);
214 	return (error);
215 }
216 
217 /*
218  * Used by statfs conversion routines to scale the block size up if
219  * necessary so that all of the block counts are <= 'max_size'.  Note
220  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
221  * value of 'n'.
222  */
223 void
224 statfs_scale_blocks(struct statfs *sf, long max_size)
225 {
226 	uint64_t count;
227 	int shift;
228 
229 	KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
230 
231 	/*
232 	 * Attempt to scale the block counts to give a more accurate
233 	 * overview to userland of the ratio of free space to used
234 	 * space.  To do this, find the largest block count and compute
235 	 * a divisor that lets it fit into a signed integer <= max_size.
236 	 */
237 	if (sf->f_bavail < 0)
238 		count = -sf->f_bavail;
239 	else
240 		count = sf->f_bavail;
241 	count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
242 	if (count <= max_size)
243 		return;
244 
245 	count >>= flsl(max_size);
246 	shift = 0;
247 	while (count > 0) {
248 		shift++;
249 		count >>=1;
250 	}
251 
252 	sf->f_bsize <<= shift;
253 	sf->f_blocks >>= shift;
254 	sf->f_bfree >>= shift;
255 	sf->f_bavail >>= shift;
256 }
257 
258 /*
259  * Get filesystem statistics.
260  */
261 #ifndef _SYS_SYSPROTO_H_
262 struct statfs_args {
263 	char *path;
264 	struct statfs *buf;
265 };
266 #endif
267 int
268 statfs(td, uap)
269 	struct thread *td;
270 	register struct statfs_args /* {
271 		char *path;
272 		struct statfs *buf;
273 	} */ *uap;
274 {
275 	struct statfs sf;
276 	int error;
277 
278 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
279 	if (error == 0)
280 		error = copyout(&sf, uap->buf, sizeof(sf));
281 	return (error);
282 }
283 
284 int
285 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
286     struct statfs *buf)
287 {
288 	struct mount *mp;
289 	struct statfs *sp, sb;
290 	int vfslocked;
291 	int error;
292 	struct nameidata nd;
293 
294 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
295 	    AUDITVNODE1, pathseg, path, td);
296 	error = namei(&nd);
297 	if (error)
298 		return (error);
299 	vfslocked = NDHASGIANT(&nd);
300 	mp = nd.ni_vp->v_mount;
301 	vfs_ref(mp);
302 	NDFREE(&nd, NDF_ONLY_PNBUF);
303 	vput(nd.ni_vp);
304 	error = vfs_busy(mp, 0);
305 	vfs_rel(mp);
306 	if (error) {
307 		VFS_UNLOCK_GIANT(vfslocked);
308 		return (error);
309 	}
310 #ifdef MAC
311 	error = mac_mount_check_stat(td->td_ucred, mp);
312 	if (error)
313 		goto out;
314 #endif
315 	/*
316 	 * Set these in case the underlying filesystem fails to do so.
317 	 */
318 	sp = &mp->mnt_stat;
319 	sp->f_version = STATFS_VERSION;
320 	sp->f_namemax = NAME_MAX;
321 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
322 	error = VFS_STATFS(mp, sp);
323 	if (error)
324 		goto out;
325 	if (priv_check(td, PRIV_VFS_GENERATION)) {
326 		bcopy(sp, &sb, sizeof(sb));
327 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
328 		prison_enforce_statfs(td->td_ucred, mp, &sb);
329 		sp = &sb;
330 	}
331 	*buf = *sp;
332 out:
333 	vfs_unbusy(mp);
334 	VFS_UNLOCK_GIANT(vfslocked);
335 	return (error);
336 }
337 
338 /*
339  * Get filesystem statistics.
340  */
341 #ifndef _SYS_SYSPROTO_H_
342 struct fstatfs_args {
343 	int fd;
344 	struct statfs *buf;
345 };
346 #endif
347 int
348 fstatfs(td, uap)
349 	struct thread *td;
350 	register struct fstatfs_args /* {
351 		int fd;
352 		struct statfs *buf;
353 	} */ *uap;
354 {
355 	struct statfs sf;
356 	int error;
357 
358 	error = kern_fstatfs(td, uap->fd, &sf);
359 	if (error == 0)
360 		error = copyout(&sf, uap->buf, sizeof(sf));
361 	return (error);
362 }
363 
364 int
365 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
366 {
367 	struct file *fp;
368 	struct mount *mp;
369 	struct statfs *sp, sb;
370 	int vfslocked;
371 	struct vnode *vp;
372 	int error;
373 
374 	AUDIT_ARG_FD(fd);
375 	error = getvnode(td->td_proc->p_fd, fd, &fp);
376 	if (error)
377 		return (error);
378 	vp = fp->f_vnode;
379 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
380 	vn_lock(vp, LK_SHARED | LK_RETRY);
381 #ifdef AUDIT
382 	AUDIT_ARG_VNODE1(vp);
383 #endif
384 	mp = vp->v_mount;
385 	if (mp)
386 		vfs_ref(mp);
387 	VOP_UNLOCK(vp, 0);
388 	fdrop(fp, td);
389 	if (mp == NULL) {
390 		error = EBADF;
391 		goto out;
392 	}
393 	error = vfs_busy(mp, 0);
394 	vfs_rel(mp);
395 	if (error) {
396 		VFS_UNLOCK_GIANT(vfslocked);
397 		return (error);
398 	}
399 #ifdef MAC
400 	error = mac_mount_check_stat(td->td_ucred, mp);
401 	if (error)
402 		goto out;
403 #endif
404 	/*
405 	 * Set these in case the underlying filesystem fails to do so.
406 	 */
407 	sp = &mp->mnt_stat;
408 	sp->f_version = STATFS_VERSION;
409 	sp->f_namemax = NAME_MAX;
410 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
411 	error = VFS_STATFS(mp, sp);
412 	if (error)
413 		goto out;
414 	if (priv_check(td, PRIV_VFS_GENERATION)) {
415 		bcopy(sp, &sb, sizeof(sb));
416 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
417 		prison_enforce_statfs(td->td_ucred, mp, &sb);
418 		sp = &sb;
419 	}
420 	*buf = *sp;
421 out:
422 	if (mp)
423 		vfs_unbusy(mp);
424 	VFS_UNLOCK_GIANT(vfslocked);
425 	return (error);
426 }
427 
428 /*
429  * Get statistics on all filesystems.
430  */
431 #ifndef _SYS_SYSPROTO_H_
432 struct getfsstat_args {
433 	struct statfs *buf;
434 	long bufsize;
435 	int flags;
436 };
437 #endif
438 int
439 getfsstat(td, uap)
440 	struct thread *td;
441 	register struct getfsstat_args /* {
442 		struct statfs *buf;
443 		long bufsize;
444 		int flags;
445 	} */ *uap;
446 {
447 
448 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
449 	    uap->flags));
450 }
451 
452 /*
453  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
454  * 	The caller is responsible for freeing memory which will be allocated
455  *	in '*buf'.
456  */
457 int
458 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
459     enum uio_seg bufseg, int flags)
460 {
461 	struct mount *mp, *nmp;
462 	struct statfs *sfsp, *sp, sb;
463 	size_t count, maxcount;
464 	int vfslocked;
465 	int error;
466 
467 	maxcount = bufsize / sizeof(struct statfs);
468 	if (bufsize == 0)
469 		sfsp = NULL;
470 	else if (bufseg == UIO_USERSPACE)
471 		sfsp = *buf;
472 	else /* if (bufseg == UIO_SYSSPACE) */ {
473 		count = 0;
474 		mtx_lock(&mountlist_mtx);
475 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
476 			count++;
477 		}
478 		mtx_unlock(&mountlist_mtx);
479 		if (maxcount > count)
480 			maxcount = count;
481 		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
482 		    M_WAITOK);
483 	}
484 	count = 0;
485 	mtx_lock(&mountlist_mtx);
486 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
487 		if (prison_canseemount(td->td_ucred, mp) != 0) {
488 			nmp = TAILQ_NEXT(mp, mnt_list);
489 			continue;
490 		}
491 #ifdef MAC
492 		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
493 			nmp = TAILQ_NEXT(mp, mnt_list);
494 			continue;
495 		}
496 #endif
497 		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
498 			nmp = TAILQ_NEXT(mp, mnt_list);
499 			continue;
500 		}
501 		vfslocked = VFS_LOCK_GIANT(mp);
502 		if (sfsp && count < maxcount) {
503 			sp = &mp->mnt_stat;
504 			/*
505 			 * Set these in case the underlying filesystem
506 			 * fails to do so.
507 			 */
508 			sp->f_version = STATFS_VERSION;
509 			sp->f_namemax = NAME_MAX;
510 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
511 			/*
512 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
513 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
514 			 * overrides MNT_WAIT.
515 			 */
516 			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
517 			    (flags & MNT_WAIT)) &&
518 			    (error = VFS_STATFS(mp, sp))) {
519 				VFS_UNLOCK_GIANT(vfslocked);
520 				mtx_lock(&mountlist_mtx);
521 				nmp = TAILQ_NEXT(mp, mnt_list);
522 				vfs_unbusy(mp);
523 				continue;
524 			}
525 			if (priv_check(td, PRIV_VFS_GENERATION)) {
526 				bcopy(sp, &sb, sizeof(sb));
527 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
528 				prison_enforce_statfs(td->td_ucred, mp, &sb);
529 				sp = &sb;
530 			}
531 			if (bufseg == UIO_SYSSPACE)
532 				bcopy(sp, sfsp, sizeof(*sp));
533 			else /* if (bufseg == UIO_USERSPACE) */ {
534 				error = copyout(sp, sfsp, sizeof(*sp));
535 				if (error) {
536 					vfs_unbusy(mp);
537 					VFS_UNLOCK_GIANT(vfslocked);
538 					return (error);
539 				}
540 			}
541 			sfsp++;
542 		}
543 		VFS_UNLOCK_GIANT(vfslocked);
544 		count++;
545 		mtx_lock(&mountlist_mtx);
546 		nmp = TAILQ_NEXT(mp, mnt_list);
547 		vfs_unbusy(mp);
548 	}
549 	mtx_unlock(&mountlist_mtx);
550 	if (sfsp && count > maxcount)
551 		td->td_retval[0] = maxcount;
552 	else
553 		td->td_retval[0] = count;
554 	return (0);
555 }
556 
557 #ifdef COMPAT_FREEBSD4
558 /*
559  * Get old format filesystem statistics.
560  */
561 static void cvtstatfs(struct statfs *, struct ostatfs *);
562 
563 #ifndef _SYS_SYSPROTO_H_
564 struct freebsd4_statfs_args {
565 	char *path;
566 	struct ostatfs *buf;
567 };
568 #endif
569 int
570 freebsd4_statfs(td, uap)
571 	struct thread *td;
572 	struct freebsd4_statfs_args /* {
573 		char *path;
574 		struct ostatfs *buf;
575 	} */ *uap;
576 {
577 	struct ostatfs osb;
578 	struct statfs sf;
579 	int error;
580 
581 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
582 	if (error)
583 		return (error);
584 	cvtstatfs(&sf, &osb);
585 	return (copyout(&osb, uap->buf, sizeof(osb)));
586 }
587 
588 /*
589  * Get filesystem statistics.
590  */
591 #ifndef _SYS_SYSPROTO_H_
592 struct freebsd4_fstatfs_args {
593 	int fd;
594 	struct ostatfs *buf;
595 };
596 #endif
597 int
598 freebsd4_fstatfs(td, uap)
599 	struct thread *td;
600 	struct freebsd4_fstatfs_args /* {
601 		int fd;
602 		struct ostatfs *buf;
603 	} */ *uap;
604 {
605 	struct ostatfs osb;
606 	struct statfs sf;
607 	int error;
608 
609 	error = kern_fstatfs(td, uap->fd, &sf);
610 	if (error)
611 		return (error);
612 	cvtstatfs(&sf, &osb);
613 	return (copyout(&osb, uap->buf, sizeof(osb)));
614 }
615 
616 /*
617  * Get statistics on all filesystems.
618  */
619 #ifndef _SYS_SYSPROTO_H_
620 struct freebsd4_getfsstat_args {
621 	struct ostatfs *buf;
622 	long bufsize;
623 	int flags;
624 };
625 #endif
626 int
627 freebsd4_getfsstat(td, uap)
628 	struct thread *td;
629 	register struct freebsd4_getfsstat_args /* {
630 		struct ostatfs *buf;
631 		long bufsize;
632 		int flags;
633 	} */ *uap;
634 {
635 	struct statfs *buf, *sp;
636 	struct ostatfs osb;
637 	size_t count, size;
638 	int error;
639 
640 	count = uap->bufsize / sizeof(struct ostatfs);
641 	size = count * sizeof(struct statfs);
642 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
643 	if (size > 0) {
644 		count = td->td_retval[0];
645 		sp = buf;
646 		while (count > 0 && error == 0) {
647 			cvtstatfs(sp, &osb);
648 			error = copyout(&osb, uap->buf, sizeof(osb));
649 			sp++;
650 			uap->buf++;
651 			count--;
652 		}
653 		free(buf, M_TEMP);
654 	}
655 	return (error);
656 }
657 
658 /*
659  * Implement fstatfs() for (NFS) file handles.
660  */
661 #ifndef _SYS_SYSPROTO_H_
662 struct freebsd4_fhstatfs_args {
663 	struct fhandle *u_fhp;
664 	struct ostatfs *buf;
665 };
666 #endif
667 int
668 freebsd4_fhstatfs(td, uap)
669 	struct thread *td;
670 	struct freebsd4_fhstatfs_args /* {
671 		struct fhandle *u_fhp;
672 		struct ostatfs *buf;
673 	} */ *uap;
674 {
675 	struct ostatfs osb;
676 	struct statfs sf;
677 	fhandle_t fh;
678 	int error;
679 
680 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
681 	if (error)
682 		return (error);
683 	error = kern_fhstatfs(td, fh, &sf);
684 	if (error)
685 		return (error);
686 	cvtstatfs(&sf, &osb);
687 	return (copyout(&osb, uap->buf, sizeof(osb)));
688 }
689 
690 /*
691  * Convert a new format statfs structure to an old format statfs structure.
692  */
693 static void
694 cvtstatfs(nsp, osp)
695 	struct statfs *nsp;
696 	struct ostatfs *osp;
697 {
698 
699 	statfs_scale_blocks(nsp, LONG_MAX);
700 	bzero(osp, sizeof(*osp));
701 	osp->f_bsize = nsp->f_bsize;
702 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
703 	osp->f_blocks = nsp->f_blocks;
704 	osp->f_bfree = nsp->f_bfree;
705 	osp->f_bavail = nsp->f_bavail;
706 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
707 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
708 	osp->f_owner = nsp->f_owner;
709 	osp->f_type = nsp->f_type;
710 	osp->f_flags = nsp->f_flags;
711 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
712 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
713 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
714 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
715 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
716 	    MIN(MFSNAMELEN, OMFSNAMELEN));
717 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
718 	    MIN(MNAMELEN, OMNAMELEN));
719 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
720 	    MIN(MNAMELEN, OMNAMELEN));
721 	osp->f_fsid = nsp->f_fsid;
722 }
723 #endif /* COMPAT_FREEBSD4 */
724 
725 /*
726  * Change current working directory to a given file descriptor.
727  */
728 #ifndef _SYS_SYSPROTO_H_
729 struct fchdir_args {
730 	int	fd;
731 };
732 #endif
733 int
734 fchdir(td, uap)
735 	struct thread *td;
736 	struct fchdir_args /* {
737 		int fd;
738 	} */ *uap;
739 {
740 	register struct filedesc *fdp = td->td_proc->p_fd;
741 	struct vnode *vp, *tdp, *vpold;
742 	struct mount *mp;
743 	struct file *fp;
744 	int vfslocked;
745 	int error;
746 
747 	AUDIT_ARG_FD(uap->fd);
748 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
749 		return (error);
750 	vp = fp->f_vnode;
751 	VREF(vp);
752 	fdrop(fp, td);
753 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
754 	vn_lock(vp, LK_SHARED | LK_RETRY);
755 	AUDIT_ARG_VNODE1(vp);
756 	error = change_dir(vp, td);
757 	while (!error && (mp = vp->v_mountedhere) != NULL) {
758 		int tvfslocked;
759 		if (vfs_busy(mp, 0))
760 			continue;
761 		tvfslocked = VFS_LOCK_GIANT(mp);
762 		error = VFS_ROOT(mp, LK_SHARED, &tdp);
763 		vfs_unbusy(mp);
764 		if (error) {
765 			VFS_UNLOCK_GIANT(tvfslocked);
766 			break;
767 		}
768 		vput(vp);
769 		VFS_UNLOCK_GIANT(vfslocked);
770 		vp = tdp;
771 		vfslocked = tvfslocked;
772 	}
773 	if (error) {
774 		vput(vp);
775 		VFS_UNLOCK_GIANT(vfslocked);
776 		return (error);
777 	}
778 	VOP_UNLOCK(vp, 0);
779 	VFS_UNLOCK_GIANT(vfslocked);
780 	FILEDESC_XLOCK(fdp);
781 	vpold = fdp->fd_cdir;
782 	fdp->fd_cdir = vp;
783 	FILEDESC_XUNLOCK(fdp);
784 	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
785 	vrele(vpold);
786 	VFS_UNLOCK_GIANT(vfslocked);
787 	return (0);
788 }
789 
790 /*
791  * Change current working directory (``.'').
792  */
793 #ifndef _SYS_SYSPROTO_H_
794 struct chdir_args {
795 	char	*path;
796 };
797 #endif
798 int
799 chdir(td, uap)
800 	struct thread *td;
801 	struct chdir_args /* {
802 		char *path;
803 	} */ *uap;
804 {
805 
806 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
807 }
808 
809 int
810 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
811 {
812 	register struct filedesc *fdp = td->td_proc->p_fd;
813 	int error;
814 	struct nameidata nd;
815 	struct vnode *vp;
816 	int vfslocked;
817 
818 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 |
819 	    MPSAFE, pathseg, path, td);
820 	if ((error = namei(&nd)) != 0)
821 		return (error);
822 	vfslocked = NDHASGIANT(&nd);
823 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
824 		vput(nd.ni_vp);
825 		VFS_UNLOCK_GIANT(vfslocked);
826 		NDFREE(&nd, NDF_ONLY_PNBUF);
827 		return (error);
828 	}
829 	VOP_UNLOCK(nd.ni_vp, 0);
830 	VFS_UNLOCK_GIANT(vfslocked);
831 	NDFREE(&nd, NDF_ONLY_PNBUF);
832 	FILEDESC_XLOCK(fdp);
833 	vp = fdp->fd_cdir;
834 	fdp->fd_cdir = nd.ni_vp;
835 	FILEDESC_XUNLOCK(fdp);
836 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
837 	vrele(vp);
838 	VFS_UNLOCK_GIANT(vfslocked);
839 	return (0);
840 }
841 
842 /*
843  * Helper function for raised chroot(2) security function:  Refuse if
844  * any filedescriptors are open directories.
845  */
846 static int
847 chroot_refuse_vdir_fds(fdp)
848 	struct filedesc *fdp;
849 {
850 	struct vnode *vp;
851 	struct file *fp;
852 	int fd;
853 
854 	FILEDESC_LOCK_ASSERT(fdp);
855 
856 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
857 		fp = fget_locked(fdp, fd);
858 		if (fp == NULL)
859 			continue;
860 		if (fp->f_type == DTYPE_VNODE) {
861 			vp = fp->f_vnode;
862 			if (vp->v_type == VDIR)
863 				return (EPERM);
864 		}
865 	}
866 	return (0);
867 }
868 
869 /*
870  * This sysctl determines if we will allow a process to chroot(2) if it
871  * has a directory open:
872  *	0: disallowed for all processes.
873  *	1: allowed for processes that were not already chroot(2)'ed.
874  *	2: allowed for all processes.
875  */
876 
877 static int chroot_allow_open_directories = 1;
878 
879 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
880      &chroot_allow_open_directories, 0, "");
881 
882 /*
883  * Change notion of root (``/'') directory.
884  */
885 #ifndef _SYS_SYSPROTO_H_
886 struct chroot_args {
887 	char	*path;
888 };
889 #endif
890 int
891 chroot(td, uap)
892 	struct thread *td;
893 	struct chroot_args /* {
894 		char *path;
895 	} */ *uap;
896 {
897 	int error;
898 	struct nameidata nd;
899 	int vfslocked;
900 
901 	error = priv_check(td, PRIV_VFS_CHROOT);
902 	if (error)
903 		return (error);
904 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
905 	    AUDITVNODE1, UIO_USERSPACE, uap->path, td);
906 	error = namei(&nd);
907 	if (error)
908 		goto error;
909 	vfslocked = NDHASGIANT(&nd);
910 	if ((error = change_dir(nd.ni_vp, td)) != 0)
911 		goto e_vunlock;
912 #ifdef MAC
913 	if ((error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp)))
914 		goto e_vunlock;
915 #endif
916 	VOP_UNLOCK(nd.ni_vp, 0);
917 	error = change_root(nd.ni_vp, td);
918 	vrele(nd.ni_vp);
919 	VFS_UNLOCK_GIANT(vfslocked);
920 	NDFREE(&nd, NDF_ONLY_PNBUF);
921 	return (error);
922 e_vunlock:
923 	vput(nd.ni_vp);
924 	VFS_UNLOCK_GIANT(vfslocked);
925 error:
926 	NDFREE(&nd, NDF_ONLY_PNBUF);
927 	return (error);
928 }
929 
930 /*
931  * Common routine for chroot and chdir.  Callers must provide a locked vnode
932  * instance.
933  */
934 int
935 change_dir(vp, td)
936 	struct vnode *vp;
937 	struct thread *td;
938 {
939 	int error;
940 
941 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
942 	if (vp->v_type != VDIR)
943 		return (ENOTDIR);
944 #ifdef MAC
945 	error = mac_vnode_check_chdir(td->td_ucred, vp);
946 	if (error)
947 		return (error);
948 #endif
949 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
950 	return (error);
951 }
952 
953 /*
954  * Common routine for kern_chroot() and jail_attach().  The caller is
955  * responsible for invoking priv_check() and mac_vnode_check_chroot() to
956  * authorize this operation.
957  */
958 int
959 change_root(vp, td)
960 	struct vnode *vp;
961 	struct thread *td;
962 {
963 	struct filedesc *fdp;
964 	struct vnode *oldvp;
965 	int vfslocked;
966 	int error;
967 
968 	VFS_ASSERT_GIANT(vp->v_mount);
969 	fdp = td->td_proc->p_fd;
970 	FILEDESC_XLOCK(fdp);
971 	if (chroot_allow_open_directories == 0 ||
972 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
973 		error = chroot_refuse_vdir_fds(fdp);
974 		if (error) {
975 			FILEDESC_XUNLOCK(fdp);
976 			return (error);
977 		}
978 	}
979 	oldvp = fdp->fd_rdir;
980 	fdp->fd_rdir = vp;
981 	VREF(fdp->fd_rdir);
982 	if (!fdp->fd_jdir) {
983 		fdp->fd_jdir = vp;
984 		VREF(fdp->fd_jdir);
985 	}
986 	FILEDESC_XUNLOCK(fdp);
987 	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
988 	vrele(oldvp);
989 	VFS_UNLOCK_GIANT(vfslocked);
990 	return (0);
991 }
992 
993 /*
994  * Check permissions, allocate an open file structure, and call the device
995  * open routine if any.
996  */
997 #ifndef _SYS_SYSPROTO_H_
998 struct open_args {
999 	char	*path;
1000 	int	flags;
1001 	int	mode;
1002 };
1003 #endif
1004 int
1005 open(td, uap)
1006 	struct thread *td;
1007 	register struct open_args /* {
1008 		char *path;
1009 		int flags;
1010 		int mode;
1011 	} */ *uap;
1012 {
1013 
1014 	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
1015 }
1016 
1017 #ifndef _SYS_SYSPROTO_H_
1018 struct openat_args {
1019 	int	fd;
1020 	char	*path;
1021 	int	flag;
1022 	int	mode;
1023 };
1024 #endif
1025 int
1026 openat(struct thread *td, struct openat_args *uap)
1027 {
1028 
1029 	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
1030 	    uap->mode));
1031 }
1032 
1033 int
1034 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
1035     int mode)
1036 {
1037 
1038 	return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode));
1039 }
1040 
1041 int
1042 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1043     int flags, int mode)
1044 {
1045 	struct proc *p = td->td_proc;
1046 	struct filedesc *fdp = p->p_fd;
1047 	struct file *fp;
1048 	struct vnode *vp;
1049 	struct vattr vat;
1050 	struct mount *mp;
1051 	int cmode;
1052 	struct file *nfp;
1053 	int type, indx, error;
1054 	struct flock lf;
1055 	struct nameidata nd;
1056 	int vfslocked;
1057 
1058 	AUDIT_ARG_FFLAGS(flags);
1059 	AUDIT_ARG_MODE(mode);
1060 	/* XXX: audit dirfd */
1061 	/*
1062 	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR may
1063 	 * be specified.
1064 	 */
1065 	if (flags & O_EXEC) {
1066 		if (flags & O_ACCMODE)
1067 			return (EINVAL);
1068 	} else if ((flags & O_ACCMODE) == O_ACCMODE)
1069 		return (EINVAL);
1070 	else
1071 		flags = FFLAGS(flags);
1072 
1073 	error = falloc(td, &nfp, &indx);
1074 	if (error)
1075 		return (error);
1076 	/* An extra reference on `nfp' has been held for us by falloc(). */
1077 	fp = nfp;
1078 	/* Set the flags early so the finit in devfs can pick them up. */
1079 	fp->f_flag = flags & FMASK;
1080 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1081 	NDINIT_AT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, fd,
1082 	    td);
1083 	td->td_dupfd = -1;		/* XXX check for fdopen */
1084 	error = vn_open(&nd, &flags, cmode, fp);
1085 	if (error) {
1086 		/*
1087 		 * If the vn_open replaced the method vector, something
1088 		 * wonderous happened deep below and we just pass it up
1089 		 * pretending we know what we do.
1090 		 */
1091 		if (error == ENXIO && fp->f_ops != &badfileops) {
1092 			fdrop(fp, td);
1093 			td->td_retval[0] = indx;
1094 			return (0);
1095 		}
1096 
1097 		/*
1098 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1099 		 * responsible for dropping the old contents of ofiles[indx]
1100 		 * if it succeeds.
1101 		 */
1102 		if ((error == ENODEV || error == ENXIO) &&
1103 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1104 		    (error =
1105 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1106 			td->td_retval[0] = indx;
1107 			fdrop(fp, td);
1108 			return (0);
1109 		}
1110 		/*
1111 		 * Clean up the descriptor, but only if another thread hadn't
1112 		 * replaced or closed it.
1113 		 */
1114 		fdclose(fdp, fp, indx, td);
1115 		fdrop(fp, td);
1116 
1117 		if (error == ERESTART)
1118 			error = EINTR;
1119 		return (error);
1120 	}
1121 	td->td_dupfd = 0;
1122 	vfslocked = NDHASGIANT(&nd);
1123 	NDFREE(&nd, NDF_ONLY_PNBUF);
1124 	vp = nd.ni_vp;
1125 
1126 	fp->f_vnode = vp;	/* XXX Does devfs need this? */
1127 	/*
1128 	 * If the file wasn't claimed by devfs bind it to the normal
1129 	 * vnode operations here.
1130 	 */
1131 	if (fp->f_ops == &badfileops) {
1132 		KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
1133 		fp->f_seqcount = 1;
1134 		finit(fp, flags & FMASK, DTYPE_VNODE, vp, &vnops);
1135 	}
1136 
1137 	VOP_UNLOCK(vp, 0);
1138 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1139 		lf.l_whence = SEEK_SET;
1140 		lf.l_start = 0;
1141 		lf.l_len = 0;
1142 		if (flags & O_EXLOCK)
1143 			lf.l_type = F_WRLCK;
1144 		else
1145 			lf.l_type = F_RDLCK;
1146 		type = F_FLOCK;
1147 		if ((flags & FNONBLOCK) == 0)
1148 			type |= F_WAIT;
1149 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1150 			    type)) != 0)
1151 			goto bad;
1152 		atomic_set_int(&fp->f_flag, FHASLOCK);
1153 	}
1154 	if (flags & O_TRUNC) {
1155 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1156 			goto bad;
1157 		VATTR_NULL(&vat);
1158 		vat.va_size = 0;
1159 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1160 #ifdef MAC
1161 		error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
1162 		if (error == 0)
1163 #endif
1164 			error = VOP_SETATTR(vp, &vat, td->td_ucred);
1165 		VOP_UNLOCK(vp, 0);
1166 		vn_finished_write(mp);
1167 		if (error)
1168 			goto bad;
1169 	}
1170 	VFS_UNLOCK_GIANT(vfslocked);
1171 	/*
1172 	 * Release our private reference, leaving the one associated with
1173 	 * the descriptor table intact.
1174 	 */
1175 	fdrop(fp, td);
1176 	td->td_retval[0] = indx;
1177 	return (0);
1178 bad:
1179 	VFS_UNLOCK_GIANT(vfslocked);
1180 	fdclose(fdp, fp, indx, td);
1181 	fdrop(fp, td);
1182 	return (error);
1183 }
1184 
#ifdef COMPAT_43
/*
 * Old creat(2): create a file.  Implemented as kern_open() on the
 * user-space path with O_WRONLY | O_CREAT | O_TRUNC and the given mode.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	int oflags;

	oflags = O_WRONLY | O_CREAT | O_TRUNC;
	return (kern_open(td, uap->path, UIO_USERSPACE, oflags, uap->mode));
}
#endif /* COMPAT_43 */
1208 
1209 /*
1210  * Create a special file.
1211  */
1212 #ifndef _SYS_SYSPROTO_H_
1213 struct mknod_args {
1214 	char	*path;
1215 	int	mode;
1216 	int	dev;
1217 };
1218 #endif
1219 int
1220 mknod(td, uap)
1221 	struct thread *td;
1222 	register struct mknod_args /* {
1223 		char *path;
1224 		int mode;
1225 		int dev;
1226 	} */ *uap;
1227 {
1228 
1229 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1230 }
1231 
1232 #ifndef _SYS_SYSPROTO_H_
1233 struct mknodat_args {
1234 	int	fd;
1235 	char	*path;
1236 	mode_t	mode;
1237 	dev_t	dev;
1238 };
1239 #endif
1240 int
1241 mknodat(struct thread *td, struct mknodat_args *uap)
1242 {
1243 
1244 	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
1245 	    uap->dev));
1246 }
1247 
1248 int
1249 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1250     int dev)
1251 {
1252 
1253 	return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev));
1254 }
1255 
1256 int
1257 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1258     int mode, int dev)
1259 {
1260 	struct vnode *vp;
1261 	struct mount *mp;
1262 	struct vattr vattr;
1263 	int error;
1264 	int whiteout = 0;
1265 	struct nameidata nd;
1266 	int vfslocked;
1267 
1268 	AUDIT_ARG_MODE(mode);
1269 	AUDIT_ARG_DEV(dev);
1270 	switch (mode & S_IFMT) {
1271 	case S_IFCHR:
1272 	case S_IFBLK:
1273 		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
1274 		break;
1275 	case S_IFMT:
1276 		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
1277 		break;
1278 	case S_IFWHT:
1279 		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
1280 		break;
1281 	case S_IFIFO:
1282 		if (dev == 0)
1283 			return (kern_mkfifoat(td, fd, path, pathseg, mode));
1284 		/* FALLTHROUGH */
1285 	default:
1286 		error = EINVAL;
1287 		break;
1288 	}
1289 	if (error)
1290 		return (error);
1291 restart:
1292 	bwillwrite();
1293 	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1294 	    pathseg, path, fd, td);
1295 	if ((error = namei(&nd)) != 0)
1296 		return (error);
1297 	vfslocked = NDHASGIANT(&nd);
1298 	vp = nd.ni_vp;
1299 	if (vp != NULL) {
1300 		NDFREE(&nd, NDF_ONLY_PNBUF);
1301 		if (vp == nd.ni_dvp)
1302 			vrele(nd.ni_dvp);
1303 		else
1304 			vput(nd.ni_dvp);
1305 		vrele(vp);
1306 		VFS_UNLOCK_GIANT(vfslocked);
1307 		return (EEXIST);
1308 	} else {
1309 		VATTR_NULL(&vattr);
1310 		vattr.va_mode = (mode & ALLPERMS) &
1311 		    ~td->td_proc->p_fd->fd_cmask;
1312 		vattr.va_rdev = dev;
1313 		whiteout = 0;
1314 
1315 		switch (mode & S_IFMT) {
1316 		case S_IFMT:	/* used by badsect to flag bad sectors */
1317 			vattr.va_type = VBAD;
1318 			break;
1319 		case S_IFCHR:
1320 			vattr.va_type = VCHR;
1321 			break;
1322 		case S_IFBLK:
1323 			vattr.va_type = VBLK;
1324 			break;
1325 		case S_IFWHT:
1326 			whiteout = 1;
1327 			break;
1328 		default:
1329 			panic("kern_mknod: invalid mode");
1330 		}
1331 	}
1332 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1333 		NDFREE(&nd, NDF_ONLY_PNBUF);
1334 		vput(nd.ni_dvp);
1335 		VFS_UNLOCK_GIANT(vfslocked);
1336 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1337 			return (error);
1338 		goto restart;
1339 	}
1340 #ifdef MAC
1341 	if (error == 0 && !whiteout)
1342 		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
1343 		    &nd.ni_cnd, &vattr);
1344 #endif
1345 	if (!error) {
1346 		if (whiteout)
1347 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1348 		else {
1349 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1350 						&nd.ni_cnd, &vattr);
1351 			if (error == 0)
1352 				vput(nd.ni_vp);
1353 		}
1354 	}
1355 	NDFREE(&nd, NDF_ONLY_PNBUF);
1356 	vput(nd.ni_dvp);
1357 	vn_finished_write(mp);
1358 	VFS_UNLOCK_GIANT(vfslocked);
1359 	return (error);
1360 }
1361 
1362 /*
1363  * Create a named pipe.
1364  */
1365 #ifndef _SYS_SYSPROTO_H_
1366 struct mkfifo_args {
1367 	char	*path;
1368 	int	mode;
1369 };
1370 #endif
1371 int
1372 mkfifo(td, uap)
1373 	struct thread *td;
1374 	register struct mkfifo_args /* {
1375 		char *path;
1376 		int mode;
1377 	} */ *uap;
1378 {
1379 
1380 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1381 }
1382 
1383 #ifndef _SYS_SYSPROTO_H_
1384 struct mkfifoat_args {
1385 	int	fd;
1386 	char	*path;
1387 	mode_t	mode;
1388 };
1389 #endif
1390 int
1391 mkfifoat(struct thread *td, struct mkfifoat_args *uap)
1392 {
1393 
1394 	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
1395 	    uap->mode));
1396 }
1397 
1398 int
1399 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1400 {
1401 
1402 	return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode));
1403 }
1404 
1405 int
1406 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1407     int mode)
1408 {
1409 	struct mount *mp;
1410 	struct vattr vattr;
1411 	int error;
1412 	struct nameidata nd;
1413 	int vfslocked;
1414 
1415 	AUDIT_ARG_MODE(mode);
1416 restart:
1417 	bwillwrite();
1418 	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1419 	    pathseg, path, fd, td);
1420 	if ((error = namei(&nd)) != 0)
1421 		return (error);
1422 	vfslocked = NDHASGIANT(&nd);
1423 	if (nd.ni_vp != NULL) {
1424 		NDFREE(&nd, NDF_ONLY_PNBUF);
1425 		if (nd.ni_vp == nd.ni_dvp)
1426 			vrele(nd.ni_dvp);
1427 		else
1428 			vput(nd.ni_dvp);
1429 		vrele(nd.ni_vp);
1430 		VFS_UNLOCK_GIANT(vfslocked);
1431 		return (EEXIST);
1432 	}
1433 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1434 		NDFREE(&nd, NDF_ONLY_PNBUF);
1435 		vput(nd.ni_dvp);
1436 		VFS_UNLOCK_GIANT(vfslocked);
1437 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1438 			return (error);
1439 		goto restart;
1440 	}
1441 	VATTR_NULL(&vattr);
1442 	vattr.va_type = VFIFO;
1443 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1444 #ifdef MAC
1445 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1446 	    &vattr);
1447 	if (error)
1448 		goto out;
1449 #endif
1450 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1451 	if (error == 0)
1452 		vput(nd.ni_vp);
1453 #ifdef MAC
1454 out:
1455 #endif
1456 	vput(nd.ni_dvp);
1457 	vn_finished_write(mp);
1458 	VFS_UNLOCK_GIANT(vfslocked);
1459 	NDFREE(&nd, NDF_ONLY_PNBUF);
1460 	return (error);
1461 }
1462 
1463 /*
1464  * Make a hard file link.
1465  */
1466 #ifndef _SYS_SYSPROTO_H_
1467 struct link_args {
1468 	char	*path;
1469 	char	*link;
1470 };
1471 #endif
1472 int
1473 link(td, uap)
1474 	struct thread *td;
1475 	register struct link_args /* {
1476 		char *path;
1477 		char *link;
1478 	} */ *uap;
1479 {
1480 
1481 	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1482 }
1483 
1484 #ifndef _SYS_SYSPROTO_H_
1485 struct linkat_args {
1486 	int	fd1;
1487 	char	*path1;
1488 	int	fd2;
1489 	char	*path2;
1490 	int	flag;
1491 };
1492 #endif
1493 int
1494 linkat(struct thread *td, struct linkat_args *uap)
1495 {
1496 	int flag;
1497 
1498 	flag = uap->flag;
1499 	if (flag & ~AT_SYMLINK_FOLLOW)
1500 		return (EINVAL);
1501 
1502 	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
1503 	    UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
1504 }
1505 
1506 int hardlink_check_uid = 0;
1507 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1508     &hardlink_check_uid, 0,
1509     "Unprivileged processes cannot create hard links to files owned by other "
1510     "users");
1511 static int hardlink_check_gid = 0;
1512 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1513     &hardlink_check_gid, 0,
1514     "Unprivileged processes cannot create hard links to files owned by other "
1515     "groups");
1516 
1517 static int
1518 can_hardlink(struct vnode *vp, struct ucred *cred)
1519 {
1520 	struct vattr va;
1521 	int error;
1522 
1523 	if (!hardlink_check_uid && !hardlink_check_gid)
1524 		return (0);
1525 
1526 	error = VOP_GETATTR(vp, &va, cred);
1527 	if (error != 0)
1528 		return (error);
1529 
1530 	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
1531 		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
1532 		if (error)
1533 			return (error);
1534 	}
1535 
1536 	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
1537 		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
1538 		if (error)
1539 			return (error);
1540 	}
1541 
1542 	return (0);
1543 }
1544 
1545 int
1546 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1547 {
1548 
1549 	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW));
1550 }
1551 
1552 int
1553 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
1554     enum uio_seg segflg, int follow)
1555 {
1556 	struct vnode *vp;
1557 	struct mount *mp;
1558 	struct nameidata nd;
1559 	int vfslocked;
1560 	int lvfslocked;
1561 	int error;
1562 
1563 	bwillwrite();
1564 	NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, segflg, path1,
1565 	    fd1, td);
1566 
1567 	if ((error = namei(&nd)) != 0)
1568 		return (error);
1569 	vfslocked = NDHASGIANT(&nd);
1570 	NDFREE(&nd, NDF_ONLY_PNBUF);
1571 	vp = nd.ni_vp;
1572 	if (vp->v_type == VDIR) {
1573 		vrele(vp);
1574 		VFS_UNLOCK_GIANT(vfslocked);
1575 		return (EPERM);		/* POSIX */
1576 	}
1577 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1578 		vrele(vp);
1579 		VFS_UNLOCK_GIANT(vfslocked);
1580 		return (error);
1581 	}
1582 	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1583 	    segflg, path2, fd2, td);
1584 	if ((error = namei(&nd)) == 0) {
1585 		lvfslocked = NDHASGIANT(&nd);
1586 		if (nd.ni_vp != NULL) {
1587 			if (nd.ni_dvp == nd.ni_vp)
1588 				vrele(nd.ni_dvp);
1589 			else
1590 				vput(nd.ni_dvp);
1591 			vrele(nd.ni_vp);
1592 			error = EEXIST;
1593 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY))
1594 		    == 0) {
1595 			error = can_hardlink(vp, td->td_ucred);
1596 			if (error == 0)
1597 #ifdef MAC
1598 				error = mac_vnode_check_link(td->td_ucred,
1599 				    nd.ni_dvp, vp, &nd.ni_cnd);
1600 			if (error == 0)
1601 #endif
1602 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1603 			VOP_UNLOCK(vp, 0);
1604 			vput(nd.ni_dvp);
1605 		}
1606 		NDFREE(&nd, NDF_ONLY_PNBUF);
1607 		VFS_UNLOCK_GIANT(lvfslocked);
1608 	}
1609 	vrele(vp);
1610 	vn_finished_write(mp);
1611 	VFS_UNLOCK_GIANT(vfslocked);
1612 	return (error);
1613 }
1614 
1615 /*
1616  * Make a symbolic link.
1617  */
1618 #ifndef _SYS_SYSPROTO_H_
1619 struct symlink_args {
1620 	char	*path;
1621 	char	*link;
1622 };
1623 #endif
1624 int
1625 symlink(td, uap)
1626 	struct thread *td;
1627 	register struct symlink_args /* {
1628 		char *path;
1629 		char *link;
1630 	} */ *uap;
1631 {
1632 
1633 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1634 }
1635 
1636 #ifndef _SYS_SYSPROTO_H_
1637 struct symlinkat_args {
1638 	char	*path;
1639 	int	fd;
1640 	char	*path2;
1641 };
1642 #endif
1643 int
1644 symlinkat(struct thread *td, struct symlinkat_args *uap)
1645 {
1646 
1647 	return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
1648 	    UIO_USERSPACE));
1649 }
1650 
1651 int
1652 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1653 {
1654 
1655 	return (kern_symlinkat(td, path, AT_FDCWD, link, segflg));
1656 }
1657 
1658 int
1659 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
1660     enum uio_seg segflg)
1661 {
1662 	struct mount *mp;
1663 	struct vattr vattr;
1664 	char *syspath;
1665 	int error;
1666 	struct nameidata nd;
1667 	int vfslocked;
1668 
1669 	if (segflg == UIO_SYSSPACE) {
1670 		syspath = path1;
1671 	} else {
1672 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1673 		if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
1674 			goto out;
1675 	}
1676 	AUDIT_ARG_TEXT(syspath);
1677 restart:
1678 	bwillwrite();
1679 	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1680 	    segflg, path2, fd, td);
1681 	if ((error = namei(&nd)) != 0)
1682 		goto out;
1683 	vfslocked = NDHASGIANT(&nd);
1684 	if (nd.ni_vp) {
1685 		NDFREE(&nd, NDF_ONLY_PNBUF);
1686 		if (nd.ni_vp == nd.ni_dvp)
1687 			vrele(nd.ni_dvp);
1688 		else
1689 			vput(nd.ni_dvp);
1690 		vrele(nd.ni_vp);
1691 		VFS_UNLOCK_GIANT(vfslocked);
1692 		error = EEXIST;
1693 		goto out;
1694 	}
1695 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1696 		NDFREE(&nd, NDF_ONLY_PNBUF);
1697 		vput(nd.ni_dvp);
1698 		VFS_UNLOCK_GIANT(vfslocked);
1699 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1700 			goto out;
1701 		goto restart;
1702 	}
1703 	VATTR_NULL(&vattr);
1704 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1705 #ifdef MAC
1706 	vattr.va_type = VLNK;
1707 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1708 	    &vattr);
1709 	if (error)
1710 		goto out2;
1711 #endif
1712 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1713 	if (error == 0)
1714 		vput(nd.ni_vp);
1715 #ifdef MAC
1716 out2:
1717 #endif
1718 	NDFREE(&nd, NDF_ONLY_PNBUF);
1719 	vput(nd.ni_dvp);
1720 	vn_finished_write(mp);
1721 	VFS_UNLOCK_GIANT(vfslocked);
1722 out:
1723 	if (segflg != UIO_SYSSPACE)
1724 		uma_zfree(namei_zone, syspath);
1725 	return (error);
1726 }
1727 
1728 /*
1729  * Delete a whiteout from the filesystem.
1730  */
1731 int
1732 undelete(td, uap)
1733 	struct thread *td;
1734 	register struct undelete_args /* {
1735 		char *path;
1736 	} */ *uap;
1737 {
1738 	int error;
1739 	struct mount *mp;
1740 	struct nameidata nd;
1741 	int vfslocked;
1742 
1743 restart:
1744 	bwillwrite();
1745 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1746 	    UIO_USERSPACE, uap->path, td);
1747 	error = namei(&nd);
1748 	if (error)
1749 		return (error);
1750 	vfslocked = NDHASGIANT(&nd);
1751 
1752 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1753 		NDFREE(&nd, NDF_ONLY_PNBUF);
1754 		if (nd.ni_vp == nd.ni_dvp)
1755 			vrele(nd.ni_dvp);
1756 		else
1757 			vput(nd.ni_dvp);
1758 		if (nd.ni_vp)
1759 			vrele(nd.ni_vp);
1760 		VFS_UNLOCK_GIANT(vfslocked);
1761 		return (EEXIST);
1762 	}
1763 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1764 		NDFREE(&nd, NDF_ONLY_PNBUF);
1765 		vput(nd.ni_dvp);
1766 		VFS_UNLOCK_GIANT(vfslocked);
1767 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1768 			return (error);
1769 		goto restart;
1770 	}
1771 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1772 	NDFREE(&nd, NDF_ONLY_PNBUF);
1773 	vput(nd.ni_dvp);
1774 	vn_finished_write(mp);
1775 	VFS_UNLOCK_GIANT(vfslocked);
1776 	return (error);
1777 }
1778 
1779 /*
1780  * Delete a name from the filesystem.
1781  */
1782 #ifndef _SYS_SYSPROTO_H_
1783 struct unlink_args {
1784 	char	*path;
1785 };
1786 #endif
1787 int
1788 unlink(td, uap)
1789 	struct thread *td;
1790 	struct unlink_args /* {
1791 		char *path;
1792 	} */ *uap;
1793 {
1794 
1795 	return (kern_unlink(td, uap->path, UIO_USERSPACE));
1796 }
1797 
1798 #ifndef _SYS_SYSPROTO_H_
1799 struct unlinkat_args {
1800 	int	fd;
1801 	char	*path;
1802 	int	flag;
1803 };
1804 #endif
1805 int
1806 unlinkat(struct thread *td, struct unlinkat_args *uap)
1807 {
1808 	int flag = uap->flag;
1809 	int fd = uap->fd;
1810 	char *path = uap->path;
1811 
1812 	if (flag & ~AT_REMOVEDIR)
1813 		return (EINVAL);
1814 
1815 	if (flag & AT_REMOVEDIR)
1816 		return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
1817 	else
1818 		return (kern_unlinkat(td, fd, path, UIO_USERSPACE));
1819 }
1820 
1821 int
1822 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1823 {
1824 
1825 	return (kern_unlinkat(td, AT_FDCWD, path, pathseg));
1826 }
1827 
1828 int
1829 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
1830 {
1831 	struct mount *mp;
1832 	struct vnode *vp;
1833 	int error;
1834 	struct nameidata nd;
1835 	int vfslocked;
1836 
1837 restart:
1838 	bwillwrite();
1839 	NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1840 	    pathseg, path, fd, td);
1841 	if ((error = namei(&nd)) != 0)
1842 		return (error == EINVAL ? EPERM : error);
1843 	vfslocked = NDHASGIANT(&nd);
1844 	vp = nd.ni_vp;
1845 	if (vp->v_type == VDIR)
1846 		error = EPERM;		/* POSIX */
1847 	else {
1848 		/*
1849 		 * The root of a mounted filesystem cannot be deleted.
1850 		 *
1851 		 * XXX: can this only be a VDIR case?
1852 		 */
1853 		if (vp->v_vflag & VV_ROOT)
1854 			error = EBUSY;
1855 	}
1856 	if (error == 0) {
1857 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1858 			NDFREE(&nd, NDF_ONLY_PNBUF);
1859 			vput(nd.ni_dvp);
1860 			if (vp == nd.ni_dvp)
1861 				vrele(vp);
1862 			else
1863 				vput(vp);
1864 			VFS_UNLOCK_GIANT(vfslocked);
1865 			if ((error = vn_start_write(NULL, &mp,
1866 			    V_XSLEEP | PCATCH)) != 0)
1867 				return (error);
1868 			goto restart;
1869 		}
1870 #ifdef MAC
1871 		error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
1872 		    &nd.ni_cnd);
1873 		if (error)
1874 			goto out;
1875 #endif
1876 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1877 #ifdef MAC
1878 out:
1879 #endif
1880 		vn_finished_write(mp);
1881 	}
1882 	NDFREE(&nd, NDF_ONLY_PNBUF);
1883 	vput(nd.ni_dvp);
1884 	if (vp == nd.ni_dvp)
1885 		vrele(vp);
1886 	else
1887 		vput(vp);
1888 	VFS_UNLOCK_GIANT(vfslocked);
1889 	return (error);
1890 }
1891 
1892 /*
1893  * Reposition read/write file offset.
1894  */
1895 #ifndef _SYS_SYSPROTO_H_
1896 struct lseek_args {
1897 	int	fd;
1898 	int	pad;
1899 	off_t	offset;
1900 	int	whence;
1901 };
1902 #endif
1903 int
1904 lseek(td, uap)
1905 	struct thread *td;
1906 	register struct lseek_args /* {
1907 		int fd;
1908 		int pad;
1909 		off_t offset;
1910 		int whence;
1911 	} */ *uap;
1912 {
1913 	struct ucred *cred = td->td_ucred;
1914 	struct file *fp;
1915 	struct vnode *vp;
1916 	struct vattr vattr;
1917 	off_t offset;
1918 	int error, noneg;
1919 	int vfslocked;
1920 
1921 	AUDIT_ARG_FD(uap->fd);
1922 	if ((error = fget(td, uap->fd, &fp)) != 0)
1923 		return (error);
1924 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1925 		fdrop(fp, td);
1926 		return (ESPIPE);
1927 	}
1928 	vp = fp->f_vnode;
1929 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1930 	noneg = (vp->v_type != VCHR);
1931 	offset = uap->offset;
1932 	switch (uap->whence) {
1933 	case L_INCR:
1934 		if (noneg &&
1935 		    (fp->f_offset < 0 ||
1936 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1937 			error = EOVERFLOW;
1938 			break;
1939 		}
1940 		offset += fp->f_offset;
1941 		break;
1942 	case L_XTND:
1943 		vn_lock(vp, LK_SHARED | LK_RETRY);
1944 		error = VOP_GETATTR(vp, &vattr, cred);
1945 		VOP_UNLOCK(vp, 0);
1946 		if (error)
1947 			break;
1948 		if (noneg &&
1949 		    (vattr.va_size > OFF_MAX ||
1950 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1951 			error = EOVERFLOW;
1952 			break;
1953 		}
1954 		offset += vattr.va_size;
1955 		break;
1956 	case L_SET:
1957 		break;
1958 	case SEEK_DATA:
1959 		error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td);
1960 		break;
1961 	case SEEK_HOLE:
1962 		error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td);
1963 		break;
1964 	default:
1965 		error = EINVAL;
1966 	}
1967 	if (error == 0 && noneg && offset < 0)
1968 		error = EINVAL;
1969 	if (error != 0)
1970 		goto drop;
1971 	fp->f_offset = offset;
1972 	*(off_t *)(td->td_retval) = fp->f_offset;
1973 drop:
1974 	fdrop(fp, td);
1975 	VFS_UNLOCK_GIANT(vfslocked);
1976 	return (error);
1977 }
1978 
1979 #if defined(COMPAT_43)
1980 /*
1981  * Reposition read/write file offset.
1982  */
1983 #ifndef _SYS_SYSPROTO_H_
1984 struct olseek_args {
1985 	int	fd;
1986 	long	offset;
1987 	int	whence;
1988 };
1989 #endif
1990 int
1991 olseek(td, uap)
1992 	struct thread *td;
1993 	register struct olseek_args /* {
1994 		int fd;
1995 		long offset;
1996 		int whence;
1997 	} */ *uap;
1998 {
1999 	struct lseek_args /* {
2000 		int fd;
2001 		int pad;
2002 		off_t offset;
2003 		int whence;
2004 	} */ nuap;
2005 
2006 	nuap.fd = uap->fd;
2007 	nuap.offset = uap->offset;
2008 	nuap.whence = uap->whence;
2009 	return (lseek(td, &nuap));
2010 }
2011 #endif /* COMPAT_43 */
2012 
2013 /* Version with the 'pad' argument */
2014 int
2015 freebsd6_lseek(td, uap)
2016 	struct thread *td;
2017 	register struct freebsd6_lseek_args *uap;
2018 {
2019 	struct lseek_args ouap;
2020 
2021 	ouap.fd = uap->fd;
2022 	ouap.offset = uap->offset;
2023 	ouap.whence = uap->whence;
2024 	return (lseek(td, &ouap));
2025 }
2026 
2027 /*
2028  * Check access permissions using passed credentials.
2029  */
2030 static int
2031 vn_access(vp, user_flags, cred, td)
2032 	struct vnode	*vp;
2033 	int		user_flags;
2034 	struct ucred	*cred;
2035 	struct thread	*td;
2036 {
2037 	int error;
2038 	accmode_t accmode;
2039 
2040 	/* Flags == 0 means only check for existence. */
2041 	error = 0;
2042 	if (user_flags) {
2043 		accmode = 0;
2044 		if (user_flags & R_OK)
2045 			accmode |= VREAD;
2046 		if (user_flags & W_OK)
2047 			accmode |= VWRITE;
2048 		if (user_flags & X_OK)
2049 			accmode |= VEXEC;
2050 #ifdef MAC
2051 		error = mac_vnode_check_access(cred, vp, accmode);
2052 		if (error)
2053 			return (error);
2054 #endif
2055 		if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
2056 			error = VOP_ACCESS(vp, accmode, cred, td);
2057 	}
2058 	return (error);
2059 }
2060 
2061 /*
2062  * Check access permissions using "real" credentials.
2063  */
2064 #ifndef _SYS_SYSPROTO_H_
2065 struct access_args {
2066 	char	*path;
2067 	int	flags;
2068 };
2069 #endif
2070 int
2071 access(td, uap)
2072 	struct thread *td;
2073 	register struct access_args /* {
2074 		char *path;
2075 		int flags;
2076 	} */ *uap;
2077 {
2078 
2079 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
2080 }
2081 
2082 #ifndef _SYS_SYSPROTO_H_
2083 struct faccessat_args {
2084 	int	dirfd;
2085 	char	*path;
2086 	int	mode;
2087 	int	flag;
2088 }
2089 #endif
2090 int
2091 faccessat(struct thread *td, struct faccessat_args *uap)
2092 {
2093 
2094 	if (uap->flag & ~AT_EACCESS)
2095 		return (EINVAL);
2096 	return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
2097 	    uap->mode));
2098 }
2099 
2100 int
2101 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2102 {
2103 
2104 	return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, mode));
2105 }
2106 
2107 int
2108 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
2109     int flags, int mode)
2110 {
2111 	struct ucred *cred, *tmpcred;
2112 	struct vnode *vp;
2113 	struct nameidata nd;
2114 	int vfslocked;
2115 	int error;
2116 
2117 	/*
2118 	 * Create and modify a temporary credential instead of one that
2119 	 * is potentially shared.  This could also mess up socket
2120 	 * buffer accounting which can run in an interrupt context.
2121 	 */
2122 	if (!(flags & AT_EACCESS)) {
2123 		cred = td->td_ucred;
2124 		tmpcred = crdup(cred);
2125 		tmpcred->cr_uid = cred->cr_ruid;
2126 		tmpcred->cr_groups[0] = cred->cr_rgid;
2127 		td->td_ucred = tmpcred;
2128 	} else
2129 		cred = tmpcred = td->td_ucred;
2130 	AUDIT_ARG_VALUE(mode);
2131 	NDINIT_AT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
2132 	    AUDITVNODE1, pathseg, path, fd, td);
2133 	if ((error = namei(&nd)) != 0)
2134 		goto out1;
2135 	vfslocked = NDHASGIANT(&nd);
2136 	vp = nd.ni_vp;
2137 
2138 	error = vn_access(vp, mode, tmpcred, td);
2139 	NDFREE(&nd, NDF_ONLY_PNBUF);
2140 	vput(vp);
2141 	VFS_UNLOCK_GIANT(vfslocked);
2142 out1:
2143 	if (!(flags & AT_EACCESS)) {
2144 		td->td_ucred = cred;
2145 		crfree(tmpcred);
2146 	}
2147 	return (error);
2148 }
2149 
2150 /*
2151  * Check access permissions using "effective" credentials.
2152  */
2153 #ifndef _SYS_SYSPROTO_H_
2154 struct eaccess_args {
2155 	char	*path;
2156 	int	flags;
2157 };
2158 #endif
2159 int
2160 eaccess(td, uap)
2161 	struct thread *td;
2162 	register struct eaccess_args /* {
2163 		char *path;
2164 		int flags;
2165 	} */ *uap;
2166 {
2167 
2168 	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
2169 }
2170 
2171 int
2172 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
2173 {
2174 
2175 	return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, flags));
2176 }
2177 
2178 #if defined(COMPAT_43)
2179 /*
2180  * Get file status; this version follows links.
2181  */
2182 #ifndef _SYS_SYSPROTO_H_
2183 struct ostat_args {
2184 	char	*path;
2185 	struct ostat *ub;
2186 };
2187 #endif
2188 int
2189 ostat(td, uap)
2190 	struct thread *td;
2191 	register struct ostat_args /* {
2192 		char *path;
2193 		struct ostat *ub;
2194 	} */ *uap;
2195 {
2196 	struct stat sb;
2197 	struct ostat osb;
2198 	int error;
2199 
2200 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2201 	if (error)
2202 		return (error);
2203 	cvtstat(&sb, &osb);
2204 	error = copyout(&osb, uap->ub, sizeof (osb));
2205 	return (error);
2206 }
2207 
2208 /*
2209  * Get file status; this version does not follow links.
2210  */
2211 #ifndef _SYS_SYSPROTO_H_
2212 struct olstat_args {
2213 	char	*path;
2214 	struct ostat *ub;
2215 };
2216 #endif
2217 int
2218 olstat(td, uap)
2219 	struct thread *td;
2220 	register struct olstat_args /* {
2221 		char *path;
2222 		struct ostat *ub;
2223 	} */ *uap;
2224 {
2225 	struct stat sb;
2226 	struct ostat osb;
2227 	int error;
2228 
2229 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2230 	if (error)
2231 		return (error);
2232 	cvtstat(&sb, &osb);
2233 	error = copyout(&osb, uap->ub, sizeof (osb));
2234 	return (error);
2235 }
2236 
2237 /*
2238  * Convert from an old to a new stat structure.
2239  */
2240 void
2241 cvtstat(st, ost)
2242 	struct stat *st;
2243 	struct ostat *ost;
2244 {
2245 
2246 	ost->st_dev = st->st_dev;
2247 	ost->st_ino = st->st_ino;
2248 	ost->st_mode = st->st_mode;
2249 	ost->st_nlink = st->st_nlink;
2250 	ost->st_uid = st->st_uid;
2251 	ost->st_gid = st->st_gid;
2252 	ost->st_rdev = st->st_rdev;
2253 	if (st->st_size < (quad_t)1 << 32)
2254 		ost->st_size = st->st_size;
2255 	else
2256 		ost->st_size = -2;
2257 	ost->st_atime = st->st_atime;
2258 	ost->st_mtime = st->st_mtime;
2259 	ost->st_ctime = st->st_ctime;
2260 	ost->st_blksize = st->st_blksize;
2261 	ost->st_blocks = st->st_blocks;
2262 	ost->st_flags = st->st_flags;
2263 	ost->st_gen = st->st_gen;
2264 }
2265 #endif /* COMPAT_43 */
2266 
2267 /*
2268  * Get file status; this version follows links.
2269  */
2270 #ifndef _SYS_SYSPROTO_H_
2271 struct stat_args {
2272 	char	*path;
2273 	struct stat *ub;
2274 };
2275 #endif
2276 int
2277 stat(td, uap)
2278 	struct thread *td;
2279 	register struct stat_args /* {
2280 		char *path;
2281 		struct stat *ub;
2282 	} */ *uap;
2283 {
2284 	struct stat sb;
2285 	int error;
2286 
2287 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2288 	if (error == 0)
2289 		error = copyout(&sb, uap->ub, sizeof (sb));
2290 	return (error);
2291 }
2292 
2293 #ifndef _SYS_SYSPROTO_H_
2294 struct fstatat_args {
2295 	int	fd;
2296 	char	*path;
2297 	struct stat	*buf;
2298 	int	flag;
2299 }
2300 #endif
2301 int
2302 fstatat(struct thread *td, struct fstatat_args *uap)
2303 {
2304 	struct stat sb;
2305 	int error;
2306 
2307 	error = kern_statat(td, uap->flag, uap->fd, uap->path,
2308 	    UIO_USERSPACE, &sb);
2309 	if (error == 0)
2310 		error = copyout(&sb, uap->buf, sizeof (sb));
2311 	return (error);
2312 }
2313 
2314 int
2315 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2316 {
2317 
2318 	return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp));
2319 }
2320 
2321 int
2322 kern_statat(struct thread *td, int flag, int fd, char *path,
2323     enum uio_seg pathseg, struct stat *sbp)
2324 {
2325 
2326 	return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL));
2327 }
2328 
2329 int
2330 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path,
2331     enum uio_seg pathseg, struct stat *sbp,
2332     void (*hook)(struct vnode *vp, struct stat *sbp))
2333 {
2334 	struct nameidata nd;
2335 	struct stat sb;
2336 	int error, vfslocked;
2337 
2338 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2339 		return (EINVAL);
2340 
2341 	NDINIT_AT(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
2342 	    FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | MPSAFE, pathseg,
2343 	    path, fd, td);
2344 
2345 	if ((error = namei(&nd)) != 0)
2346 		return (error);
2347 	vfslocked = NDHASGIANT(&nd);
2348 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2349 	if (!error) {
2350 		SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0);
2351 		if (S_ISREG(sb.st_mode))
2352 			SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0);
2353 		if (__predict_false(hook != NULL))
2354 			hook(nd.ni_vp, &sb);
2355 	}
2356 	NDFREE(&nd, NDF_ONLY_PNBUF);
2357 	vput(nd.ni_vp);
2358 	VFS_UNLOCK_GIANT(vfslocked);
2359 	if (error)
2360 		return (error);
2361 	*sbp = sb;
2362 #ifdef KTRACE
2363 	if (KTRPOINT(td, KTR_STRUCT))
2364 		ktrstat(&sb);
2365 #endif
2366 	return (0);
2367 }
2368 
2369 /*
2370  * Get file status; this version does not follow links.
2371  */
2372 #ifndef _SYS_SYSPROTO_H_
2373 struct lstat_args {
2374 	char	*path;
2375 	struct stat *ub;
2376 };
2377 #endif
2378 int
2379 lstat(td, uap)
2380 	struct thread *td;
2381 	register struct lstat_args /* {
2382 		char *path;
2383 		struct stat *ub;
2384 	} */ *uap;
2385 {
2386 	struct stat sb;
2387 	int error;
2388 
2389 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2390 	if (error == 0)
2391 		error = copyout(&sb, uap->ub, sizeof (sb));
2392 	return (error);
2393 }
2394 
2395 int
2396 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2397 {
2398 
2399 	return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg,
2400 	    sbp));
2401 }
2402 
2403 /*
2404  * Implementation of the NetBSD [l]stat() functions.
2405  */
2406 void
2407 cvtnstat(sb, nsb)
2408 	struct stat *sb;
2409 	struct nstat *nsb;
2410 {
2411 	bzero(nsb, sizeof *nsb);
2412 	nsb->st_dev = sb->st_dev;
2413 	nsb->st_ino = sb->st_ino;
2414 	nsb->st_mode = sb->st_mode;
2415 	nsb->st_nlink = sb->st_nlink;
2416 	nsb->st_uid = sb->st_uid;
2417 	nsb->st_gid = sb->st_gid;
2418 	nsb->st_rdev = sb->st_rdev;
2419 	nsb->st_atimespec = sb->st_atimespec;
2420 	nsb->st_mtimespec = sb->st_mtimespec;
2421 	nsb->st_ctimespec = sb->st_ctimespec;
2422 	nsb->st_size = sb->st_size;
2423 	nsb->st_blocks = sb->st_blocks;
2424 	nsb->st_blksize = sb->st_blksize;
2425 	nsb->st_flags = sb->st_flags;
2426 	nsb->st_gen = sb->st_gen;
2427 	nsb->st_birthtimespec = sb->st_birthtimespec;
2428 }
2429 
2430 #ifndef _SYS_SYSPROTO_H_
2431 struct nstat_args {
2432 	char	*path;
2433 	struct nstat *ub;
2434 };
2435 #endif
2436 int
2437 nstat(td, uap)
2438 	struct thread *td;
2439 	register struct nstat_args /* {
2440 		char *path;
2441 		struct nstat *ub;
2442 	} */ *uap;
2443 {
2444 	struct stat sb;
2445 	struct nstat nsb;
2446 	int error;
2447 
2448 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2449 	if (error)
2450 		return (error);
2451 	cvtnstat(&sb, &nsb);
2452 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2453 	return (error);
2454 }
2455 
2456 /*
2457  * NetBSD lstat.  Get file status; this version does not follow links.
2458  */
2459 #ifndef _SYS_SYSPROTO_H_
2460 struct lstat_args {
2461 	char	*path;
2462 	struct stat *ub;
2463 };
2464 #endif
2465 int
2466 nlstat(td, uap)
2467 	struct thread *td;
2468 	register struct nlstat_args /* {
2469 		char *path;
2470 		struct nstat *ub;
2471 	} */ *uap;
2472 {
2473 	struct stat sb;
2474 	struct nstat nsb;
2475 	int error;
2476 
2477 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2478 	if (error)
2479 		return (error);
2480 	cvtnstat(&sb, &nsb);
2481 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2482 	return (error);
2483 }
2484 
2485 /*
2486  * Get configurable pathname variables.
2487  */
2488 #ifndef _SYS_SYSPROTO_H_
2489 struct pathconf_args {
2490 	char	*path;
2491 	int	name;
2492 };
2493 #endif
2494 int
2495 pathconf(td, uap)
2496 	struct thread *td;
2497 	register struct pathconf_args /* {
2498 		char *path;
2499 		int name;
2500 	} */ *uap;
2501 {
2502 
2503 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW));
2504 }
2505 
2506 #ifndef _SYS_SYSPROTO_H_
2507 struct lpathconf_args {
2508 	char	*path;
2509 	int	name;
2510 };
2511 #endif
2512 int
2513 lpathconf(td, uap)
2514 	struct thread *td;
2515 	register struct lpathconf_args /* {
2516 		char *path;
2517 		int name;
2518 	} */ *uap;
2519 {
2520 
2521 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, NOFOLLOW));
2522 }
2523 
2524 int
2525 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
2526     u_long flags)
2527 {
2528 	struct nameidata nd;
2529 	int error, vfslocked;
2530 
2531 	NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1 |
2532 	    flags, pathseg, path, td);
2533 	if ((error = namei(&nd)) != 0)
2534 		return (error);
2535 	vfslocked = NDHASGIANT(&nd);
2536 	NDFREE(&nd, NDF_ONLY_PNBUF);
2537 
2538 	/* If asynchronous I/O is available, it works for all files. */
2539 	if (name == _PC_ASYNC_IO)
2540 		td->td_retval[0] = async_io_version;
2541 	else
2542 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2543 	vput(nd.ni_vp);
2544 	VFS_UNLOCK_GIANT(vfslocked);
2545 	return (error);
2546 }
2547 
2548 /*
2549  * Return target name of a symbolic link.
2550  */
2551 #ifndef _SYS_SYSPROTO_H_
2552 struct readlink_args {
2553 	char	*path;
2554 	char	*buf;
2555 	size_t	count;
2556 };
2557 #endif
2558 int
2559 readlink(td, uap)
2560 	struct thread *td;
2561 	register struct readlink_args /* {
2562 		char *path;
2563 		char *buf;
2564 		size_t count;
2565 	} */ *uap;
2566 {
2567 
2568 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2569 	    UIO_USERSPACE, uap->count));
2570 }
2571 #ifndef _SYS_SYSPROTO_H_
2572 struct readlinkat_args {
2573 	int	fd;
2574 	char	*path;
2575 	char	*buf;
2576 	size_t	bufsize;
2577 };
2578 #endif
2579 int
2580 readlinkat(struct thread *td, struct readlinkat_args *uap)
2581 {
2582 
2583 	return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
2584 	    uap->buf, UIO_USERSPACE, uap->bufsize));
2585 }
2586 
2587 int
2588 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2589     enum uio_seg bufseg, size_t count)
2590 {
2591 
2592 	return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg,
2593 	    count));
2594 }
2595 
2596 int
2597 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
2598     char *buf, enum uio_seg bufseg, size_t count)
2599 {
2600 	struct vnode *vp;
2601 	struct iovec aiov;
2602 	struct uio auio;
2603 	int error;
2604 	struct nameidata nd;
2605 	int vfslocked;
2606 
2607 	if (count > INT_MAX)
2608 		return (EINVAL);
2609 
2610 	NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
2611 	    AUDITVNODE1, pathseg, path, fd, td);
2612 
2613 	if ((error = namei(&nd)) != 0)
2614 		return (error);
2615 	NDFREE(&nd, NDF_ONLY_PNBUF);
2616 	vfslocked = NDHASGIANT(&nd);
2617 	vp = nd.ni_vp;
2618 #ifdef MAC
2619 	error = mac_vnode_check_readlink(td->td_ucred, vp);
2620 	if (error) {
2621 		vput(vp);
2622 		VFS_UNLOCK_GIANT(vfslocked);
2623 		return (error);
2624 	}
2625 #endif
2626 	if (vp->v_type != VLNK)
2627 		error = EINVAL;
2628 	else {
2629 		aiov.iov_base = buf;
2630 		aiov.iov_len = count;
2631 		auio.uio_iov = &aiov;
2632 		auio.uio_iovcnt = 1;
2633 		auio.uio_offset = 0;
2634 		auio.uio_rw = UIO_READ;
2635 		auio.uio_segflg = bufseg;
2636 		auio.uio_td = td;
2637 		auio.uio_resid = count;
2638 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2639 	}
2640 	vput(vp);
2641 	VFS_UNLOCK_GIANT(vfslocked);
2642 	td->td_retval[0] = count - auio.uio_resid;
2643 	return (error);
2644 }
2645 
2646 /*
2647  * Common implementation code for chflags() and fchflags().
2648  */
2649 static int
2650 setfflags(td, vp, flags)
2651 	struct thread *td;
2652 	struct vnode *vp;
2653 	int flags;
2654 {
2655 	int error;
2656 	struct mount *mp;
2657 	struct vattr vattr;
2658 
2659 	/*
2660 	 * Prevent non-root users from setting flags on devices.  When
2661 	 * a device is reused, users can retain ownership of the device
2662 	 * if they are allowed to set flags and programs assume that
2663 	 * chown can't fail when done as root.
2664 	 */
2665 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2666 		error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
2667 		if (error)
2668 			return (error);
2669 	}
2670 
2671 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2672 		return (error);
2673 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2674 	VATTR_NULL(&vattr);
2675 	vattr.va_flags = flags;
2676 #ifdef MAC
2677 	error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
2678 	if (error == 0)
2679 #endif
2680 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
2681 	VOP_UNLOCK(vp, 0);
2682 	vn_finished_write(mp);
2683 	return (error);
2684 }
2685 
2686 /*
2687  * Change flags of a file given a path name.
2688  */
2689 #ifndef _SYS_SYSPROTO_H_
2690 struct chflags_args {
2691 	char	*path;
2692 	int	flags;
2693 };
2694 #endif
2695 int
2696 chflags(td, uap)
2697 	struct thread *td;
2698 	register struct chflags_args /* {
2699 		char *path;
2700 		int flags;
2701 	} */ *uap;
2702 {
2703 	int error;
2704 	struct nameidata nd;
2705 	int vfslocked;
2706 
2707 	AUDIT_ARG_FFLAGS(uap->flags);
2708 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2709 	    uap->path, td);
2710 	if ((error = namei(&nd)) != 0)
2711 		return (error);
2712 	NDFREE(&nd, NDF_ONLY_PNBUF);
2713 	vfslocked = NDHASGIANT(&nd);
2714 	error = setfflags(td, nd.ni_vp, uap->flags);
2715 	vrele(nd.ni_vp);
2716 	VFS_UNLOCK_GIANT(vfslocked);
2717 	return (error);
2718 }
2719 
2720 /*
2721  * Same as chflags() but doesn't follow symlinks.
2722  */
2723 int
2724 lchflags(td, uap)
2725 	struct thread *td;
2726 	register struct lchflags_args /* {
2727 		char *path;
2728 		int flags;
2729 	} */ *uap;
2730 {
2731 	int error;
2732 	struct nameidata nd;
2733 	int vfslocked;
2734 
2735 	AUDIT_ARG_FFLAGS(uap->flags);
2736 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2737 	    uap->path, td);
2738 	if ((error = namei(&nd)) != 0)
2739 		return (error);
2740 	vfslocked = NDHASGIANT(&nd);
2741 	NDFREE(&nd, NDF_ONLY_PNBUF);
2742 	error = setfflags(td, nd.ni_vp, uap->flags);
2743 	vrele(nd.ni_vp);
2744 	VFS_UNLOCK_GIANT(vfslocked);
2745 	return (error);
2746 }
2747 
2748 /*
2749  * Change flags of a file given a file descriptor.
2750  */
2751 #ifndef _SYS_SYSPROTO_H_
2752 struct fchflags_args {
2753 	int	fd;
2754 	int	flags;
2755 };
2756 #endif
2757 int
2758 fchflags(td, uap)
2759 	struct thread *td;
2760 	register struct fchflags_args /* {
2761 		int fd;
2762 		int flags;
2763 	} */ *uap;
2764 {
2765 	struct file *fp;
2766 	int vfslocked;
2767 	int error;
2768 
2769 	AUDIT_ARG_FD(uap->fd);
2770 	AUDIT_ARG_FFLAGS(uap->flags);
2771 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2772 		return (error);
2773 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2774 #ifdef AUDIT
2775 	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
2776 	AUDIT_ARG_VNODE1(fp->f_vnode);
2777 	VOP_UNLOCK(fp->f_vnode, 0);
2778 #endif
2779 	error = setfflags(td, fp->f_vnode, uap->flags);
2780 	VFS_UNLOCK_GIANT(vfslocked);
2781 	fdrop(fp, td);
2782 	return (error);
2783 }
2784 
2785 /*
2786  * Common implementation code for chmod(), lchmod() and fchmod().
2787  */
2788 static int
2789 setfmode(td, vp, mode)
2790 	struct thread *td;
2791 	struct vnode *vp;
2792 	int mode;
2793 {
2794 	int error;
2795 	struct mount *mp;
2796 	struct vattr vattr;
2797 
2798 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2799 		return (error);
2800 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2801 	VATTR_NULL(&vattr);
2802 	vattr.va_mode = mode & ALLPERMS;
2803 #ifdef MAC
2804 	error = mac_vnode_check_setmode(td->td_ucred, vp, vattr.va_mode);
2805 	if (error == 0)
2806 #endif
2807 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
2808 	VOP_UNLOCK(vp, 0);
2809 	vn_finished_write(mp);
2810 	return (error);
2811 }
2812 
2813 /*
2814  * Change mode of a file given path name.
2815  */
2816 #ifndef _SYS_SYSPROTO_H_
2817 struct chmod_args {
2818 	char	*path;
2819 	int	mode;
2820 };
2821 #endif
2822 int
2823 chmod(td, uap)
2824 	struct thread *td;
2825 	register struct chmod_args /* {
2826 		char *path;
2827 		int mode;
2828 	} */ *uap;
2829 {
2830 
2831 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2832 }
2833 
2834 #ifndef _SYS_SYSPROTO_H_
2835 struct fchmodat_args {
2836 	int	dirfd;
2837 	char	*path;
2838 	mode_t	mode;
2839 	int	flag;
2840 }
2841 #endif
2842 int
2843 fchmodat(struct thread *td, struct fchmodat_args *uap)
2844 {
2845 	int flag = uap->flag;
2846 	int fd = uap->fd;
2847 	char *path = uap->path;
2848 	mode_t mode = uap->mode;
2849 
2850 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2851 		return (EINVAL);
2852 
2853 	return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
2854 }
2855 
2856 int
2857 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2858 {
2859 
2860 	return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0));
2861 }
2862 
2863 /*
2864  * Change mode of a file given path name (don't follow links.)
2865  */
2866 #ifndef _SYS_SYSPROTO_H_
2867 struct lchmod_args {
2868 	char	*path;
2869 	int	mode;
2870 };
2871 #endif
2872 int
2873 lchmod(td, uap)
2874 	struct thread *td;
2875 	register struct lchmod_args /* {
2876 		char *path;
2877 		int mode;
2878 	} */ *uap;
2879 {
2880 
2881 	return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
2882 	    uap->mode, AT_SYMLINK_NOFOLLOW));
2883 }
2884 
2885 
2886 int
2887 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
2888     mode_t mode, int flag)
2889 {
2890 	int error;
2891 	struct nameidata nd;
2892 	int vfslocked;
2893 	int follow;
2894 
2895 	AUDIT_ARG_MODE(mode);
2896 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2897 	NDINIT_AT(&nd, LOOKUP,  follow | MPSAFE | AUDITVNODE1, pathseg, path,
2898 	    fd, td);
2899 	if ((error = namei(&nd)) != 0)
2900 		return (error);
2901 	vfslocked = NDHASGIANT(&nd);
2902 	NDFREE(&nd, NDF_ONLY_PNBUF);
2903 	error = setfmode(td, nd.ni_vp, mode);
2904 	vrele(nd.ni_vp);
2905 	VFS_UNLOCK_GIANT(vfslocked);
2906 	return (error);
2907 }
2908 
2909 /*
2910  * Change mode of a file given a file descriptor.
2911  */
2912 #ifndef _SYS_SYSPROTO_H_
2913 struct fchmod_args {
2914 	int	fd;
2915 	int	mode;
2916 };
2917 #endif
2918 int
2919 fchmod(td, uap)
2920 	struct thread *td;
2921 	register struct fchmod_args /* {
2922 		int fd;
2923 		int mode;
2924 	} */ *uap;
2925 {
2926 	struct file *fp;
2927 	int vfslocked;
2928 	int error;
2929 
2930 	AUDIT_ARG_FD(uap->fd);
2931 	AUDIT_ARG_MODE(uap->mode);
2932 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2933 		return (error);
2934 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2935 #ifdef AUDIT
2936 	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
2937 	AUDIT_ARG_VNODE1(fp->f_vnode);
2938 	VOP_UNLOCK(fp->f_vnode, 0);
2939 #endif
2940 	error = setfmode(td, fp->f_vnode, uap->mode);
2941 	VFS_UNLOCK_GIANT(vfslocked);
2942 	fdrop(fp, td);
2943 	return (error);
2944 }
2945 
2946 /*
2947  * Common implementation for chown(), lchown(), and fchown()
2948  */
2949 static int
2950 setfown(td, vp, uid, gid)
2951 	struct thread *td;
2952 	struct vnode *vp;
2953 	uid_t uid;
2954 	gid_t gid;
2955 {
2956 	int error;
2957 	struct mount *mp;
2958 	struct vattr vattr;
2959 
2960 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2961 		return (error);
2962 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2963 	VATTR_NULL(&vattr);
2964 	vattr.va_uid = uid;
2965 	vattr.va_gid = gid;
2966 #ifdef MAC
2967 	error = mac_vnode_check_setowner(td->td_ucred, vp, vattr.va_uid,
2968 	    vattr.va_gid);
2969 	if (error == 0)
2970 #endif
2971 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
2972 	VOP_UNLOCK(vp, 0);
2973 	vn_finished_write(mp);
2974 	return (error);
2975 }
2976 
2977 /*
2978  * Set ownership given a path name.
2979  */
2980 #ifndef _SYS_SYSPROTO_H_
2981 struct chown_args {
2982 	char	*path;
2983 	int	uid;
2984 	int	gid;
2985 };
2986 #endif
2987 int
2988 chown(td, uap)
2989 	struct thread *td;
2990 	register struct chown_args /* {
2991 		char *path;
2992 		int uid;
2993 		int gid;
2994 	} */ *uap;
2995 {
2996 
2997 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2998 }
2999 
3000 #ifndef _SYS_SYSPROTO_H_
3001 struct fchownat_args {
3002 	int fd;
3003 	const char * path;
3004 	uid_t uid;
3005 	gid_t gid;
3006 	int flag;
3007 };
3008 #endif
3009 int
3010 fchownat(struct thread *td, struct fchownat_args *uap)
3011 {
3012 	int flag;
3013 
3014 	flag = uap->flag;
3015 	if (flag & ~AT_SYMLINK_NOFOLLOW)
3016 		return (EINVAL);
3017 
3018 	return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
3019 	    uap->gid, uap->flag));
3020 }
3021 
3022 int
3023 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
3024     int gid)
3025 {
3026 
3027 	return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0));
3028 }
3029 
3030 int
3031 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
3032     int uid, int gid, int flag)
3033 {
3034 	struct nameidata nd;
3035 	int error, vfslocked, follow;
3036 
3037 	AUDIT_ARG_OWNER(uid, gid);
3038 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
3039 	NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg, path,
3040 	    fd, td);
3041 
3042 	if ((error = namei(&nd)) != 0)
3043 		return (error);
3044 	vfslocked = NDHASGIANT(&nd);
3045 	NDFREE(&nd, NDF_ONLY_PNBUF);
3046 	error = setfown(td, nd.ni_vp, uid, gid);
3047 	vrele(nd.ni_vp);
3048 	VFS_UNLOCK_GIANT(vfslocked);
3049 	return (error);
3050 }
3051 
3052 /*
3053  * Set ownership given a path name, do not cross symlinks.
3054  */
3055 #ifndef _SYS_SYSPROTO_H_
3056 struct lchown_args {
3057 	char	*path;
3058 	int	uid;
3059 	int	gid;
3060 };
3061 #endif
3062 int
3063 lchown(td, uap)
3064 	struct thread *td;
3065 	register struct lchown_args /* {
3066 		char *path;
3067 		int uid;
3068 		int gid;
3069 	} */ *uap;
3070 {
3071 
3072 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
3073 }
3074 
3075 int
3076 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
3077     int gid)
3078 {
3079 
3080 	return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid,
3081 	    AT_SYMLINK_NOFOLLOW));
3082 }
3083 
3084 /*
3085  * Set ownership given a file descriptor.
3086  */
3087 #ifndef _SYS_SYSPROTO_H_
3088 struct fchown_args {
3089 	int	fd;
3090 	int	uid;
3091 	int	gid;
3092 };
3093 #endif
3094 int
3095 fchown(td, uap)
3096 	struct thread *td;
3097 	register struct fchown_args /* {
3098 		int fd;
3099 		int uid;
3100 		int gid;
3101 	} */ *uap;
3102 {
3103 	struct file *fp;
3104 	int vfslocked;
3105 	int error;
3106 
3107 	AUDIT_ARG_FD(uap->fd);
3108 	AUDIT_ARG_OWNER(uap->uid, uap->gid);
3109 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3110 		return (error);
3111 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
3112 #ifdef AUDIT
3113 	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
3114 	AUDIT_ARG_VNODE1(fp->f_vnode);
3115 	VOP_UNLOCK(fp->f_vnode, 0);
3116 #endif
3117 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
3118 	VFS_UNLOCK_GIANT(vfslocked);
3119 	fdrop(fp, td);
3120 	return (error);
3121 }
3122 
3123 /*
3124  * Common implementation code for utimes(), lutimes(), and futimes().
3125  */
3126 static int
3127 getutimes(usrtvp, tvpseg, tsp)
3128 	const struct timeval *usrtvp;
3129 	enum uio_seg tvpseg;
3130 	struct timespec *tsp;
3131 {
3132 	struct timeval tv[2];
3133 	const struct timeval *tvp;
3134 	int error;
3135 
3136 	if (usrtvp == NULL) {
3137 		microtime(&tv[0]);
3138 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
3139 		tsp[1] = tsp[0];
3140 	} else {
3141 		if (tvpseg == UIO_SYSSPACE) {
3142 			tvp = usrtvp;
3143 		} else {
3144 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
3145 				return (error);
3146 			tvp = tv;
3147 		}
3148 
3149 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
3150 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
3151 			return (EINVAL);
3152 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
3153 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
3154 	}
3155 	return (0);
3156 }
3157 
3158 /*
3159  * Common implementation code for utimes(), lutimes(), and futimes().
3160  */
3161 static int
3162 setutimes(td, vp, ts, numtimes, nullflag)
3163 	struct thread *td;
3164 	struct vnode *vp;
3165 	const struct timespec *ts;
3166 	int numtimes;
3167 	int nullflag;
3168 {
3169 	int error, setbirthtime;
3170 	struct mount *mp;
3171 	struct vattr vattr;
3172 
3173 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3174 		return (error);
3175 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3176 	setbirthtime = 0;
3177 	if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
3178 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
3179 		setbirthtime = 1;
3180 	VATTR_NULL(&vattr);
3181 	vattr.va_atime = ts[0];
3182 	vattr.va_mtime = ts[1];
3183 	if (setbirthtime)
3184 		vattr.va_birthtime = ts[1];
3185 	if (numtimes > 2)
3186 		vattr.va_birthtime = ts[2];
3187 	if (nullflag)
3188 		vattr.va_vaflags |= VA_UTIMES_NULL;
3189 #ifdef MAC
3190 	error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
3191 	    vattr.va_mtime);
3192 #endif
3193 	if (error == 0)
3194 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
3195 	VOP_UNLOCK(vp, 0);
3196 	vn_finished_write(mp);
3197 	return (error);
3198 }
3199 
3200 /*
3201  * Set the access and modification times of a file.
3202  */
3203 #ifndef _SYS_SYSPROTO_H_
3204 struct utimes_args {
3205 	char	*path;
3206 	struct	timeval *tptr;
3207 };
3208 #endif
3209 int
3210 utimes(td, uap)
3211 	struct thread *td;
3212 	register struct utimes_args /* {
3213 		char *path;
3214 		struct timeval *tptr;
3215 	} */ *uap;
3216 {
3217 
3218 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
3219 	    UIO_USERSPACE));
3220 }
3221 
3222 #ifndef _SYS_SYSPROTO_H_
3223 struct futimesat_args {
3224 	int fd;
3225 	const char * path;
3226 	const struct timeval * times;
3227 };
3228 #endif
3229 int
3230 futimesat(struct thread *td, struct futimesat_args *uap)
3231 {
3232 
3233 	return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
3234 	    uap->times, UIO_USERSPACE));
3235 }
3236 
3237 int
3238 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
3239     struct timeval *tptr, enum uio_seg tptrseg)
3240 {
3241 
3242 	return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg));
3243 }
3244 
3245 int
3246 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
3247     struct timeval *tptr, enum uio_seg tptrseg)
3248 {
3249 	struct nameidata nd;
3250 	struct timespec ts[2];
3251 	int error, vfslocked;
3252 
3253 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
3254 		return (error);
3255 	NDINIT_AT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path,
3256 	    fd, td);
3257 
3258 	if ((error = namei(&nd)) != 0)
3259 		return (error);
3260 	vfslocked = NDHASGIANT(&nd);
3261 	NDFREE(&nd, NDF_ONLY_PNBUF);
3262 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
3263 	vrele(nd.ni_vp);
3264 	VFS_UNLOCK_GIANT(vfslocked);
3265 	return (error);
3266 }
3267 
3268 /*
3269  * Set the access and modification times of a file.
3270  */
3271 #ifndef _SYS_SYSPROTO_H_
3272 struct lutimes_args {
3273 	char	*path;
3274 	struct	timeval *tptr;
3275 };
3276 #endif
3277 int
3278 lutimes(td, uap)
3279 	struct thread *td;
3280 	register struct lutimes_args /* {
3281 		char *path;
3282 		struct timeval *tptr;
3283 	} */ *uap;
3284 {
3285 
3286 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
3287 	    UIO_USERSPACE));
3288 }
3289 
3290 int
3291 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
3292     struct timeval *tptr, enum uio_seg tptrseg)
3293 {
3294 	struct timespec ts[2];
3295 	int error;
3296 	struct nameidata nd;
3297 	int vfslocked;
3298 
3299 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
3300 		return (error);
3301 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3302 	if ((error = namei(&nd)) != 0)
3303 		return (error);
3304 	vfslocked = NDHASGIANT(&nd);
3305 	NDFREE(&nd, NDF_ONLY_PNBUF);
3306 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
3307 	vrele(nd.ni_vp);
3308 	VFS_UNLOCK_GIANT(vfslocked);
3309 	return (error);
3310 }
3311 
3312 /*
3313  * Set the access and modification times of a file.
3314  */
3315 #ifndef _SYS_SYSPROTO_H_
3316 struct futimes_args {
3317 	int	fd;
3318 	struct	timeval *tptr;
3319 };
3320 #endif
3321 int
3322 futimes(td, uap)
3323 	struct thread *td;
3324 	register struct futimes_args /* {
3325 		int  fd;
3326 		struct timeval *tptr;
3327 	} */ *uap;
3328 {
3329 
3330 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
3331 }
3332 
3333 int
3334 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
3335     enum uio_seg tptrseg)
3336 {
3337 	struct timespec ts[2];
3338 	struct file *fp;
3339 	int vfslocked;
3340 	int error;
3341 
3342 	AUDIT_ARG_FD(fd);
3343 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
3344 		return (error);
3345 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
3346 		return (error);
3347 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
3348 #ifdef AUDIT
3349 	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
3350 	AUDIT_ARG_VNODE1(fp->f_vnode);
3351 	VOP_UNLOCK(fp->f_vnode, 0);
3352 #endif
3353 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
3354 	VFS_UNLOCK_GIANT(vfslocked);
3355 	fdrop(fp, td);
3356 	return (error);
3357 }
3358 
3359 /*
3360  * Truncate a file given its path name.
3361  */
3362 #ifndef _SYS_SYSPROTO_H_
3363 struct truncate_args {
3364 	char	*path;
3365 	int	pad;
3366 	off_t	length;
3367 };
3368 #endif
3369 int
3370 truncate(td, uap)
3371 	struct thread *td;
3372 	register struct truncate_args /* {
3373 		char *path;
3374 		int pad;
3375 		off_t length;
3376 	} */ *uap;
3377 {
3378 
3379 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3380 }
3381 
/*
 * Set the size of the file named by path (in address space pathseg) to
 * length.
 *
 * A negative length is rejected with EINVAL before any lookup.  The path
 * is looked up with FOLLOW.  Directories fail with EISDIR.  The size is
 * changed through VOP_SETATTR() only after the MAC write check (when MAC
 * is configured), vn_writechk() and a VWRITE VOP_ACCESS() check have all
 * succeeded.
 *
 * Returns 0 on success or the first error encountered.
 */
int
kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
{
	struct mount *mp;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	int vfslocked;

	if (length < 0)
		return(EINVAL);
	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;
	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
		/* Only a reference is held here; the vnode is not locked. */
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (error);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/*
	 * The checks below form a single if/else-if chain; the MAC branch
	 * has an intentionally empty body and exists only so that a MAC
	 * failure skips the remaining branches with error set.
	 */
	if (vp->v_type == VDIR)
		error = EISDIR;
#ifdef MAC
	else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
	}
#endif
	else if ((error = vn_writechk(vp)) == 0 &&
	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
		/* Only the size attribute is set; the rest stay VATTR_NULL. */
		VATTR_NULL(&vattr);
		vattr.va_size = length;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
	}
	/* vput() drops both the lock taken above and the reference. */
	vput(vp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
3423 
#if defined(COMPAT_43)
/*
 * COMPAT_43 truncate: the length argument is a long here.  The arguments
 * are repackaged into a struct truncate_args (path and length only; pad is
 * not set) and handed to truncate().
 */
#ifndef _SYS_SYSPROTO_H_
struct otruncate_args {
	char	*path;
	long	length;
};
#endif
int
otruncate(struct thread *td, struct otruncate_args *uap)
{
	struct truncate_args args;

	args.path = uap->path;
	args.length = uap->length;
	return (truncate(td, &args));
}
#endif /* COMPAT_43 */
3453 
3454 /* Versions with the pad argument */
3455 int
3456 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
3457 {
3458 	struct truncate_args ouap;
3459 
3460 	ouap.path = uap->path;
3461 	ouap.length = uap->length;
3462 	return (truncate(td, &ouap));
3463 }
3464 
3465 int
3466 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
3467 {
3468 	struct ftruncate_args ouap;
3469 
3470 	ouap.fd = uap->fd;
3471 	ouap.length = uap->length;
3472 	return (ftruncate(td, &ouap));
3473 }
3474 
3475 /*
3476  * Sync an open file.
3477  */
3478 #ifndef _SYS_SYSPROTO_H_
3479 struct fsync_args {
3480 	int	fd;
3481 };
3482 #endif
3483 int
3484 fsync(td, uap)
3485 	struct thread *td;
3486 	struct fsync_args /* {
3487 		int fd;
3488 	} */ *uap;
3489 {
3490 	struct vnode *vp;
3491 	struct mount *mp;
3492 	struct file *fp;
3493 	int vfslocked;
3494 	int error, lock_flags;
3495 
3496 	AUDIT_ARG_FD(uap->fd);
3497 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3498 		return (error);
3499 	vp = fp->f_vnode;
3500 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3501 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3502 		goto drop;
3503 	if (MNT_SHARED_WRITES(mp) ||
3504 	    ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
3505 		lock_flags = LK_SHARED;
3506 	} else {
3507 		lock_flags = LK_EXCLUSIVE;
3508 	}
3509 	vn_lock(vp, lock_flags | LK_RETRY);
3510 	AUDIT_ARG_VNODE1(vp);
3511 	if (vp->v_object != NULL) {
3512 		VM_OBJECT_LOCK(vp->v_object);
3513 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3514 		VM_OBJECT_UNLOCK(vp->v_object);
3515 	}
3516 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3517 
3518 	VOP_UNLOCK(vp, 0);
3519 	vn_finished_write(mp);
3520 drop:
3521 	VFS_UNLOCK_GIANT(vfslocked);
3522 	fdrop(fp, td);
3523 	return (error);
3524 }
3525 
3526 /*
3527  * Rename files.  Source and destination must either both be directories, or
3528  * both not be directories.  If target is a directory, it must be empty.
3529  */
3530 #ifndef _SYS_SYSPROTO_H_
3531 struct rename_args {
3532 	char	*from;
3533 	char	*to;
3534 };
3535 #endif
3536 int
3537 rename(td, uap)
3538 	struct thread *td;
3539 	register struct rename_args /* {
3540 		char *from;
3541 		char *to;
3542 	} */ *uap;
3543 {
3544 
3545 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3546 }
3547 
3548 #ifndef _SYS_SYSPROTO_H_
3549 struct renameat_args {
3550 	int	oldfd;
3551 	char	*old;
3552 	int	newfd;
3553 	char	*new;
3554 };
3555 #endif
3556 int
3557 renameat(struct thread *td, struct renameat_args *uap)
3558 {
3559 
3560 	return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
3561 	    UIO_USERSPACE));
3562 }
3563 
3564 int
3565 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3566 {
3567 
3568 	return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg));
3569 }
3570 
/*
 * Rename `old' to `new'.  Both names are in address space pathseg; oldfd
 * and newfd are passed to the respective lookups as descriptors.
 *
 * The source is looked up first (DELETE), then the target (RENAME).  The
 * following cases are rejected before VOP_RENAME() is called:
 *   - source is a directory and an existing target is not: ENOTDIR
 *   - source is not a directory and an existing target is: EISDIR
 *   - source vnode is the target's parent directory:      EINVAL
 * If source and target are the same vnode nothing is done and 0 is
 * returned; internally this is signalled with error == -1.
 *
 * Returns 0 on success or an errno value.
 */
int
kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
    enum uio_seg pathseg)
{
	struct mount *mp = NULL;
	struct vnode *tvp, *fvp, *tdvp;
	struct nameidata fromnd, tond;
	int tvfslocked;
	int fvfslocked;
	int error;

	bwillwrite();
	/*
	 * With MAC the source parent and leaf are returned locked so that
	 * the MAC check below can run on them; without MAC only references
	 * are requested (WANTPARENT).
	 */
#ifdef MAC
	NDINIT_AT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
	    AUDITVNODE1, pathseg, old, oldfd, td);
#else
	NDINIT_AT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
	    AUDITVNODE1, pathseg, old, oldfd, td);
#endif

	if ((error = namei(&fromnd)) != 0)
		return (error);
	fvfslocked = NDHASGIANT(&fromnd);
	/* No target lookup yet; out1 unconditionally uses this value. */
	tvfslocked = 0;
#ifdef MAC
	error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
	    fromnd.ni_vp, &fromnd.ni_cnd);
	/* Drop the locks taken for the MAC check; references are kept. */
	VOP_UNLOCK(fromnd.ni_dvp, 0);
	if (fromnd.ni_dvp != fromnd.ni_vp)
		VOP_UNLOCK(fromnd.ni_vp, 0);
#endif
	fvp = fromnd.ni_vp;
	/* error can only be non-zero here if the MAC check failed. */
	if (error == 0)
		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
	if (error != 0) {
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}
	NDINIT_AT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
	    MPSAFE | AUDITVNODE2, pathseg, new, newfd, td);
	if (fromnd.ni_vp->v_type == VDIR)
		tond.ni_cnd.cn_flags |= WILLBEDIR;
	if ((error = namei(&tond)) != 0) {
		/* Translate error code for rename("dir1", "dir2/."). */
		if (error == EISDIR && fvp->v_type == VDIR)
			error = EINVAL;
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		vn_finished_write(mp);
		goto out1;
	}
	tvfslocked = NDHASGIANT(&tond);
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;
	/* tvp is NULL when the target name does not exist yet. */
	if (tvp != NULL) {
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
	}
	/* The source may not be the directory that will contain the target. */
	if (fvp == tdvp) {
		error = EINVAL;
		goto out;
	}
	/*
	 * If the source is the same as the destination (that is, if they
	 * are links to the same vnode), then there is nothing to do.
	 */
	if (fvp == tvp)
		error = -1;
#ifdef MAC
	else
		error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
#endif
out:
	if (!error) {
		/*
		 * No vnode releases are done on this path after the call;
		 * NOTE(review): presumably VOP_RENAME() consumes the vnode
		 * references and locks itself -- confirm against
		 * VOP_RENAME(9).
		 */
		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
	} else {
		/*
		 * Failure, or the "nothing to do" case (error == -1):
		 * release everything both lookups returned.
		 */
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
		if (tvp)
			vput(tvp);
		/*
		 * When parent and target are the same vnode, the vput()
		 * above already dropped its lock, so only a reference is
		 * released here.
		 */
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
	}
	vrele(tond.ni_startdir);
	vn_finished_write(mp);
out1:
	if (fromnd.ni_startdir)
		vrele(fromnd.ni_startdir);
	VFS_UNLOCK_GIANT(fvfslocked);
	VFS_UNLOCK_GIANT(tvfslocked);
	/* Map the internal "same vnode, nothing to do" marker to success. */
	if (error == -1)
		return (0);
	return (error);
}
3681 
3682 /*
3683  * Make a directory file.
3684  */
3685 #ifndef _SYS_SYSPROTO_H_
3686 struct mkdir_args {
3687 	char	*path;
3688 	int	mode;
3689 };
3690 #endif
3691 int
3692 mkdir(td, uap)
3693 	struct thread *td;
3694 	register struct mkdir_args /* {
3695 		char *path;
3696 		int mode;
3697 	} */ *uap;
3698 {
3699 
3700 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3701 }
3702 
3703 #ifndef _SYS_SYSPROTO_H_
3704 struct mkdirat_args {
3705 	int	fd;
3706 	char	*path;
3707 	mode_t	mode;
3708 };
3709 #endif
3710 int
3711 mkdirat(struct thread *td, struct mkdirat_args *uap)
3712 {
3713 
3714 	return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
3715 }
3716 
3717 int
3718 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3719 {
3720 
3721 	return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode));
3722 }
3723 
/*
 * Create the directory named by path (in address space segflg), using fd
 * as the descriptor for the lookup.
 *
 * The new directory's mode is (mode & ACCESSPERMS) with the bits of the
 * process's fd_cmask cleared.  If the name already exists EEXIST is
 * returned.  If write access to the parent's filesystem cannot be started
 * without waiting, everything is released, the function sleeps in
 * vn_start_write() and the whole lookup is restarted.
 *
 * Returns 0 on success or an errno value.
 */
int
kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
    int mode)
{
	struct mount *mp;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	int vfslocked;

	AUDIT_ARG_MODE(mode);
restart:
	bwillwrite();
	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
	    segflg, path, fd, td);
	nd.ni_cnd.cn_flags |= WILLBEDIR;
	if ((error = namei(&nd)) != 0)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;
	/* A non-NULL leaf means the name already exists. */
	if (vp != NULL) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		/*
		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
		 * the strange behaviour of leaving the vnode unlocked
		 * if the target is the same vnode as the parent.
		 */
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (EEXIST);
	}
	/*
	 * Try to start the write without sleeping while the parent is
	 * locked.  On failure drop the parent and Giant, wait
	 * (V_XSLEEP) with nothing held, then redo the lookup from scratch.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VDIR;
	/* Apply the process's file creation mask to the permission bits. */
	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error)
		goto out;
#endif
	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
#ifdef MAC
out:
#endif
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	/* nd.ni_vp is released only when VOP_MKDIR() succeeded. */
	if (!error)
		vput(nd.ni_vp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
3789 
3790 /*
3791  * Remove a directory file.
3792  */
3793 #ifndef _SYS_SYSPROTO_H_
3794 struct rmdir_args {
3795 	char	*path;
3796 };
3797 #endif
3798 int
3799 rmdir(td, uap)
3800 	struct thread *td;
3801 	struct rmdir_args /* {
3802 		char *path;
3803 	} */ *uap;
3804 {
3805 
3806 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3807 }
3808 
3809 int
3810 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3811 {
3812 
3813 	return (kern_rmdirat(td, AT_FDCWD, path, pathseg));
3814 }
3815 
3816 int
3817 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
3818 {
3819 	struct mount *mp;
3820 	struct vnode *vp;
3821 	int error;
3822 	struct nameidata nd;
3823 	int vfslocked;
3824 
3825 restart:
3826 	bwillwrite();
3827 	NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3828 	    pathseg, path, fd, td);
3829 	if ((error = namei(&nd)) != 0)
3830 		return (error);
3831 	vfslocked = NDHASGIANT(&nd);
3832 	vp = nd.ni_vp;
3833 	if (vp->v_type != VDIR) {
3834 		error = ENOTDIR;
3835 		goto out;
3836 	}
3837 	/*
3838 	 * No rmdir "." please.
3839 	 */
3840 	if (nd.ni_dvp == vp) {
3841 		error = EINVAL;
3842 		goto out;
3843 	}
3844 	/*
3845 	 * The root of a mounted filesystem cannot be deleted.
3846 	 */
3847 	if (vp->v_vflag & VV_ROOT) {
3848 		error = EBUSY;
3849 		goto out;
3850 	}
3851 #ifdef MAC
3852 	error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
3853 	    &nd.ni_cnd);
3854 	if (error)
3855 		goto out;
3856 #endif
3857 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3858 		NDFREE(&nd, NDF_ONLY_PNBUF);
3859 		vput(vp);
3860 		if (nd.ni_dvp == vp)
3861 			vrele(nd.ni_dvp);
3862 		else
3863 			vput(nd.ni_dvp);
3864 		VFS_UNLOCK_GIANT(vfslocked);
3865 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3866 			return (error);
3867 		goto restart;
3868 	}
3869 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3870 	vn_finished_write(mp);
3871 out:
3872 	NDFREE(&nd, NDF_ONLY_PNBUF);
3873 	vput(vp);
3874 	if (nd.ni_dvp == vp)
3875 		vrele(nd.ni_dvp);
3876 	else
3877 		vput(nd.ni_dvp);
3878 	VFS_UNLOCK_GIANT(vfslocked);
3879 	return (error);
3880 }
3881 
3882 #ifdef COMPAT_43
3883 /*
3884  * Read a block of directory entries in a filesystem independent format.
3885  */
/*
 * COMPAT_43 variant: reads up to uap->count bytes of directory entries
 * from the directory open on uap->fd into the user buffer uap->buf.
 * Unless the entries are read directly into the user buffer (see the
 * non-little-endian case below), each entry's d_type/d_namlen bytes are
 * rewritten in a kernel bounce buffer before being copied out.
 * The file offset in effect before the read is copied out to uap->basep
 * and the number of bytes transferred is returned in td_retval[0].
 */
3886 #ifndef _SYS_SYSPROTO_H_
3887 struct ogetdirentries_args {
3888 	int	fd;
3889 	char	*buf;
3890 	u_int	count;
3891 	long	*basep;
3892 };
3893 #endif
3894 int
3895 ogetdirentries(td, uap)
3896 	struct thread *td;
3897 	register struct ogetdirentries_args /* {
3898 		int fd;
3899 		char *buf;
3900 		u_int count;
3901 		long *basep;
3902 	} */ *uap;
3903 {
3904 	struct vnode *vp;
3905 	struct file *fp;
3906 	struct uio auio, kuio;
3907 	struct iovec aiov, kiov;
3908 	struct dirent *dp, *edp;
3909 	caddr_t dirbuf;
3910 	int error, eofflag, readcnt, vfslocked;
3911 	long loff;
3912 
3913 	/* XXX arbitrary sanity limit on `count'. */
3914 	if (uap->count > 64 * 1024)
3915 		return (EINVAL);
3916 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3917 		return (error);
	/* The descriptor must have been opened for reading. */
3918 	if ((fp->f_flag & FREAD) == 0) {
3919 		fdrop(fp, td);
3920 		return (EBADF);
3921 	}
3922 	vp = fp->f_vnode;
	/*
	 * Re-entered from the union mount case below after vp has been
	 * switched to the covered vnode.
	 */
3923 unionread:
3924 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3925 	if (vp->v_type != VDIR) {
3926 		VFS_UNLOCK_GIANT(vfslocked);
3927 		fdrop(fp, td);
3928 		return (EINVAL);
3929 	}
	/* Describe the caller's buffer as a single user-space iovec. */
3930 	aiov.iov_base = uap->buf;
3931 	aiov.iov_len = uap->count;
3932 	auio.uio_iov = &aiov;
3933 	auio.uio_iovcnt = 1;
3934 	auio.uio_rw = UIO_READ;
3935 	auio.uio_segflg = UIO_USERSPACE;
3936 	auio.uio_td = td;
3937 	auio.uio_resid = uap->count;
3938 	vn_lock(vp, LK_SHARED | LK_RETRY);
	/* Remember the starting offset; it is what gets reported via basep. */
3939 	loff = auio.uio_offset = fp->f_offset;
3940 #ifdef MAC
3941 	error = mac_vnode_check_readdir(td->td_ucred, vp);
3942 	if (error) {
3943 		VOP_UNLOCK(vp, 0);
3944 		VFS_UNLOCK_GIANT(vfslocked);
3945 		fdrop(fp, td);
3946 		return (error);
3947 	}
3948 #endif
	/*
	 * On non-little-endian builds, a mount with mnt_maxsymlinklen <= 0
	 * is read straight into the user buffer with no rewriting.  In every
	 * other case (and always on little-endian builds) the braces after
	 * the #endif are executed.
	 */
3949 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3950 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3951 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3952 			    NULL, NULL);
3953 			fp->f_offset = auio.uio_offset;
3954 		} else
3955 #	endif
3956 	{
		/*
		 * Read into a temporary kernel buffer (kuio is a copy of auio
		 * retargeted at dirbuf) so the entries can be patched before
		 * they are copied out.
		 */
3957 		kuio = auio;
3958 		kuio.uio_iov = &kiov;
3959 		kuio.uio_segflg = UIO_SYSSPACE;
3960 		kiov.iov_len = uap->count;
3961 		dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
3962 		kiov.iov_base = dirbuf;
3963 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3964 			    NULL, NULL);
3965 		fp->f_offset = kuio.uio_offset;
3966 		if (error == 0) {
			/* Walk the readcnt bytes actually produced. */
3967 			readcnt = uap->count - kuio.uio_resid;
3968 			edp = (struct dirent *)&dirbuf[readcnt];
3969 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3970 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3971 					/*
3972 					 * The expected low byte of
3973 					 * dp->d_namlen is our dp->d_type.
3974 					 * The high MBZ byte of dp->d_namlen
3975 					 * is our dp->d_namlen.
3976 					 */
3977 					dp->d_type = dp->d_namlen;
3978 					dp->d_namlen = 0;
3979 #				else
3980 					/*
3981 					 * The dp->d_type is the high byte
3982 					 * of the expected dp->d_namlen,
3983 					 * so must be zero'ed.
3984 					 */
3985 					dp->d_type = 0;
3986 #				endif
				/*
				 * Advance by the record length; a zero length
				 * would never make progress, so it is reported
				 * as EIO.
				 */
3987 				if (dp->d_reclen > 0) {
3988 					dp = (struct dirent *)
3989 					    ((char *)dp + dp->d_reclen);
3990 				} else {
3991 					error = EIO;
3992 					break;
3993 				}
3994 			}
			/*
			 * Copy out only if the walk reached the end of the
			 * data, i.e. the loop was not left via the EIO break.
			 */
3995 			if (dp >= edp)
3996 				error = uiomove(dirbuf, readcnt, &auio);
3997 		}
3998 		free(dirbuf, M_TEMP);
3999 	}
4000 	if (error) {
4001 		VOP_UNLOCK(vp, 0);
4002 		VFS_UNLOCK_GIANT(vfslocked);
4003 		fdrop(fp, td);
4004 		return (error);
4005 	}
	/*
	 * Nothing was transferred and vp is the root of a MNT_UNION mount:
	 * point the open file at the covered vnode, reset its offset and
	 * retry the read there.
	 */
4006 	if (uap->count == auio.uio_resid &&
4007 	    (vp->v_vflag & VV_ROOT) &&
4008 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
4009 		struct vnode *tvp = vp;
4010 		vp = vp->v_mount->mnt_vnodecovered;
4011 		VREF(vp);
4012 		fp->f_vnode = vp;
4013 		fp->f_data = vp;
4014 		fp->f_offset = 0;
4015 		vput(tvp);
4016 		VFS_UNLOCK_GIANT(vfslocked);
4017 		goto unionread;
4018 	}
4019 	VOP_UNLOCK(vp, 0);
4020 	VFS_UNLOCK_GIANT(vfslocked);
	/*
	 * Report the starting offset; the byte count is stored in
	 * td_retval[0] regardless of whether this copyout succeeds.
	 */
4021 	error = copyout(&loff, uap->basep, sizeof(long));
4022 	fdrop(fp, td);
4023 	td->td_retval[0] = uap->count - auio.uio_resid;
4024 	return (error);
4025 }
4026 #endif /* COMPAT_43 */
4027 
4028 /*
4029  * Read a block of directory entries in a filesystem independent format.
4030  */
4031 #ifndef _SYS_SYSPROTO_H_
4032 struct getdirentries_args {
4033 	int	fd;
4034 	char	*buf;
4035 	u_int	count;
4036 	long	*basep;
4037 };
4038 #endif
4039 int
4040 getdirentries(td, uap)
4041 	struct thread *td;
4042 	register struct getdirentries_args /* {
4043 		int fd;
4044 		char *buf;
4045 		u_int count;
4046 		long *basep;
4047 	} */ *uap;
4048 {
4049 	long base;
4050 	int error;
4051 
4052 	error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base);
4053 	if (error)
4054 		return (error);
4055 	if (uap->basep != NULL)
4056 		error = copyout(&base, uap->basep, sizeof(long));
4057 	return (error);
4058 }
4059 
4060 int
4061 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
4062     long *basep)
4063 {
4064 	struct vnode *vp;
4065 	struct file *fp;
4066 	struct uio auio;
4067 	struct iovec aiov;
4068 	int vfslocked;
4069 	long loff;
4070 	int error, eofflag;
4071 
4072 	AUDIT_ARG_FD(fd);
4073 	if (count > INT_MAX)
4074 		return (EINVAL);
4075 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
4076 		return (error);
4077 	if ((fp->f_flag & FREAD) == 0) {
4078 		fdrop(fp, td);
4079 		return (EBADF);
4080 	}
4081 	vp = fp->f_vnode;
4082 unionread:
4083 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
4084 	if (vp->v_type != VDIR) {
4085 		VFS_UNLOCK_GIANT(vfslocked);
4086 		error = EINVAL;
4087 		goto fail;
4088 	}
4089 	aiov.iov_base = buf;
4090 	aiov.iov_len = count;
4091 	auio.uio_iov = &aiov;
4092 	auio.uio_iovcnt = 1;
4093 	auio.uio_rw = UIO_READ;
4094 	auio.uio_segflg = UIO_USERSPACE;
4095 	auio.uio_td = td;
4096 	auio.uio_resid = count;
4097 	vn_lock(vp, LK_SHARED | LK_RETRY);
4098 	AUDIT_ARG_VNODE1(vp);
4099 	loff = auio.uio_offset = fp->f_offset;
4100 #ifdef MAC
4101 	error = mac_vnode_check_readdir(td->td_ucred, vp);
4102 	if (error == 0)
4103 #endif
4104 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
4105 		    NULL);
4106 	fp->f_offset = auio.uio_offset;
4107 	if (error) {
4108 		VOP_UNLOCK(vp, 0);
4109 		VFS_UNLOCK_GIANT(vfslocked);
4110 		goto fail;
4111 	}
4112 	if (count == auio.uio_resid &&
4113 	    (vp->v_vflag & VV_ROOT) &&
4114 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
4115 		struct vnode *tvp = vp;
4116 		vp = vp->v_mount->mnt_vnodecovered;
4117 		VREF(vp);
4118 		fp->f_vnode = vp;
4119 		fp->f_data = vp;
4120 		fp->f_offset = 0;
4121 		vput(tvp);
4122 		VFS_UNLOCK_GIANT(vfslocked);
4123 		goto unionread;
4124 	}
4125 	VOP_UNLOCK(vp, 0);
4126 	VFS_UNLOCK_GIANT(vfslocked);
4127 	*basep = loff;
4128 	td->td_retval[0] = count - auio.uio_resid;
4129 fail:
4130 	fdrop(fp, td);
4131 	return (error);
4132 }
4133 
4134 #ifndef _SYS_SYSPROTO_H_
4135 struct getdents_args {
4136 	int fd;
4137 	char *buf;
4138 	size_t count;
4139 };
4140 #endif
4141 int
4142 getdents(td, uap)
4143 	struct thread *td;
4144 	register struct getdents_args /* {
4145 		int fd;
4146 		char *buf;
4147 		u_int count;
4148 	} */ *uap;
4149 {
4150 	struct getdirentries_args ap;
4151 	ap.fd = uap->fd;
4152 	ap.buf = uap->buf;
4153 	ap.count = uap->count;
4154 	ap.basep = NULL;
4155 	return (getdirentries(td, &ap));
4156 }
4157 
4158 /*
4159  * Set the mode mask for creation of filesystem nodes.
4160  */
4161 #ifndef _SYS_SYSPROTO_H_
4162 struct umask_args {
4163 	int	newmask;
4164 };
4165 #endif
4166 int
4167 umask(td, uap)
4168 	struct thread *td;
4169 	struct umask_args /* {
4170 		int newmask;
4171 	} */ *uap;
4172 {
4173 	register struct filedesc *fdp;
4174 
4175 	FILEDESC_XLOCK(td->td_proc->p_fd);
4176 	fdp = td->td_proc->p_fd;
4177 	td->td_retval[0] = fdp->fd_cmask;
4178 	fdp->fd_cmask = uap->newmask & ALLPERMS;
4179 	FILEDESC_XUNLOCK(td->td_proc->p_fd);
4180 	return (0);
4181 }
4182 
4183 /*
4184  * Void all references to file by ripping underlying filesystem away from
4185  * vnode.
4186  */
4187 #ifndef _SYS_SYSPROTO_H_
4188 struct revoke_args {
4189 	char	*path;
4190 };
4191 #endif
4192 int
4193 revoke(td, uap)
4194 	struct thread *td;
4195 	register struct revoke_args /* {
4196 		char *path;
4197 	} */ *uap;
4198 {
4199 	struct vnode *vp;
4200 	struct vattr vattr;
4201 	int error;
4202 	struct nameidata nd;
4203 	int vfslocked;
4204 
4205 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4206 	    UIO_USERSPACE, uap->path, td);
4207 	if ((error = namei(&nd)) != 0)
4208 		return (error);
4209 	vfslocked = NDHASGIANT(&nd);
4210 	vp = nd.ni_vp;
4211 	NDFREE(&nd, NDF_ONLY_PNBUF);
4212 	if (vp->v_type != VCHR) {
4213 		error = EINVAL;
4214 		goto out;
4215 	}
4216 #ifdef MAC
4217 	error = mac_vnode_check_revoke(td->td_ucred, vp);
4218 	if (error)
4219 		goto out;
4220 #endif
4221 	error = VOP_GETATTR(vp, &vattr, td->td_ucred);
4222 	if (error)
4223 		goto out;
4224 	if (td->td_ucred->cr_uid != vattr.va_uid) {
4225 		error = priv_check(td, PRIV_VFS_ADMIN);
4226 		if (error)
4227 			goto out;
4228 	}
4229 	if (vcount(vp) > 1)
4230 		VOP_REVOKE(vp, REVOKEALL);
4231 out:
4232 	vput(vp);
4233 	VFS_UNLOCK_GIANT(vfslocked);
4234 	return (error);
4235 }
4236 
4237 /*
4238  * Convert a user file descriptor to a kernel file entry.
4239  * A reference on the file entry is held upon returning.
4240  */
4241 int
4242 getvnode(fdp, fd, fpp)
4243 	struct filedesc *fdp;
4244 	int fd;
4245 	struct file **fpp;
4246 {
4247 	int error;
4248 	struct file *fp;
4249 
4250 	error = 0;
4251 	fp = NULL;
4252 	if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL)
4253 		error = EBADF;
4254 	else if (fp->f_vnode == NULL) {
4255 		error = EINVAL;
4256 		fdrop(fp, curthread);
4257 	}
4258 	*fpp = fp;
4259 	return (error);
4260 }
4261 
4262 /*
4263  * Get an (NFS) file handle.
4264  */
4265 #ifndef _SYS_SYSPROTO_H_
4266 struct lgetfh_args {
4267 	char	*fname;
4268 	fhandle_t *fhp;
4269 };
4270 #endif
4271 int
4272 lgetfh(td, uap)
4273 	struct thread *td;
4274 	register struct lgetfh_args *uap;
4275 {
4276 	struct nameidata nd;
4277 	fhandle_t fh;
4278 	register struct vnode *vp;
4279 	int vfslocked;
4280 	int error;
4281 
4282 	error = priv_check(td, PRIV_VFS_GETFH);
4283 	if (error)
4284 		return (error);
4285 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4286 	    UIO_USERSPACE, uap->fname, td);
4287 	error = namei(&nd);
4288 	if (error)
4289 		return (error);
4290 	vfslocked = NDHASGIANT(&nd);
4291 	NDFREE(&nd, NDF_ONLY_PNBUF);
4292 	vp = nd.ni_vp;
4293 	bzero(&fh, sizeof(fh));
4294 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4295 	error = VOP_VPTOFH(vp, &fh.fh_fid);
4296 	vput(vp);
4297 	VFS_UNLOCK_GIANT(vfslocked);
4298 	if (error)
4299 		return (error);
4300 	error = copyout(&fh, uap->fhp, sizeof (fh));
4301 	return (error);
4302 }
4303 
4304 #ifndef _SYS_SYSPROTO_H_
4305 struct getfh_args {
4306 	char	*fname;
4307 	fhandle_t *fhp;
4308 };
4309 #endif
4310 int
4311 getfh(td, uap)
4312 	struct thread *td;
4313 	register struct getfh_args *uap;
4314 {
4315 	struct nameidata nd;
4316 	fhandle_t fh;
4317 	register struct vnode *vp;
4318 	int vfslocked;
4319 	int error;
4320 
4321 	error = priv_check(td, PRIV_VFS_GETFH);
4322 	if (error)
4323 		return (error);
4324 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4325 	    UIO_USERSPACE, uap->fname, td);
4326 	error = namei(&nd);
4327 	if (error)
4328 		return (error);
4329 	vfslocked = NDHASGIANT(&nd);
4330 	NDFREE(&nd, NDF_ONLY_PNBUF);
4331 	vp = nd.ni_vp;
4332 	bzero(&fh, sizeof(fh));
4333 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4334 	error = VOP_VPTOFH(vp, &fh.fh_fid);
4335 	vput(vp);
4336 	VFS_UNLOCK_GIANT(vfslocked);
4337 	if (error)
4338 		return (error);
4339 	error = copyout(&fh, uap->fhp, sizeof (fh));
4340 	return (error);
4341 }
4342 
4343 /*
4344  * syscall for the rpc.lockd to use to translate a NFS file handle into an
4345  * open descriptor.
4346  *
4347  * warning: do not remove the priv_check() call or this becomes one giant
4348  * security hole.
4349  */
4350 #ifndef _SYS_SYSPROTO_H_
4351 struct fhopen_args {
4352 	const struct fhandle *u_fhp;
4353 	int flags;
4354 };
4355 #endif
4356 int
4357 fhopen(td, uap)
4358 	struct thread *td;
4359 	struct fhopen_args /* {
4360 		const struct fhandle *u_fhp;
4361 		int flags;
4362 	} */ *uap;
4363 {
4364 	struct proc *p = td->td_proc;
4365 	struct mount *mp;
4366 	struct vnode *vp;
4367 	struct fhandle fhp;
4368 	struct vattr vat;
4369 	struct vattr *vap = &vat;
4370 	struct flock lf;
4371 	struct file *fp;
4372 	register struct filedesc *fdp = p->p_fd;
4373 	int fmode, error, type;
4374 	accmode_t accmode;
4375 	struct file *nfp;
4376 	int vfslocked;
4377 	int indx;
4378 
4379 	error = priv_check(td, PRIV_VFS_FHOPEN);
4380 	if (error)
4381 		return (error);
4382 	fmode = FFLAGS(uap->flags);
4383 	/* why not allow a non-read/write open for our lockd? */
4384 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4385 		return (EINVAL);
4386 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4387 	if (error)
4388 		return(error);
4389 	/* find the mount point */
4390 	mp = vfs_busyfs(&fhp.fh_fsid);
4391 	if (mp == NULL)
4392 		return (ESTALE);
4393 	vfslocked = VFS_LOCK_GIANT(mp);
4394 	/* now give me my vnode, it gets returned to me locked */
4395 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4396 	vfs_unbusy(mp);
4397 	if (error)
4398 		goto out;
4399 	/*
4400 	 * from now on we have to make sure not
4401 	 * to forget about the vnode
4402 	 * any error that causes an abort must vput(vp)
4403 	 * just set error = err and 'goto bad;'.
4404 	 */
4405 
4406 	/*
4407 	 * from vn_open
4408 	 */
4409 	if (vp->v_type == VLNK) {
4410 		error = EMLINK;
4411 		goto bad;
4412 	}
4413 	if (vp->v_type == VSOCK) {
4414 		error = EOPNOTSUPP;
4415 		goto bad;
4416 	}
4417 	accmode = 0;
4418 	if (fmode & (FWRITE | O_TRUNC)) {
4419 		if (vp->v_type == VDIR) {
4420 			error = EISDIR;
4421 			goto bad;
4422 		}
4423 		error = vn_writechk(vp);
4424 		if (error)
4425 			goto bad;
4426 		accmode |= VWRITE;
4427 	}
4428 	if (fmode & FREAD)
4429 		accmode |= VREAD;
4430 	if (fmode & O_APPEND)
4431 		accmode |= VAPPEND;
4432 #ifdef MAC
4433 	error = mac_vnode_check_open(td->td_ucred, vp, accmode);
4434 	if (error)
4435 		goto bad;
4436 #endif
4437 	if (accmode) {
4438 		error = VOP_ACCESS(vp, accmode, td->td_ucred, td);
4439 		if (error)
4440 			goto bad;
4441 	}
4442 	if (fmode & O_TRUNC) {
4443 		VOP_UNLOCK(vp, 0);				/* XXX */
4444 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4445 			vrele(vp);
4446 			goto out;
4447 		}
4448 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */
4449 #ifdef MAC
4450 		/*
4451 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4452 		 * should be right.
4453 		 */
4454 		error = mac_vnode_check_write(td->td_ucred, td->td_ucred, vp);
4455 		if (error == 0) {
4456 #endif
4457 			VATTR_NULL(vap);
4458 			vap->va_size = 0;
4459 			error = VOP_SETATTR(vp, vap, td->td_ucred);
4460 #ifdef MAC
4461 		}
4462 #endif
4463 		vn_finished_write(mp);
4464 		if (error)
4465 			goto bad;
4466 	}
4467 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, NULL);
4468 	if (error)
4469 		goto bad;
4470 
4471 	if (fmode & FWRITE)
4472 		vp->v_writecount++;
4473 
4474 	/*
4475 	 * end of vn_open code
4476 	 */
4477 
4478 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4479 		if (fmode & FWRITE)
4480 			vp->v_writecount--;
4481 		goto bad;
4482 	}
4483 	/* An extra reference on `nfp' has been held for us by falloc(). */
4484 	fp = nfp;
4485 	nfp->f_vnode = vp;
4486 	finit(nfp, fmode & FMASK, DTYPE_VNODE, vp, &vnops);
4487 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4488 		lf.l_whence = SEEK_SET;
4489 		lf.l_start = 0;
4490 		lf.l_len = 0;
4491 		if (fmode & O_EXLOCK)
4492 			lf.l_type = F_WRLCK;
4493 		else
4494 			lf.l_type = F_RDLCK;
4495 		type = F_FLOCK;
4496 		if ((fmode & FNONBLOCK) == 0)
4497 			type |= F_WAIT;
4498 		VOP_UNLOCK(vp, 0);
4499 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4500 			    type)) != 0) {
4501 			/*
4502 			 * The lock request failed.  Normally close the
4503 			 * descriptor but handle the case where someone might
4504 			 * have dup()d or close()d it when we weren't looking.
4505 			 */
4506 			fdclose(fdp, fp, indx, td);
4507 
4508 			/*
4509 			 * release our private reference
4510 			 */
4511 			fdrop(fp, td);
4512 			goto out;
4513 		}
4514 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4515 		atomic_set_int(&fp->f_flag, FHASLOCK);
4516 	}
4517 
4518 	VOP_UNLOCK(vp, 0);
4519 	fdrop(fp, td);
4520 	vfs_rel(mp);
4521 	VFS_UNLOCK_GIANT(vfslocked);
4522 	td->td_retval[0] = indx;
4523 	return (0);
4524 
4525 bad:
4526 	vput(vp);
4527 out:
4528 	VFS_UNLOCK_GIANT(vfslocked);
4529 	return (error);
4530 }
4531 
4532 /*
4533  * Stat an (NFS) file handle.
4534  */
4535 #ifndef _SYS_SYSPROTO_H_
4536 struct fhstat_args {
4537 	struct fhandle *u_fhp;
4538 	struct stat *sb;
4539 };
4540 #endif
4541 int
4542 fhstat(td, uap)
4543 	struct thread *td;
4544 	register struct fhstat_args /* {
4545 		struct fhandle *u_fhp;
4546 		struct stat *sb;
4547 	} */ *uap;
4548 {
4549 	struct stat sb;
4550 	fhandle_t fh;
4551 	struct mount *mp;
4552 	struct vnode *vp;
4553 	int vfslocked;
4554 	int error;
4555 
4556 	error = priv_check(td, PRIV_VFS_FHSTAT);
4557 	if (error)
4558 		return (error);
4559 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4560 	if (error)
4561 		return (error);
4562 	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
4563 		return (ESTALE);
4564 	vfslocked = VFS_LOCK_GIANT(mp);
4565 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4566 	vfs_unbusy(mp);
4567 	if (error) {
4568 		VFS_UNLOCK_GIANT(vfslocked);
4569 		return (error);
4570 	}
4571 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4572 	vput(vp);
4573 	VFS_UNLOCK_GIANT(vfslocked);
4574 	if (error)
4575 		return (error);
4576 	error = copyout(&sb, uap->sb, sizeof(sb));
4577 	return (error);
4578 }
4579 
4580 /*
4581  * Implement fstatfs() for (NFS) file handles.
4582  */
4583 #ifndef _SYS_SYSPROTO_H_
4584 struct fhstatfs_args {
4585 	struct fhandle *u_fhp;
4586 	struct statfs *buf;
4587 };
4588 #endif
4589 int
4590 fhstatfs(td, uap)
4591 	struct thread *td;
4592 	struct fhstatfs_args /* {
4593 		struct fhandle *u_fhp;
4594 		struct statfs *buf;
4595 	} */ *uap;
4596 {
4597 	struct statfs sf;
4598 	fhandle_t fh;
4599 	int error;
4600 
4601 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4602 	if (error)
4603 		return (error);
4604 	error = kern_fhstatfs(td, fh, &sf);
4605 	if (error)
4606 		return (error);
4607 	return (copyout(&sf, uap->buf, sizeof(sf)));
4608 }
4609 
4610 int
4611 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4612 {
4613 	struct statfs *sp;
4614 	struct mount *mp;
4615 	struct vnode *vp;
4616 	int vfslocked;
4617 	int error;
4618 
4619 	error = priv_check(td, PRIV_VFS_FHSTATFS);
4620 	if (error)
4621 		return (error);
4622 	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
4623 		return (ESTALE);
4624 	vfslocked = VFS_LOCK_GIANT(mp);
4625 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4626 	if (error) {
4627 		vfs_unbusy(mp);
4628 		VFS_UNLOCK_GIANT(vfslocked);
4629 		return (error);
4630 	}
4631 	vput(vp);
4632 	error = prison_canseemount(td->td_ucred, mp);
4633 	if (error)
4634 		goto out;
4635 #ifdef MAC
4636 	error = mac_mount_check_stat(td->td_ucred, mp);
4637 	if (error)
4638 		goto out;
4639 #endif
4640 	/*
4641 	 * Set these in case the underlying filesystem fails to do so.
4642 	 */
4643 	sp = &mp->mnt_stat;
4644 	sp->f_version = STATFS_VERSION;
4645 	sp->f_namemax = NAME_MAX;
4646 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4647 	error = VFS_STATFS(mp, sp);
4648 	if (error == 0)
4649 		*buf = *sp;
4650 out:
4651 	vfs_unbusy(mp);
4652 	VFS_UNLOCK_GIANT(vfslocked);
4653 	return (error);
4654 }
4655