xref: /freebsd/sys/kern/vfs_syscalls.c (revision 839529caa9c35f92b638dbe074655598e7a6bb6f)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_capsicum.h"
41 #include "opt_compat.h"
42 #include "opt_ktrace.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/bio.h>
47 #include <sys/buf.h>
48 #include <sys/capsicum.h>
49 #include <sys/disk.h>
50 #include <sys/sysent.h>
51 #include <sys/malloc.h>
52 #include <sys/mount.h>
53 #include <sys/mutex.h>
54 #include <sys/sysproto.h>
55 #include <sys/namei.h>
56 #include <sys/filedesc.h>
57 #include <sys/kernel.h>
58 #include <sys/fcntl.h>
59 #include <sys/file.h>
60 #include <sys/filio.h>
61 #include <sys/limits.h>
62 #include <sys/linker.h>
63 #include <sys/rwlock.h>
64 #include <sys/sdt.h>
65 #include <sys/stat.h>
66 #include <sys/sx.h>
67 #include <sys/unistd.h>
68 #include <sys/vnode.h>
69 #include <sys/priv.h>
70 #include <sys/proc.h>
71 #include <sys/dirent.h>
72 #include <sys/jail.h>
73 #include <sys/syscallsubr.h>
74 #include <sys/sysctl.h>
75 #ifdef KTRACE
76 #include <sys/ktrace.h>
77 #endif
78 
79 #include <machine/stdarg.h>
80 
81 #include <security/audit/audit.h>
82 #include <security/mac/mac_framework.h>
83 
84 #include <vm/vm.h>
85 #include <vm/vm_object.h>
86 #include <vm/vm_page.h>
87 #include <vm/uma.h>
88 
89 #include <ufs/ufs/quota.h>
90 
/*
 * malloc(9) type M_FADVISE, short name "fadvise".  Per its description
 * string it is meant for posix_fadvise(2) information.
 */
91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
92 
/*
 * Statically defined tracing: provider "vfs" and two two-argument probes,
 * "mode" and "reg", both declared with argument types "char *" and "int".
 */
93 SDT_PROVIDER_DEFINE(vfs);
94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int");
95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int");
96 
/* Forward declarations of file-local (static) helper routines. */
97 static int kern_chflagsat(struct thread *td, int fd, const char *path,
98     enum uio_seg pathseg, u_long flags, int atflag);
99 static int setfflags(struct thread *td, struct vnode *, u_long);
100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
101 static int getutimens(const struct timespec *, enum uio_seg,
102     struct timespec *, int *);
103 static int setutimes(struct thread *td, struct vnode *,
104     const struct timespec *, int, int);
105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
106     struct thread *td);
107 
108 /*
109  * The module initialization routine for POSIX asynchronous I/O will
110  * set this to the version of AIO that it implements.  (Zero means
111  * that it is not implemented.)  This value is used here by pathconf()
112  * and in kern_descrip.c by fpathconf().
113  */
114 int async_io_version;
115 
116 /*
117  * Sync each mounted filesystem.
 *
 * Walks the global mount list.  Every mount that can be busied without
 * sleeping, is not flagged MNT_RDONLY, and for which vn_start_write()
 * succeeds with V_NOWAIT gets vfs_msync() followed by VFS_SYNC(), both with
 * MNT_NOWAIT.  Mounts failing any of those tests are skipped.  The return
 * values of the sync calls are not examined; the function always returns 0.
 * 'td' and 'uap' are not used.
118  */
119 #ifndef _SYS_SYSPROTO_H_
120 struct sync_args {
121 	int     dummy;
122 };
123 #endif
124 /* ARGSUSED */
125 int
126 sys_sync(td, uap)
127 	struct thread *td;
128 	struct sync_args *uap;
129 {
130 	struct mount *mp, *nmp;
131 	int save;
132 
133 	mtx_lock(&mountlist_mtx);
134 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/*
		 * NOTE(review): judging by the re-lock at the bottom of the
		 * loop, a successful vfs_busy() with MBF_MNTLSTLOCK returns
		 * with mountlist_mtx released, while a failed one leaves it
		 * held (the skip path below does not re-lock) -- confirm
		 * against vfs_busy().
		 */
135 		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
136 			nmp = TAILQ_NEXT(mp, mnt_list);
137 			continue;
138 		}
139 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
140 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
			/*
			 * Set TDP_SYNCIO on the current thread for the
			 * duration of the sync and put the previous flag
			 * state back afterwards.
			 */
141 			save = curthread_pflags_set(TDP_SYNCIO);
142 			vfs_msync(mp, MNT_NOWAIT);
143 			VFS_SYNC(mp, MNT_NOWAIT);
144 			curthread_pflags_restore(save);
145 			vn_finished_write(mp);
146 		}
		/*
		 * Re-take the list lock and pick up the successor before
		 * the busy reference on 'mp' is dropped.
		 */
147 		mtx_lock(&mountlist_mtx);
148 		nmp = TAILQ_NEXT(mp, mnt_list);
149 		vfs_unbusy(mp);
150 	}
151 	mtx_unlock(&mountlist_mtx);
152 	return (0);
153 }
154 
155 /*
156  * Change filesystem quotas.
157  */
158 #ifndef _SYS_SYSPROTO_H_
159 struct quotactl_args {
160 	char *path;
161 	int cmd;
162 	int uid;
163 	caddr_t arg;
164 };
165 #endif
166 int
167 sys_quotactl(td, uap)
168 	struct thread *td;
169 	register struct quotactl_args /* {
170 		char *path;
171 		int cmd;
172 		int uid;
173 		caddr_t arg;
174 	} */ *uap;
175 {
176 	struct mount *mp;
177 	struct nameidata nd;
178 	int error;
179 
180 	AUDIT_ARG_CMD(uap->cmd);
181 	AUDIT_ARG_UID(uap->uid);
182 	if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
183 		return (EPERM);
184 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
185 	    uap->path, td);
186 	if ((error = namei(&nd)) != 0)
187 		return (error);
188 	NDFREE(&nd, NDF_ONLY_PNBUF);
189 	mp = nd.ni_vp->v_mount;
190 	vfs_ref(mp);
191 	vput(nd.ni_vp);
192 	error = vfs_busy(mp, 0);
193 	vfs_rel(mp);
194 	if (error != 0)
195 		return (error);
196 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
197 
198 	/*
199 	 * Since quota on operation typically needs to open quota
200 	 * file, the Q_QUOTAON handler needs to unbusy the mount point
201 	 * before calling into namei.  Otherwise, unmount might be
202 	 * started between two vfs_busy() invocations (first is our,
203 	 * second is from mount point cross-walk code in lookup()),
204 	 * causing deadlock.
205 	 *
206 	 * Require that Q_QUOTAON handles the vfs_busy() reference on
207 	 * its own, always returning with ubusied mount point.
208 	 */
209 	if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON)
210 		vfs_unbusy(mp);
211 	return (error);
212 }
213 
214 /*
215  * Used by statfs conversion routines to scale the block size up if
216  * necessary so that all of the block counts are <= 'max_size'.  Note
217  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
218  * value of 'n'.
219  */
220 void
221 statfs_scale_blocks(struct statfs *sf, long max_size)
222 {
223 	uint64_t count;
224 	int shift;
225 
226 	KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
227 
228 	/*
229 	 * Attempt to scale the block counts to give a more accurate
230 	 * overview to userland of the ratio of free space to used
231 	 * space.  To do this, find the largest block count and compute
232 	 * a divisor that lets it fit into a signed integer <= max_size.
233 	 */
234 	if (sf->f_bavail < 0)
235 		count = -sf->f_bavail;
236 	else
237 		count = sf->f_bavail;
238 	count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
239 	if (count <= max_size)
240 		return;
241 
242 	count >>= flsl(max_size);
243 	shift = 0;
244 	while (count > 0) {
245 		shift++;
246 		count >>=1;
247 	}
248 
249 	sf->f_bsize <<= shift;
250 	sf->f_blocks >>= shift;
251 	sf->f_bfree >>= shift;
252 	sf->f_bavail >>= shift;
253 }
254 
255 /*
256  * Get filesystem statistics.
257  */
258 #ifndef _SYS_SYSPROTO_H_
259 struct statfs_args {
260 	char *path;
261 	struct statfs *buf;
262 };
263 #endif
264 int
265 sys_statfs(td, uap)
266 	struct thread *td;
267 	register struct statfs_args /* {
268 		char *path;
269 		struct statfs *buf;
270 	} */ *uap;
271 {
272 	struct statfs sf;
273 	int error;
274 
275 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
276 	if (error == 0)
277 		error = copyout(&sf, uap->buf, sizeof(sf));
278 	return (error);
279 }
280 
281 int
282 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
283     struct statfs *buf)
284 {
285 	struct mount *mp;
286 	struct statfs *sp, sb;
287 	struct nameidata nd;
288 	int error;
289 
290 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
291 	    pathseg, path, td);
292 	error = namei(&nd);
293 	if (error != 0)
294 		return (error);
295 	mp = nd.ni_vp->v_mount;
296 	vfs_ref(mp);
297 	NDFREE(&nd, NDF_ONLY_PNBUF);
298 	vput(nd.ni_vp);
299 	error = vfs_busy(mp, 0);
300 	vfs_rel(mp);
301 	if (error != 0)
302 		return (error);
303 #ifdef MAC
304 	error = mac_mount_check_stat(td->td_ucred, mp);
305 	if (error != 0)
306 		goto out;
307 #endif
308 	/*
309 	 * Set these in case the underlying filesystem fails to do so.
310 	 */
311 	sp = &mp->mnt_stat;
312 	sp->f_version = STATFS_VERSION;
313 	sp->f_namemax = NAME_MAX;
314 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
315 	error = VFS_STATFS(mp, sp);
316 	if (error != 0)
317 		goto out;
318 	if (priv_check(td, PRIV_VFS_GENERATION)) {
319 		bcopy(sp, &sb, sizeof(sb));
320 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
321 		prison_enforce_statfs(td->td_ucred, mp, &sb);
322 		sp = &sb;
323 	}
324 	*buf = *sp;
325 out:
326 	vfs_unbusy(mp);
327 	return (error);
328 }
329 
330 /*
331  * Get filesystem statistics.
332  */
333 #ifndef _SYS_SYSPROTO_H_
334 struct fstatfs_args {
335 	int fd;
336 	struct statfs *buf;
337 };
338 #endif
339 int
340 sys_fstatfs(td, uap)
341 	struct thread *td;
342 	register struct fstatfs_args /* {
343 		int fd;
344 		struct statfs *buf;
345 	} */ *uap;
346 {
347 	struct statfs sf;
348 	int error;
349 
350 	error = kern_fstatfs(td, uap->fd, &sf);
351 	if (error == 0)
352 		error = copyout(&sf, uap->buf, sizeof(sf));
353 	return (error);
354 }
355 
356 int
357 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
358 {
359 	struct file *fp;
360 	struct mount *mp;
361 	struct statfs *sp, sb;
362 	struct vnode *vp;
363 	cap_rights_t rights;
364 	int error;
365 
366 	AUDIT_ARG_FD(fd);
367 	error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp);
368 	if (error != 0)
369 		return (error);
370 	vp = fp->f_vnode;
371 	vn_lock(vp, LK_SHARED | LK_RETRY);
372 #ifdef AUDIT
373 	AUDIT_ARG_VNODE1(vp);
374 #endif
375 	mp = vp->v_mount;
376 	if (mp)
377 		vfs_ref(mp);
378 	VOP_UNLOCK(vp, 0);
379 	fdrop(fp, td);
380 	if (mp == NULL) {
381 		error = EBADF;
382 		goto out;
383 	}
384 	error = vfs_busy(mp, 0);
385 	vfs_rel(mp);
386 	if (error != 0)
387 		return (error);
388 #ifdef MAC
389 	error = mac_mount_check_stat(td->td_ucred, mp);
390 	if (error != 0)
391 		goto out;
392 #endif
393 	/*
394 	 * Set these in case the underlying filesystem fails to do so.
395 	 */
396 	sp = &mp->mnt_stat;
397 	sp->f_version = STATFS_VERSION;
398 	sp->f_namemax = NAME_MAX;
399 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
400 	error = VFS_STATFS(mp, sp);
401 	if (error != 0)
402 		goto out;
403 	if (priv_check(td, PRIV_VFS_GENERATION)) {
404 		bcopy(sp, &sb, sizeof(sb));
405 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
406 		prison_enforce_statfs(td->td_ucred, mp, &sb);
407 		sp = &sb;
408 	}
409 	*buf = *sp;
410 out:
411 	if (mp)
412 		vfs_unbusy(mp);
413 	return (error);
414 }
415 
416 /*
417  * Get statistics on all filesystems.
418  */
419 #ifndef _SYS_SYSPROTO_H_
420 struct getfsstat_args {
421 	struct statfs *buf;
422 	long bufsize;
423 	int flags;
424 };
425 #endif
426 int
427 sys_getfsstat(td, uap)
428 	struct thread *td;
429 	register struct getfsstat_args /* {
430 		struct statfs *buf;
431 		long bufsize;
432 		int flags;
433 	} */ *uap;
434 {
435 	size_t count;
436 	int error;
437 
438 	if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX)
439 		return (EINVAL);
440 	error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count,
441 	    UIO_USERSPACE, uap->flags);
442 	if (error == 0)
443 		td->td_retval[0] = count;
444 	return (error);
445 }
446 
447 /*
 * Collect statfs records for the mounted filesystems.
 *
 * td:      calling thread; its credential is used for the visibility,
 *          MAC and privilege checks.
 * buf:     in/out buffer pointer.  For UIO_USERSPACE '*buf' is the user
 *          address to copy records to; for UIO_SYSSPACE '*buf' is set to
 *          a freshly allocated kernel array.
 * bufsize: capacity in bytes; 0 means "count only, store nothing".
 * countp:  receives the number of records stored when a buffer is in
 *          use and more mounts were visited than fit (i.e. maxcount),
 *          otherwise the number of mounts visited.
 * bufseg:  UIO_USERSPACE or UIO_SYSSPACE.
 * flags:   MNT_WAIT / MNT_NOWAIT / MNT_LAZY, see the loop body.
 *
 * Returns 0, or the copyout() error for a UIO_USERSPACE buffer.
 *
448  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
449  *	The caller is responsible for freeing memory which will be allocated
450  *	in '*buf'.
451  */
452 int
453 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
454     size_t *countp, enum uio_seg bufseg, int flags)
455 {
456 	struct mount *mp, *nmp;
457 	struct statfs *sfsp, *sp, sb;
458 	size_t count, maxcount;
459 	int error;
460 
461 	maxcount = bufsize / sizeof(struct statfs);
462 	if (bufsize == 0)
463 		sfsp = NULL;
464 	else if (bufseg == UIO_USERSPACE)
465 		sfsp = *buf;
466 	else /* if (bufseg == UIO_SYSSPACE) */ {
		/*
		 * Size the kernel buffer from the current length of the
		 * mount list, capped by what the caller asked for.  The
		 * list lock is dropped before the allocation, so the list
		 * may change before the second walk below; the
		 * 'count < maxcount' test there keeps stores in bounds.
		 * The array comes from M_TEMP.
		 */
467 		count = 0;
468 		mtx_lock(&mountlist_mtx);
469 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
470 			count++;
471 		}
472 		mtx_unlock(&mountlist_mtx);
473 		if (maxcount > count)
474 			maxcount = count;
475 		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
476 		    M_WAITOK);
477 	}
478 	count = 0;
479 	mtx_lock(&mountlist_mtx);
480 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/* Skip mounts the caller's prison may not see. */
481 		if (prison_canseemount(td->td_ucred, mp) != 0) {
482 			nmp = TAILQ_NEXT(mp, mnt_list);
483 			continue;
484 		}
485 #ifdef MAC
486 		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
487 			nmp = TAILQ_NEXT(mp, mnt_list);
488 			continue;
489 		}
490 #endif
		/*
		 * NOTE(review): as the re-locks further down suggest, a
		 * successful vfs_busy() with MBF_MNTLSTLOCK returns with
		 * mountlist_mtx released; on failure the lock is evidently
		 * still held, since this path continues without re-locking.
		 * Confirm against vfs_busy().
		 */
491 		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
492 			nmp = TAILQ_NEXT(mp, mnt_list);
493 			continue;
494 		}
495 		if (sfsp && count < maxcount) {
496 			sp = &mp->mnt_stat;
497 			/*
498 			 * Set these in case the underlying filesystem
499 			 * fails to do so.
500 			 */
501 			sp->f_version = STATFS_VERSION;
502 			sp->f_namemax = NAME_MAX;
503 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
504 			/*
505 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
506 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
507 			 * overrides MNT_WAIT.
508 			 */
			/*
			 * A mount whose VFS_STATFS() fails is skipped: it is
			 * neither stored nor counted, and the error is not
			 * returned to the caller.
			 */
509 			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
510 			    (flags & MNT_WAIT)) &&
511 			    (error = VFS_STATFS(mp, sp))) {
512 				mtx_lock(&mountlist_mtx);
513 				nmp = TAILQ_NEXT(mp, mnt_list);
514 				vfs_unbusy(mp);
515 				continue;
516 			}
			/*
			 * Callers lacking PRIV_VFS_GENERATION get a private
			 * copy with the fsid zeroed and run through
			 * prison_enforce_statfs().
			 */
517 			if (priv_check(td, PRIV_VFS_GENERATION)) {
518 				bcopy(sp, &sb, sizeof(sb));
519 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
520 				prison_enforce_statfs(td->td_ucred, mp, &sb);
521 				sp = &sb;
522 			}
523 			if (bufseg == UIO_SYSSPACE)
524 				bcopy(sp, sfsp, sizeof(*sp));
525 			else /* if (bufseg == UIO_USERSPACE) */ {
526 				error = copyout(sp, sfsp, sizeof(*sp));
				/*
				 * Bail out on a failed copyout; only the busy
				 * hold is released here (no unlock of
				 * mountlist_mtx on this path).
				 */
527 				if (error != 0) {
528 					vfs_unbusy(mp);
529 					return (error);
530 				}
531 			}
532 			sfsp++;
533 		}
		/*
		 * Counted even when the record was not stored, so 'count'
		 * can exceed maxcount.
		 */
534 		count++;
		/*
		 * Re-take the list lock and fetch the successor before the
		 * busy hold on 'mp' is dropped.
		 */
535 		mtx_lock(&mountlist_mtx);
536 		nmp = TAILQ_NEXT(mp, mnt_list);
537 		vfs_unbusy(mp);
538 	}
539 	mtx_unlock(&mountlist_mtx);
540 	if (sfsp && count > maxcount)
541 		*countp = maxcount;
542 	else
543 		*countp = count;
544 	return (0);
545 }
546 
547 #ifdef COMPAT_FREEBSD4
548 /*
549  * Get old format filesystem statistics.
550  */
551 static void cvtstatfs(struct statfs *, struct ostatfs *);
552 
553 #ifndef _SYS_SYSPROTO_H_
554 struct freebsd4_statfs_args {
555 	char *path;
556 	struct ostatfs *buf;
557 };
558 #endif
559 int
560 freebsd4_statfs(td, uap)
561 	struct thread *td;
562 	struct freebsd4_statfs_args /* {
563 		char *path;
564 		struct ostatfs *buf;
565 	} */ *uap;
566 {
567 	struct ostatfs osb;
568 	struct statfs sf;
569 	int error;
570 
571 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
572 	if (error != 0)
573 		return (error);
574 	cvtstatfs(&sf, &osb);
575 	return (copyout(&osb, uap->buf, sizeof(osb)));
576 }
577 
578 /*
579  * Get filesystem statistics.
580  */
581 #ifndef _SYS_SYSPROTO_H_
582 struct freebsd4_fstatfs_args {
583 	int fd;
584 	struct ostatfs *buf;
585 };
586 #endif
587 int
588 freebsd4_fstatfs(td, uap)
589 	struct thread *td;
590 	struct freebsd4_fstatfs_args /* {
591 		int fd;
592 		struct ostatfs *buf;
593 	} */ *uap;
594 {
595 	struct ostatfs osb;
596 	struct statfs sf;
597 	int error;
598 
599 	error = kern_fstatfs(td, uap->fd, &sf);
600 	if (error != 0)
601 		return (error);
602 	cvtstatfs(&sf, &osb);
603 	return (copyout(&osb, uap->buf, sizeof(osb)));
604 }
605 
606 /*
607  * Get statistics on all filesystems.
608  */
609 #ifndef _SYS_SYSPROTO_H_
610 struct freebsd4_getfsstat_args {
611 	struct ostatfs *buf;
612 	long bufsize;
613 	int flags;
614 };
615 #endif
616 int
617 freebsd4_getfsstat(td, uap)
618 	struct thread *td;
619 	register struct freebsd4_getfsstat_args /* {
620 		struct ostatfs *buf;
621 		long bufsize;
622 		int flags;
623 	} */ *uap;
624 {
625 	struct statfs *buf, *sp;
626 	struct ostatfs osb;
627 	size_t count, size;
628 	int error;
629 
630 	if (uap->bufsize < 0)
631 		return (EINVAL);
632 	count = uap->bufsize / sizeof(struct ostatfs);
633 	if (count > SIZE_MAX / sizeof(struct statfs))
634 		return (EINVAL);
635 	size = count * sizeof(struct statfs);
636 	error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE,
637 	    uap->flags);
638 	td->td_retval[0] = count;
639 	if (size != 0) {
640 		sp = buf;
641 		while (count != 0 && error == 0) {
642 			cvtstatfs(sp, &osb);
643 			error = copyout(&osb, uap->buf, sizeof(osb));
644 			sp++;
645 			uap->buf++;
646 			count--;
647 		}
648 		free(buf, M_TEMP);
649 	}
650 	return (error);
651 }
652 
653 /*
654  * Implement fstatfs() for (NFS) file handles.
655  */
656 #ifndef _SYS_SYSPROTO_H_
657 struct freebsd4_fhstatfs_args {
658 	struct fhandle *u_fhp;
659 	struct ostatfs *buf;
660 };
661 #endif
662 int
663 freebsd4_fhstatfs(td, uap)
664 	struct thread *td;
665 	struct freebsd4_fhstatfs_args /* {
666 		struct fhandle *u_fhp;
667 		struct ostatfs *buf;
668 	} */ *uap;
669 {
670 	struct ostatfs osb;
671 	struct statfs sf;
672 	fhandle_t fh;
673 	int error;
674 
675 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
676 	if (error != 0)
677 		return (error);
678 	error = kern_fhstatfs(td, fh, &sf);
679 	if (error != 0)
680 		return (error);
681 	cvtstatfs(&sf, &osb);
682 	return (copyout(&osb, uap->buf, sizeof(osb)));
683 }
684 
685 /*
686  * Convert a new format statfs structure to an old format statfs structure.
687  */
688 static void
689 cvtstatfs(nsp, osp)
690 	struct statfs *nsp;
691 	struct ostatfs *osp;
692 {
693 
694 	statfs_scale_blocks(nsp, LONG_MAX);
695 	bzero(osp, sizeof(*osp));
696 	osp->f_bsize = nsp->f_bsize;
697 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
698 	osp->f_blocks = nsp->f_blocks;
699 	osp->f_bfree = nsp->f_bfree;
700 	osp->f_bavail = nsp->f_bavail;
701 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
702 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
703 	osp->f_owner = nsp->f_owner;
704 	osp->f_type = nsp->f_type;
705 	osp->f_flags = nsp->f_flags;
706 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
707 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
708 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
709 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
710 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
711 	    MIN(MFSNAMELEN, OMFSNAMELEN));
712 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
713 	    MIN(MNAMELEN, OMNAMELEN));
714 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
715 	    MIN(MNAMELEN, OMNAMELEN));
716 	osp->f_fsid = nsp->f_fsid;
717 }
718 #endif /* COMPAT_FREEBSD4 */
719 
720 /*
721  * Change current working directory to a given file descriptor.
722  */
723 #ifndef _SYS_SYSPROTO_H_
724 struct fchdir_args {
725 	int	fd;
726 };
727 #endif
728 int
729 sys_fchdir(td, uap)
730 	struct thread *td;
731 	struct fchdir_args /* {
732 		int fd;
733 	} */ *uap;
734 {
735 	struct vnode *vp, *tdp;
736 	struct mount *mp;
737 	struct file *fp;
738 	cap_rights_t rights;
739 	int error;
740 
741 	AUDIT_ARG_FD(uap->fd);
742 	error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR),
743 	    &fp);
744 	if (error != 0)
745 		return (error);
746 	vp = fp->f_vnode;
747 	VREF(vp);
748 	fdrop(fp, td);
749 	vn_lock(vp, LK_SHARED | LK_RETRY);
750 	AUDIT_ARG_VNODE1(vp);
751 	error = change_dir(vp, td);
752 	while (!error && (mp = vp->v_mountedhere) != NULL) {
753 		if (vfs_busy(mp, 0))
754 			continue;
755 		error = VFS_ROOT(mp, LK_SHARED, &tdp);
756 		vfs_unbusy(mp);
757 		if (error != 0)
758 			break;
759 		vput(vp);
760 		vp = tdp;
761 	}
762 	if (error != 0) {
763 		vput(vp);
764 		return (error);
765 	}
766 	VOP_UNLOCK(vp, 0);
767 	pwd_chdir(td, vp);
768 	return (0);
769 }
770 
771 /*
772  * Change current working directory (``.'').
773  */
774 #ifndef _SYS_SYSPROTO_H_
775 struct chdir_args {
776 	char	*path;
777 };
778 #endif
779 int
780 sys_chdir(td, uap)
781 	struct thread *td;
782 	struct chdir_args /* {
783 		char *path;
784 	} */ *uap;
785 {
786 
787 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
788 }
789 
790 int
791 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
792 {
793 	struct nameidata nd;
794 	int error;
795 
796 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
797 	    pathseg, path, td);
798 	if ((error = namei(&nd)) != 0)
799 		return (error);
800 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
801 		vput(nd.ni_vp);
802 		NDFREE(&nd, NDF_ONLY_PNBUF);
803 		return (error);
804 	}
805 	VOP_UNLOCK(nd.ni_vp, 0);
806 	NDFREE(&nd, NDF_ONLY_PNBUF);
807 	pwd_chdir(td, nd.ni_vp);
808 	return (0);
809 }
810 
811 /*
812  * Change notion of root (``/'') directory.
813  */
814 #ifndef _SYS_SYSPROTO_H_
815 struct chroot_args {
816 	char	*path;
817 };
818 #endif
819 int
820 sys_chroot(td, uap)
821 	struct thread *td;
822 	struct chroot_args /* {
823 		char *path;
824 	} */ *uap;
825 {
826 	struct nameidata nd;
827 	int error;
828 
829 	error = priv_check(td, PRIV_VFS_CHROOT);
830 	if (error != 0)
831 		return (error);
832 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
833 	    UIO_USERSPACE, uap->path, td);
834 	error = namei(&nd);
835 	if (error != 0)
836 		goto error;
837 	error = change_dir(nd.ni_vp, td);
838 	if (error != 0)
839 		goto e_vunlock;
840 #ifdef MAC
841 	error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp);
842 	if (error != 0)
843 		goto e_vunlock;
844 #endif
845 	VOP_UNLOCK(nd.ni_vp, 0);
846 	error = pwd_chroot(td, nd.ni_vp);
847 	vrele(nd.ni_vp);
848 	NDFREE(&nd, NDF_ONLY_PNBUF);
849 	return (error);
850 e_vunlock:
851 	vput(nd.ni_vp);
852 error:
853 	NDFREE(&nd, NDF_ONLY_PNBUF);
854 	return (error);
855 }
856 
857 /*
858  * Common routine for chroot and chdir.  Callers must provide a locked vnode
859  * instance.
860  */
861 int
862 change_dir(vp, td)
863 	struct vnode *vp;
864 	struct thread *td;
865 {
866 #ifdef MAC
867 	int error;
868 #endif
869 
870 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
871 	if (vp->v_type != VDIR)
872 		return (ENOTDIR);
873 #ifdef MAC
874 	error = mac_vnode_check_chdir(td->td_ucred, vp);
875 	if (error != 0)
876 		return (error);
877 #endif
878 	return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td));
879 }
880 
881 static __inline void
882 flags_to_rights(int flags, cap_rights_t *rightsp)
883 {
884 
885 	if (flags & O_EXEC) {
886 		cap_rights_set(rightsp, CAP_FEXECVE);
887 	} else {
888 		switch ((flags & O_ACCMODE)) {
889 		case O_RDONLY:
890 			cap_rights_set(rightsp, CAP_READ);
891 			break;
892 		case O_RDWR:
893 			cap_rights_set(rightsp, CAP_READ);
894 			/* FALLTHROUGH */
895 		case O_WRONLY:
896 			cap_rights_set(rightsp, CAP_WRITE);
897 			if (!(flags & (O_APPEND | O_TRUNC)))
898 				cap_rights_set(rightsp, CAP_SEEK);
899 			break;
900 		}
901 	}
902 
903 	if (flags & O_CREAT)
904 		cap_rights_set(rightsp, CAP_CREATE);
905 
906 	if (flags & O_TRUNC)
907 		cap_rights_set(rightsp, CAP_FTRUNCATE);
908 
909 	if (flags & (O_SYNC | O_FSYNC))
910 		cap_rights_set(rightsp, CAP_FSYNC);
911 
912 	if (flags & (O_EXLOCK | O_SHLOCK))
913 		cap_rights_set(rightsp, CAP_FLOCK);
914 }
915 
916 /*
917  * Check permissions, allocate an open file structure, and call the device
918  * open routine if any.
919  */
920 #ifndef _SYS_SYSPROTO_H_
921 struct open_args {
922 	char	*path;
923 	int	flags;
924 	int	mode;
925 };
926 #endif
927 int
928 sys_open(td, uap)
929 	struct thread *td;
930 	register struct open_args /* {
931 		char *path;
932 		int flags;
933 		int mode;
934 	} */ *uap;
935 {
936 
937 	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
938 	    uap->flags, uap->mode));
939 }
940 
941 #ifndef _SYS_SYSPROTO_H_
942 struct openat_args {
943 	int	fd;
944 	char	*path;
945 	int	flag;
946 	int	mode;
947 };
948 #endif
949 int
950 sys_openat(struct thread *td, struct openat_args *uap)
951 {
952 
953 	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
954 	    uap->mode));
955 }
956 
/*
 * Common implementation behind sys_open(), sys_openat() and ocreat().
 *
 * td:      calling thread.
 * fd:      directory descriptor handed to NDINIT_ATRIGHTS(); sys_open()
 *          and ocreat() pass AT_FDCWD.
 * path:    pathname, located in address space 'pathseg'.
 * flags:   open(2) flags as supplied by the caller.
 * mode:    creation mode; masked with the descriptor table's fd_cmask and
 *          ALLPERMS, with S_ISTXT cleared, before being passed to
 *          vn_open().
 *
 * On success the new descriptor index is stored in td->td_retval[0] and 0
 * is returned.  On failure an errno value is returned and the reference on
 * the file structure obtained from falloc_noinstall() is dropped.
 */
957 int
958 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
959     int flags, int mode)
960 {
961 	struct proc *p = td->td_proc;
962 	struct filedesc *fdp = p->p_fd;
963 	struct file *fp;
964 	struct vnode *vp;
965 	struct nameidata nd;
966 	cap_rights_t rights;
967 	int cmode, error, indx;
968 
	/* -1 means "no descriptor installed yet"; see the success label. */
969 	indx = -1;
970 
971 	AUDIT_ARG_FFLAGS(flags);
972 	AUDIT_ARG_MODE(mode);
973 	/* XXX: audit dirfd */
	/*
	 * Rights required on the directory descriptor: CAP_LOOKUP plus
	 * whatever the open flags imply.
	 */
974 	cap_rights_init(&rights, CAP_LOOKUP);
975 	flags_to_rights(flags, &rights);
976 	/*
977 	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
978 	 * may be specified.
979 	 */
980 	if (flags & O_EXEC) {
981 		if (flags & O_ACCMODE)
982 			return (EINVAL);
983 	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
984 		return (EINVAL);
985 	} else {
		/*
		 * NOTE(review): FFLAGS() presumably converts open(2)-style
		 * flags to the in-kernel F* form -- confirm in sys/fcntl.h.
		 */
986 		flags = FFLAGS(flags);
987 	}
988 
989 	/*
990 	 * Allocate a file structure. The descriptor to reference it
991 	 * is allocated and set by finstall() below.
992 	 */
993 	error = falloc_noinstall(td, &fp);
994 	if (error != 0)
995 		return (error);
996 	/*
997 	 * An extra reference on `fp' has been held for us by
998 	 * falloc_noinstall().
999 	 */
1000 	/* Set the flags early so the finit in devfs can pick them up. */
1001 	fp->f_flag = flags & FMASK;
1002 	cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
1003 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
1004 	    &rights, td);
1005 	td->td_dupfd = -1;		/* XXX check for fdopen */
1006 	error = vn_open(&nd, &flags, cmode, fp);
1007 	if (error != 0) {
1008 		/*
1009 		 * If the vn_open replaced the method vector, something
1010 		 * wondrous happened deep below and we just pass it up
1011 		 * pretending we know what we do.
1012 		 */
1013 		if (error == ENXIO && fp->f_ops != &badfileops)
1014 			goto success;
1015 
1016 		/*
1017 		 * Handle special fdopen() case. bleh.
1018 		 *
1019 		 * Don't do this for relative (capability) lookups; we don't
1020 		 * understand exactly what would happen, and we don't think
1021 		 * that it ever should.
1022 		 */
1023 		if (nd.ni_strictrelative == 0 &&
1024 		    (error == ENODEV || error == ENXIO) &&
1025 		    td->td_dupfd >= 0) {
			/*
			 * dupfdopen() fills in 'indx' itself, which is why
			 * the success path below skips finstall() when indx
			 * is no longer -1.
			 */
1026 			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
1027 			    &indx);
1028 			if (error == 0)
1029 				goto success;
1030 		}
1031 
1032 		goto bad;
1033 	}
1034 	td->td_dupfd = 0;
1035 	NDFREE(&nd, NDF_ONLY_PNBUF);
1036 	vp = nd.ni_vp;
1037 
1038 	/*
1039 	 * Store the vnode, for any f_type. Typically, the vnode use
1040 	 * count is decremented by direct call to vn_closefile() for
1041 	 * files that switched type in the cdevsw fdopen() method.
1042 	 */
1043 	fp->f_vnode = vp;
1044 	/*
1045 	 * If the file wasn't claimed by devfs bind it to the normal
1046 	 * vnode operations here.
1047 	 */
1048 	if (fp->f_ops == &badfileops) {
1049 		KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
1050 		fp->f_seqcount = 1;
1051 		finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK),
1052 		    DTYPE_VNODE, vp, &vnops);
1053 	}
1054 
	/* The vnode is unlocked before the optional truncate below. */
1055 	VOP_UNLOCK(vp, 0);
1056 	if (flags & O_TRUNC) {
1057 		error = fo_truncate(fp, 0, td->td_ucred, td);
1058 		if (error != 0)
1059 			goto bad;
1060 	}
1061 success:
1062 	/*
1063 	 * If we haven't already installed the FD (for dupfdopen), do so now.
1064 	 */
1065 	if (indx == -1) {
1066 		struct filecaps *fcaps;
1067 
1068 #ifdef CAPABILITIES
1069 		if (nd.ni_strictrelative == 1)
1070 			fcaps = &nd.ni_filecaps;
1071 		else
1072 #endif
1073 			fcaps = NULL;
1074 		error = finstall(td, fp, &indx, flags, fcaps);
1075 		/* On success finstall() consumes fcaps. */
1076 		if (error != 0) {
1077 			filecaps_free(&nd.ni_filecaps);
1078 			goto bad;
1079 		}
1080 	} else {
1081 		filecaps_free(&nd.ni_filecaps);
1082 	}
1083 
1084 	/*
1085 	 * Release our private reference, leaving the one associated with
1086 	 * the descriptor table intact.
1087 	 */
1088 	fdrop(fp, td);
1089 	td->td_retval[0] = indx;
1090 	return (0);
1091 bad:
	/* No descriptor may have been installed on any failure path. */
1092 	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
1093 	fdrop(fp, td);
1094 	return (error);
1095 }
1096 
1097 #ifdef COMPAT_43
1098 /*
1099  * Create a file.
1100  */
1101 #ifndef _SYS_SYSPROTO_H_
1102 struct ocreat_args {
1103 	char	*path;
1104 	int	mode;
1105 };
1106 #endif
1107 int
1108 ocreat(td, uap)
1109 	struct thread *td;
1110 	register struct ocreat_args /* {
1111 		char *path;
1112 		int mode;
1113 	} */ *uap;
1114 {
1115 
1116 	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
1117 	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
1118 }
1119 #endif /* COMPAT_43 */
1120 
1121 /*
1122  * Create a special file.
1123  */
1124 #ifndef _SYS_SYSPROTO_H_
1125 struct mknod_args {
1126 	char	*path;
1127 	int	mode;
1128 	int	dev;
1129 };
1130 #endif
1131 int
1132 sys_mknod(td, uap)
1133 	struct thread *td;
1134 	register struct mknod_args /* {
1135 		char *path;
1136 		int mode;
1137 		int dev;
1138 	} */ *uap;
1139 {
1140 
1141 	return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
1142 	    uap->mode, uap->dev));
1143 }
1144 
1145 #ifndef _SYS_SYSPROTO_H_
1146 struct mknodat_args {
1147 	int	fd;
1148 	char	*path;
1149 	mode_t	mode;
1150 	dev_t	dev;
1151 };
1152 #endif
1153 int
1154 sys_mknodat(struct thread *td, struct mknodat_args *uap)
1155 {
1156 
1157 	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
1158 	    uap->dev));
1159 }
1160 
/*
 * Common implementation behind sys_mknod() and sys_mknodat().
 *
 * Depending on the file type bits in 'mode', this creates a character or
 * block device node, a VBAD node (S_IFMT), or a whiteout entry (S_IFWHT),
 * each gated by its own privilege check.  An S_IFIFO request with
 * dev == 0 is forwarded to kern_mkfifoat(); every other type, including
 * S_IFIFO with a non-zero dev, yields EINVAL.
 *
 * td:      calling thread.
 * fd:      directory descriptor handed to NDINIT_ATRIGHTS().
 * path:    pathname, located in address space 'pathseg'.
 * mode:    file type and permission bits; the permission bits are masked
 *          with the process's fd_cmask.
 * dev:     device number stored in va_rdev.
 *
 * Returns 0, EEXIST if the name already exists, or another errno value.
 */
1161 int
1162 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1163     int mode, int dev)
1164 {
1165 	struct vnode *vp;
1166 	struct mount *mp;
1167 	struct vattr vattr;
1168 	struct nameidata nd;
1169 	cap_rights_t rights;
1170 	int error, whiteout = 0;
1171 
1172 	AUDIT_ARG_MODE(mode);
1173 	AUDIT_ARG_DEV(dev);
	/* Per-type privilege check; unsupported types are rejected. */
1174 	switch (mode & S_IFMT) {
1175 	case S_IFCHR:
1176 	case S_IFBLK:
1177 		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
1178 		break;
1179 	case S_IFMT:
1180 		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
1181 		break;
1182 	case S_IFWHT:
1183 		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
1184 		break;
1185 	case S_IFIFO:
1186 		if (dev == 0)
1187 			return (kern_mkfifoat(td, fd, path, pathseg, mode));
1188 		/* FALLTHROUGH */
1189 	default:
1190 		error = EINVAL;
1191 		break;
1192 	}
1193 	if (error != 0)
1194 		return (error);
	/*
	 * The lookup is redone from here whenever write access to the
	 * filesystem could not be started without sleeping (see below).
	 */
1195 restart:
1196 	bwillwrite();
1197 	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
1198 	    NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT),
1199 	    td);
1200 	if ((error = namei(&nd)) != 0)
1201 		return (error);
1202 	vp = nd.ni_vp;
1203 	if (vp != NULL) {
		/*
		 * The name already exists.  The parent is released with
		 * vrele() when it is the same vnode as the target and with
		 * vput() otherwise.
		 */
1204 		NDFREE(&nd, NDF_ONLY_PNBUF);
1205 		if (vp == nd.ni_dvp)
1206 			vrele(nd.ni_dvp);
1207 		else
1208 			vput(nd.ni_dvp);
1209 		vrele(vp);
1210 		return (EEXIST);
1211 	} else {
1212 		VATTR_NULL(&vattr);
1213 		vattr.va_mode = (mode & ALLPERMS) &
1214 		    ~td->td_proc->p_fd->fd_cmask;
1215 		vattr.va_rdev = dev;
1216 		whiteout = 0;
1217 
1218 		switch (mode & S_IFMT) {
1219 		case S_IFMT:	/* used by badsect to flag bad sectors */
1220 			vattr.va_type = VBAD;
1221 			break;
1222 		case S_IFCHR:
1223 			vattr.va_type = VCHR;
1224 			break;
1225 		case S_IFBLK:
1226 			vattr.va_type = VBLK;
1227 			break;
1228 		case S_IFWHT:
1229 			whiteout = 1;
1230 			break;
1231 		default:
			/*
			 * Every other type was already rejected by the first
			 * switch, so reaching this is a programming error.
			 */
1232 			panic("kern_mknod: invalid mode");
1233 		}
1234 	}
	/*
	 * Try to start the write without sleeping.  If that fails, drop the
	 * lookup state, call vn_start_write() again with V_XSLEEP | PCATCH
	 * and, if that succeeds, redo the lookup from 'restart'.
	 */
1235 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1236 		NDFREE(&nd, NDF_ONLY_PNBUF);
1237 		vput(nd.ni_dvp);
1238 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1239 			return (error);
1240 		goto restart;
1241 	}
	/*
	 * 'error' is 0 at this point: the privilege switch returned on
	 * failure and namei() succeeded.
	 */
1242 #ifdef MAC
1243 	if (error == 0 && !whiteout)
1244 		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
1245 		    &nd.ni_cnd, &vattr);
1246 #endif
1247 	if (error == 0) {
1248 		if (whiteout)
1249 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1250 		else {
1251 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1252 						&nd.ni_cnd, &vattr);
			/* The new vnode is not needed here; release it. */
1253 			if (error == 0)
1254 				vput(nd.ni_vp);
1255 		}
1256 	}
1257 	NDFREE(&nd, NDF_ONLY_PNBUF);
1258 	vput(nd.ni_dvp);
1259 	vn_finished_write(mp);
1260 	return (error);
1261 }
1262 
1263 /*
1264  * Create a named pipe.
1265  */
1266 #ifndef _SYS_SYSPROTO_H_
1267 struct mkfifo_args {
1268 	char	*path;
1269 	int	mode;
1270 };
1271 #endif
1272 int
1273 sys_mkfifo(td, uap)
1274 	struct thread *td;
1275 	register struct mkfifo_args /* {
1276 		char *path;
1277 		int mode;
1278 	} */ *uap;
1279 {
1280 
1281 	return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
1282 	    uap->mode));
1283 }
1284 
1285 #ifndef _SYS_SYSPROTO_H_
1286 struct mkfifoat_args {
1287 	int	fd;
1288 	char	*path;
1289 	mode_t	mode;
1290 };
1291 #endif
1292 int
1293 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
1294 {
1295 
1296 	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
1297 	    uap->mode));
1298 }
1299 
1300 int
1301 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1302     int mode)
1303 {
1304 	struct mount *mp;
1305 	struct vattr vattr;
1306 	struct nameidata nd;
1307 	cap_rights_t rights;
1308 	int error;
1309 
1310 	AUDIT_ARG_MODE(mode);
1311 restart:
1312 	bwillwrite();
1313 	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
1314 	    NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT),
1315 	    td);
1316 	if ((error = namei(&nd)) != 0)
1317 		return (error);
1318 	if (nd.ni_vp != NULL) {
1319 		NDFREE(&nd, NDF_ONLY_PNBUF);
1320 		if (nd.ni_vp == nd.ni_dvp)
1321 			vrele(nd.ni_dvp);
1322 		else
1323 			vput(nd.ni_dvp);
1324 		vrele(nd.ni_vp);
1325 		return (EEXIST);
1326 	}
1327 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1328 		NDFREE(&nd, NDF_ONLY_PNBUF);
1329 		vput(nd.ni_dvp);
1330 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1331 			return (error);
1332 		goto restart;
1333 	}
1334 	VATTR_NULL(&vattr);
1335 	vattr.va_type = VFIFO;
1336 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1337 #ifdef MAC
1338 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1339 	    &vattr);
1340 	if (error != 0)
1341 		goto out;
1342 #endif
1343 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1344 	if (error == 0)
1345 		vput(nd.ni_vp);
1346 #ifdef MAC
1347 out:
1348 #endif
1349 	vput(nd.ni_dvp);
1350 	vn_finished_write(mp);
1351 	NDFREE(&nd, NDF_ONLY_PNBUF);
1352 	return (error);
1353 }
1354 
1355 /*
1356  * Make a hard file link.
1357  */
1358 #ifndef _SYS_SYSPROTO_H_
1359 struct link_args {
1360 	char	*path;
1361 	char	*link;
1362 };
1363 #endif
1364 int
1365 sys_link(td, uap)
1366 	struct thread *td;
1367 	register struct link_args /* {
1368 		char *path;
1369 		char *link;
1370 	} */ *uap;
1371 {
1372 
1373 	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
1374 	    UIO_USERSPACE, FOLLOW));
1375 }
1376 
1377 #ifndef _SYS_SYSPROTO_H_
1378 struct linkat_args {
1379 	int	fd1;
1380 	char	*path1;
1381 	int	fd2;
1382 	char	*path2;
1383 	int	flag;
1384 };
1385 #endif
1386 int
1387 sys_linkat(struct thread *td, struct linkat_args *uap)
1388 {
1389 	int flag;
1390 
1391 	flag = uap->flag;
1392 	if (flag & ~AT_SYMLINK_FOLLOW)
1393 		return (EINVAL);
1394 
1395 	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
1396 	    UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
1397 }
1398 
1399 int hardlink_check_uid = 0;
1400 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1401     &hardlink_check_uid, 0,
1402     "Unprivileged processes cannot create hard links to files owned by other "
1403     "users");
1404 static int hardlink_check_gid = 0;
1405 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1406     &hardlink_check_gid, 0,
1407     "Unprivileged processes cannot create hard links to files owned by other "
1408     "groups");
1409 
1410 static int
1411 can_hardlink(struct vnode *vp, struct ucred *cred)
1412 {
1413 	struct vattr va;
1414 	int error;
1415 
1416 	if (!hardlink_check_uid && !hardlink_check_gid)
1417 		return (0);
1418 
1419 	error = VOP_GETATTR(vp, &va, cred);
1420 	if (error != 0)
1421 		return (error);
1422 
1423 	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
1424 		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
1425 		if (error != 0)
1426 			return (error);
1427 	}
1428 
1429 	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
1430 		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
1431 		if (error != 0)
1432 			return (error);
1433 	}
1434 
1435 	return (0);
1436 }
1437 
/*
 * Common implementation of link(2) and linkat(2).
 *
 * Creates the name 'path2' (looked up relative to 'fd2') as a hard link to
 * the existing object 'path1' (looked up relative to 'fd1').  'follow' is
 * OR-ed into the lookup flags for 'path1'; the callers in this file pass
 * FOLLOW or NOFOLLOW.  'segflg' describes the address space of both paths.
 *
 * Returns 0 on success, or:
 *   EPERM   if 'path1' names a directory;
 *   EEXIST  if 'path2' already exists;
 *   EXDEV   if the two names are on different mounts;
 *   otherwise the error from the lookups, can_hardlink(), the MAC check,
 *   vn_start_write() or VOP_LINK().
 */
int
kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
    enum uio_seg segflg, int follow)
{
	struct vnode *vp;
	struct mount *mp;
	struct nameidata nd;
	cap_rights_t rights;
	int error;

	/*
	 * The whole operation is redone from here whenever locking the
	 * source vnode fails or after having waited for writes to become
	 * possible.
	 */
again:
	bwillwrite();
	/* First lookup: the existing object that is being linked to. */
	NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1,
	    cap_rights_init(&rights, CAP_LINKAT_SOURCE), td);

	if ((error = namei(&nd)) != 0)
		return (error);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;
	if (vp->v_type == VDIR) {
		vrele(vp);
		return (EPERM);		/* POSIX */
	}
	/* Second lookup: the new name; 'nd' is reused, 'vp' keeps the source. */
	NDINIT_ATRIGHTS(&nd, CREATE,
	    LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2,
	    cap_rights_init(&rights, CAP_LINKAT_TARGET), td);
	if ((error = namei(&nd)) == 0) {
		if (nd.ni_vp != NULL) {
			/* The new name is already in use. */
			NDFREE(&nd, NDF_ONLY_PNBUF);
			if (nd.ni_dvp == nd.ni_vp)
				vrele(nd.ni_dvp);
			else
				vput(nd.ni_dvp);
			vrele(nd.ni_vp);
			vrele(vp);
			return (EEXIST);
		} else if (nd.ni_dvp->v_mount != vp->v_mount) {
			/*
			 * Cross-device link.  No need to recheck
			 * vp->v_type, since it cannot change, except
			 * to VBAD.
			 */
			NDFREE(&nd, NDF_ONLY_PNBUF);
			vput(nd.ni_dvp);
			vrele(vp);
			return (EXDEV);
		} else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) {
			/* Policy checks run with the source vnode locked. */
			error = can_hardlink(vp, td->td_ucred);
#ifdef MAC
			if (error == 0)
				error = mac_vnode_check_link(td->td_ucred,
				    nd.ni_dvp, vp, &nd.ni_cnd);
#endif
			if (error != 0) {
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE(&nd, NDF_ONLY_PNBUF);
				return (error);
			}
			error = vn_start_write(vp, &mp, V_NOWAIT);
			if (error != 0) {
				/*
				 * Writes cannot start without sleeping:
				 * release both vnodes, wait, then redo
				 * both lookups.
				 */
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE(&nd, NDF_ONLY_PNBUF);
				error = vn_start_write(NULL, &mp,
				    V_XSLEEP | PCATCH);
				if (error != 0)
					return (error);
				goto again;
			}
			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
			/*
			 * Only the lock on vp is dropped here; its
			 * reference is released by the vrele() at the end
			 * of the function.
			 */
			VOP_UNLOCK(vp, 0);
			vput(nd.ni_dvp);
			vn_finished_write(mp);
			NDFREE(&nd, NDF_ONLY_PNBUF);
		} else {
			/* Locking the source failed: start over. */
			vput(nd.ni_dvp);
			NDFREE(&nd, NDF_ONLY_PNBUF);
			vrele(vp);
			goto again;
		}
	}
	/* Reached after VOP_LINK() or when the second lookup failed. */
	vrele(vp);
	return (error);
}
1523 
1524 /*
1525  * Make a symbolic link.
1526  */
1527 #ifndef _SYS_SYSPROTO_H_
1528 struct symlink_args {
1529 	char	*path;
1530 	char	*link;
1531 };
1532 #endif
1533 int
1534 sys_symlink(td, uap)
1535 	struct thread *td;
1536 	register struct symlink_args /* {
1537 		char *path;
1538 		char *link;
1539 	} */ *uap;
1540 {
1541 
1542 	return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link,
1543 	    UIO_USERSPACE));
1544 }
1545 
1546 #ifndef _SYS_SYSPROTO_H_
1547 struct symlinkat_args {
1548 	char	*path;
1549 	int	fd;
1550 	char	*path2;
1551 };
1552 #endif
1553 int
1554 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
1555 {
1556 
1557 	return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
1558 	    UIO_USERSPACE));
1559 }
1560 
1561 int
1562 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
1563     enum uio_seg segflg)
1564 {
1565 	struct mount *mp;
1566 	struct vattr vattr;
1567 	char *syspath;
1568 	struct nameidata nd;
1569 	int error;
1570 	cap_rights_t rights;
1571 
1572 	if (segflg == UIO_SYSSPACE) {
1573 		syspath = path1;
1574 	} else {
1575 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1576 		if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
1577 			goto out;
1578 	}
1579 	AUDIT_ARG_TEXT(syspath);
1580 restart:
1581 	bwillwrite();
1582 	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
1583 	    NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT),
1584 	    td);
1585 	if ((error = namei(&nd)) != 0)
1586 		goto out;
1587 	if (nd.ni_vp) {
1588 		NDFREE(&nd, NDF_ONLY_PNBUF);
1589 		if (nd.ni_vp == nd.ni_dvp)
1590 			vrele(nd.ni_dvp);
1591 		else
1592 			vput(nd.ni_dvp);
1593 		vrele(nd.ni_vp);
1594 		error = EEXIST;
1595 		goto out;
1596 	}
1597 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1598 		NDFREE(&nd, NDF_ONLY_PNBUF);
1599 		vput(nd.ni_dvp);
1600 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1601 			goto out;
1602 		goto restart;
1603 	}
1604 	VATTR_NULL(&vattr);
1605 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1606 #ifdef MAC
1607 	vattr.va_type = VLNK;
1608 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1609 	    &vattr);
1610 	if (error != 0)
1611 		goto out2;
1612 #endif
1613 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1614 	if (error == 0)
1615 		vput(nd.ni_vp);
1616 #ifdef MAC
1617 out2:
1618 #endif
1619 	NDFREE(&nd, NDF_ONLY_PNBUF);
1620 	vput(nd.ni_dvp);
1621 	vn_finished_write(mp);
1622 out:
1623 	if (segflg != UIO_SYSSPACE)
1624 		uma_zfree(namei_zone, syspath);
1625 	return (error);
1626 }
1627 
1628 /*
1629  * Delete a whiteout from the filesystem.
1630  */
1631 int
1632 sys_undelete(td, uap)
1633 	struct thread *td;
1634 	register struct undelete_args /* {
1635 		char *path;
1636 	} */ *uap;
1637 {
1638 	struct mount *mp;
1639 	struct nameidata nd;
1640 	int error;
1641 
1642 restart:
1643 	bwillwrite();
1644 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1,
1645 	    UIO_USERSPACE, uap->path, td);
1646 	error = namei(&nd);
1647 	if (error != 0)
1648 		return (error);
1649 
1650 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1651 		NDFREE(&nd, NDF_ONLY_PNBUF);
1652 		if (nd.ni_vp == nd.ni_dvp)
1653 			vrele(nd.ni_dvp);
1654 		else
1655 			vput(nd.ni_dvp);
1656 		if (nd.ni_vp)
1657 			vrele(nd.ni_vp);
1658 		return (EEXIST);
1659 	}
1660 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1661 		NDFREE(&nd, NDF_ONLY_PNBUF);
1662 		vput(nd.ni_dvp);
1663 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1664 			return (error);
1665 		goto restart;
1666 	}
1667 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1668 	NDFREE(&nd, NDF_ONLY_PNBUF);
1669 	vput(nd.ni_dvp);
1670 	vn_finished_write(mp);
1671 	return (error);
1672 }
1673 
1674 /*
1675  * Delete a name from the filesystem.
1676  */
1677 #ifndef _SYS_SYSPROTO_H_
1678 struct unlink_args {
1679 	char	*path;
1680 };
1681 #endif
1682 int
1683 sys_unlink(td, uap)
1684 	struct thread *td;
1685 	struct unlink_args /* {
1686 		char *path;
1687 	} */ *uap;
1688 {
1689 
1690 	return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0));
1691 }
1692 
1693 #ifndef _SYS_SYSPROTO_H_
1694 struct unlinkat_args {
1695 	int	fd;
1696 	char	*path;
1697 	int	flag;
1698 };
1699 #endif
1700 int
1701 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
1702 {
1703 	int flag = uap->flag;
1704 	int fd = uap->fd;
1705 	char *path = uap->path;
1706 
1707 	if (flag & ~AT_REMOVEDIR)
1708 		return (EINVAL);
1709 
1710 	if (flag & AT_REMOVEDIR)
1711 		return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
1712 	else
1713 		return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
1714 }
1715 
/*
 * Common implementation of unlink(2) and unlinkat(2).
 *
 * Removes the name 'path', looked up relative to 'fd'.  When 'oldinum' is
 * non-zero the object found must have that inode number, otherwise the
 * call fails with EIDRM; the callers in this file pass 0.
 *
 * Returns 0 on success, or:
 *   EPERM  if the lookup fails with EINVAL, or if the name refers to a
 *          directory and 'oldinum' is 0;
 *   EIDRM  if 'oldinum' is non-zero and does not match;
 *   EBUSY  if the vnode is flagged VV_ROOT;
 *   otherwise the error from the lookup, vn_stat(), vn_start_write(), the
 *   MAC check or VOP_REMOVE().
 */
int
kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    ino_t oldinum)
{
	struct mount *mp;
	struct vnode *vp;
	struct nameidata nd;
	struct stat sb;
	cap_rights_t rights;
	int error;

restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
	    pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
	if ((error = namei(&nd)) != 0)
		return (error == EINVAL ? EPERM : error);
	vp = nd.ni_vp;
	/* error is 0 at this point; the checks below may set it. */
	if (vp->v_type == VDIR && oldinum == 0) {
		error = EPERM;		/* POSIX */
	} else if (oldinum != 0 &&
		  ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
		  sb.st_ino != oldinum) {
			error = EIDRM;	/* Identifier removed */
	} else {
		/*
		 * The root of a mounted filesystem cannot be deleted.
		 *
		 * XXX: can this only be a VDIR case?
		 */
		if (vp->v_vflag & VV_ROOT)
			error = EBUSY;
	}
	if (error == 0) {
		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
			/*
			 * Writes cannot start without sleeping: release
			 * both vnodes, wait, then redo the lookup.
			 */
			NDFREE(&nd, NDF_ONLY_PNBUF);
			vput(nd.ni_dvp);
			if (vp == nd.ni_dvp)
				vrele(vp);
			else
				vput(vp);
			if ((error = vn_start_write(NULL, &mp,
			    V_XSLEEP | PCATCH)) != 0)
				return (error);
			goto restart;
		}
#ifdef MAC
		error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
		    &nd.ni_cnd);
		if (error != 0)
			goto out;
#endif
		vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
#ifdef MAC
out:
#endif
		vn_finished_write(mp);
	}
	/* Common cleanup for both the success and the failure paths. */
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	if (vp == nd.ni_dvp)
		vrele(vp);
	else
		vput(vp);
	return (error);
}
1783 
1784 /*
1785  * Reposition read/write file offset.
1786  */
1787 #ifndef _SYS_SYSPROTO_H_
1788 struct lseek_args {
1789 	int	fd;
1790 	int	pad;
1791 	off_t	offset;
1792 	int	whence;
1793 };
1794 #endif
1795 int
1796 sys_lseek(td, uap)
1797 	struct thread *td;
1798 	register struct lseek_args /* {
1799 		int fd;
1800 		int pad;
1801 		off_t offset;
1802 		int whence;
1803 	} */ *uap;
1804 {
1805 	struct file *fp;
1806 	cap_rights_t rights;
1807 	int error;
1808 
1809 	AUDIT_ARG_FD(uap->fd);
1810 	error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp);
1811 	if (error != 0)
1812 		return (error);
1813 	error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ?
1814 	    fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE;
1815 	fdrop(fp, td);
1816 	return (error);
1817 }
1818 
1819 #if defined(COMPAT_43)
1820 /*
1821  * Reposition read/write file offset.
1822  */
1823 #ifndef _SYS_SYSPROTO_H_
1824 struct olseek_args {
1825 	int	fd;
1826 	long	offset;
1827 	int	whence;
1828 };
1829 #endif
1830 int
1831 olseek(td, uap)
1832 	struct thread *td;
1833 	register struct olseek_args /* {
1834 		int fd;
1835 		long offset;
1836 		int whence;
1837 	} */ *uap;
1838 {
1839 	struct lseek_args /* {
1840 		int fd;
1841 		int pad;
1842 		off_t offset;
1843 		int whence;
1844 	} */ nuap;
1845 
1846 	nuap.fd = uap->fd;
1847 	nuap.offset = uap->offset;
1848 	nuap.whence = uap->whence;
1849 	return (sys_lseek(td, &nuap));
1850 }
1851 #endif /* COMPAT_43 */
1852 
1853 #if defined(COMPAT_FREEBSD6)
1854 /* Version with the 'pad' argument */
1855 int
1856 freebsd6_lseek(td, uap)
1857 	struct thread *td;
1858 	register struct freebsd6_lseek_args *uap;
1859 {
1860 	struct lseek_args ouap;
1861 
1862 	ouap.fd = uap->fd;
1863 	ouap.offset = uap->offset;
1864 	ouap.whence = uap->whence;
1865 	return (sys_lseek(td, &ouap));
1866 }
1867 #endif
1868 
1869 /*
1870  * Check access permissions using passed credentials.
1871  */
1872 static int
1873 vn_access(vp, user_flags, cred, td)
1874 	struct vnode	*vp;
1875 	int		user_flags;
1876 	struct ucred	*cred;
1877 	struct thread	*td;
1878 {
1879 	accmode_t accmode;
1880 	int error;
1881 
1882 	/* Flags == 0 means only check for existence. */
1883 	if (user_flags == 0)
1884 		return (0);
1885 
1886 	accmode = 0;
1887 	if (user_flags & R_OK)
1888 		accmode |= VREAD;
1889 	if (user_flags & W_OK)
1890 		accmode |= VWRITE;
1891 	if (user_flags & X_OK)
1892 		accmode |= VEXEC;
1893 #ifdef MAC
1894 	error = mac_vnode_check_access(cred, vp, accmode);
1895 	if (error != 0)
1896 		return (error);
1897 #endif
1898 	if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1899 		error = VOP_ACCESS(vp, accmode, cred, td);
1900 	return (error);
1901 }
1902 
1903 /*
1904  * Check access permissions using "real" credentials.
1905  */
1906 #ifndef _SYS_SYSPROTO_H_
1907 struct access_args {
1908 	char	*path;
1909 	int	amode;
1910 };
1911 #endif
1912 int
1913 sys_access(td, uap)
1914 	struct thread *td;
1915 	register struct access_args /* {
1916 		char *path;
1917 		int amode;
1918 	} */ *uap;
1919 {
1920 
1921 	return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
1922 	    0, uap->amode));
1923 }
1924 
1925 #ifndef _SYS_SYSPROTO_H_
1926 struct faccessat_args {
1927 	int	dirfd;
1928 	char	*path;
1929 	int	amode;
1930 	int	flag;
1931 }
1932 #endif
1933 int
1934 sys_faccessat(struct thread *td, struct faccessat_args *uap)
1935 {
1936 
1937 	return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
1938 	    uap->amode));
1939 }
1940 
1941 int
1942 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
1943     int flag, int amode)
1944 {
1945 	struct ucred *cred, *usecred;
1946 	struct vnode *vp;
1947 	struct nameidata nd;
1948 	cap_rights_t rights;
1949 	int error;
1950 
1951 	if (flag & ~AT_EACCESS)
1952 		return (EINVAL);
1953 	if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0)
1954 		return (EINVAL);
1955 
1956 	/*
1957 	 * Create and modify a temporary credential instead of one that
1958 	 * is potentially shared (if we need one).
1959 	 */
1960 	cred = td->td_ucred;
1961 	if ((flag & AT_EACCESS) == 0 &&
1962 	    ((cred->cr_uid != cred->cr_ruid ||
1963 	    cred->cr_rgid != cred->cr_groups[0]))) {
1964 		usecred = crdup(cred);
1965 		usecred->cr_uid = cred->cr_ruid;
1966 		usecred->cr_groups[0] = cred->cr_rgid;
1967 		td->td_ucred = usecred;
1968 	} else
1969 		usecred = cred;
1970 	AUDIT_ARG_VALUE(amode);
1971 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF |
1972 	    AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT),
1973 	    td);
1974 	if ((error = namei(&nd)) != 0)
1975 		goto out;
1976 	vp = nd.ni_vp;
1977 
1978 	error = vn_access(vp, amode, usecred, td);
1979 	NDFREE(&nd, NDF_ONLY_PNBUF);
1980 	vput(vp);
1981 out:
1982 	if (usecred != cred) {
1983 		td->td_ucred = cred;
1984 		crfree(usecred);
1985 	}
1986 	return (error);
1987 }
1988 
1989 /*
1990  * Check access permissions using "effective" credentials.
1991  */
1992 #ifndef _SYS_SYSPROTO_H_
1993 struct eaccess_args {
1994 	char	*path;
1995 	int	amode;
1996 };
1997 #endif
1998 int
1999 sys_eaccess(td, uap)
2000 	struct thread *td;
2001 	register struct eaccess_args /* {
2002 		char *path;
2003 		int amode;
2004 	} */ *uap;
2005 {
2006 
2007 	return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
2008 	    AT_EACCESS, uap->amode));
2009 }
2010 
2011 #if defined(COMPAT_43)
2012 /*
2013  * Get file status; this version follows links.
2014  */
2015 #ifndef _SYS_SYSPROTO_H_
2016 struct ostat_args {
2017 	char	*path;
2018 	struct ostat *ub;
2019 };
2020 #endif
2021 int
2022 ostat(td, uap)
2023 	struct thread *td;
2024 	register struct ostat_args /* {
2025 		char *path;
2026 		struct ostat *ub;
2027 	} */ *uap;
2028 {
2029 	struct stat sb;
2030 	struct ostat osb;
2031 	int error;
2032 
2033 	error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
2034 	    &sb, NULL);
2035 	if (error != 0)
2036 		return (error);
2037 	cvtstat(&sb, &osb);
2038 	return (copyout(&osb, uap->ub, sizeof (osb)));
2039 }
2040 
2041 /*
2042  * Get file status; this version does not follow links.
2043  */
2044 #ifndef _SYS_SYSPROTO_H_
2045 struct olstat_args {
2046 	char	*path;
2047 	struct ostat *ub;
2048 };
2049 #endif
2050 int
2051 olstat(td, uap)
2052 	struct thread *td;
2053 	register struct olstat_args /* {
2054 		char *path;
2055 		struct ostat *ub;
2056 	} */ *uap;
2057 {
2058 	struct stat sb;
2059 	struct ostat osb;
2060 	int error;
2061 
2062 	error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
2063 	    UIO_USERSPACE, &sb, NULL);
2064 	if (error != 0)
2065 		return (error);
2066 	cvtstat(&sb, &osb);
2067 	return (copyout(&osb, uap->ub, sizeof (osb)));
2068 }
2069 
2070 /*
2071  * Convert from an old to a new stat structure.
2072  */
2073 void
2074 cvtstat(st, ost)
2075 	struct stat *st;
2076 	struct ostat *ost;
2077 {
2078 
2079 	ost->st_dev = st->st_dev;
2080 	ost->st_ino = st->st_ino;
2081 	ost->st_mode = st->st_mode;
2082 	ost->st_nlink = st->st_nlink;
2083 	ost->st_uid = st->st_uid;
2084 	ost->st_gid = st->st_gid;
2085 	ost->st_rdev = st->st_rdev;
2086 	if (st->st_size < (quad_t)1 << 32)
2087 		ost->st_size = st->st_size;
2088 	else
2089 		ost->st_size = -2;
2090 	ost->st_atim = st->st_atim;
2091 	ost->st_mtim = st->st_mtim;
2092 	ost->st_ctim = st->st_ctim;
2093 	ost->st_blksize = st->st_blksize;
2094 	ost->st_blocks = st->st_blocks;
2095 	ost->st_flags = st->st_flags;
2096 	ost->st_gen = st->st_gen;
2097 }
2098 #endif /* COMPAT_43 */
2099 
2100 /*
2101  * Get file status; this version follows links.
2102  */
2103 #ifndef _SYS_SYSPROTO_H_
2104 struct stat_args {
2105 	char	*path;
2106 	struct stat *ub;
2107 };
2108 #endif
2109 int
2110 sys_stat(td, uap)
2111 	struct thread *td;
2112 	register struct stat_args /* {
2113 		char *path;
2114 		struct stat *ub;
2115 	} */ *uap;
2116 {
2117 	struct stat sb;
2118 	int error;
2119 
2120 	error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
2121 	    &sb, NULL);
2122 	if (error == 0)
2123 		error = copyout(&sb, uap->ub, sizeof (sb));
2124 	return (error);
2125 }
2126 
2127 #ifndef _SYS_SYSPROTO_H_
2128 struct fstatat_args {
2129 	int	fd;
2130 	char	*path;
2131 	struct stat	*buf;
2132 	int	flag;
2133 }
2134 #endif
2135 int
2136 sys_fstatat(struct thread *td, struct fstatat_args *uap)
2137 {
2138 	struct stat sb;
2139 	int error;
2140 
2141 	error = kern_statat(td, uap->flag, uap->fd, uap->path,
2142 	    UIO_USERSPACE, &sb, NULL);
2143 	if (error == 0)
2144 		error = copyout(&sb, uap->buf, sizeof (sb));
2145 	return (error);
2146 }
2147 
2148 int
2149 kern_statat(struct thread *td, int flag, int fd, char *path,
2150     enum uio_seg pathseg, struct stat *sbp,
2151     void (*hook)(struct vnode *vp, struct stat *sbp))
2152 {
2153 	struct nameidata nd;
2154 	struct stat sb;
2155 	cap_rights_t rights;
2156 	int error;
2157 
2158 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2159 		return (EINVAL);
2160 
2161 	NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
2162 	    FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd,
2163 	    cap_rights_init(&rights, CAP_FSTAT), td);
2164 
2165 	if ((error = namei(&nd)) != 0)
2166 		return (error);
2167 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2168 	if (error == 0) {
2169 		SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode);
2170 		if (S_ISREG(sb.st_mode))
2171 			SDT_PROBE2(vfs, , stat, reg, path, pathseg);
2172 		if (__predict_false(hook != NULL))
2173 			hook(nd.ni_vp, &sb);
2174 	}
2175 	NDFREE(&nd, NDF_ONLY_PNBUF);
2176 	vput(nd.ni_vp);
2177 	if (error != 0)
2178 		return (error);
2179 	*sbp = sb;
2180 #ifdef KTRACE
2181 	if (KTRPOINT(td, KTR_STRUCT))
2182 		ktrstat(&sb);
2183 #endif
2184 	return (0);
2185 }
2186 
2187 /*
2188  * Get file status; this version does not follow links.
2189  */
2190 #ifndef _SYS_SYSPROTO_H_
2191 struct lstat_args {
2192 	char	*path;
2193 	struct stat *ub;
2194 };
2195 #endif
2196 int
2197 sys_lstat(td, uap)
2198 	struct thread *td;
2199 	register struct lstat_args /* {
2200 		char *path;
2201 		struct stat *ub;
2202 	} */ *uap;
2203 {
2204 	struct stat sb;
2205 	int error;
2206 
2207 	error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
2208 	    UIO_USERSPACE, &sb, NULL);
2209 	if (error == 0)
2210 		error = copyout(&sb, uap->ub, sizeof (sb));
2211 	return (error);
2212 }
2213 
2214 /*
2215  * Implementation of the NetBSD [l]stat() functions.
2216  */
2217 void
2218 cvtnstat(sb, nsb)
2219 	struct stat *sb;
2220 	struct nstat *nsb;
2221 {
2222 
2223 	bzero(nsb, sizeof *nsb);
2224 	nsb->st_dev = sb->st_dev;
2225 	nsb->st_ino = sb->st_ino;
2226 	nsb->st_mode = sb->st_mode;
2227 	nsb->st_nlink = sb->st_nlink;
2228 	nsb->st_uid = sb->st_uid;
2229 	nsb->st_gid = sb->st_gid;
2230 	nsb->st_rdev = sb->st_rdev;
2231 	nsb->st_atim = sb->st_atim;
2232 	nsb->st_mtim = sb->st_mtim;
2233 	nsb->st_ctim = sb->st_ctim;
2234 	nsb->st_size = sb->st_size;
2235 	nsb->st_blocks = sb->st_blocks;
2236 	nsb->st_blksize = sb->st_blksize;
2237 	nsb->st_flags = sb->st_flags;
2238 	nsb->st_gen = sb->st_gen;
2239 	nsb->st_birthtim = sb->st_birthtim;
2240 }
2241 
2242 #ifndef _SYS_SYSPROTO_H_
2243 struct nstat_args {
2244 	char	*path;
2245 	struct nstat *ub;
2246 };
2247 #endif
2248 int
2249 sys_nstat(td, uap)
2250 	struct thread *td;
2251 	register struct nstat_args /* {
2252 		char *path;
2253 		struct nstat *ub;
2254 	} */ *uap;
2255 {
2256 	struct stat sb;
2257 	struct nstat nsb;
2258 	int error;
2259 
2260 	error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
2261 	    &sb, NULL);
2262 	if (error != 0)
2263 		return (error);
2264 	cvtnstat(&sb, &nsb);
2265 	return (copyout(&nsb, uap->ub, sizeof (nsb)));
2266 }
2267 
2268 /*
2269  * NetBSD lstat.  Get file status; this version does not follow links.
2270  */
2271 #ifndef _SYS_SYSPROTO_H_
2272 struct lstat_args {
2273 	char	*path;
2274 	struct stat *ub;
2275 };
2276 #endif
2277 int
2278 sys_nlstat(td, uap)
2279 	struct thread *td;
2280 	register struct nlstat_args /* {
2281 		char *path;
2282 		struct nstat *ub;
2283 	} */ *uap;
2284 {
2285 	struct stat sb;
2286 	struct nstat nsb;
2287 	int error;
2288 
2289 	error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
2290 	    UIO_USERSPACE, &sb, NULL);
2291 	if (error != 0)
2292 		return (error);
2293 	cvtnstat(&sb, &nsb);
2294 	return (copyout(&nsb, uap->ub, sizeof (nsb)));
2295 }
2296 
2297 /*
2298  * Get configurable pathname variables.
2299  */
2300 #ifndef _SYS_SYSPROTO_H_
2301 struct pathconf_args {
2302 	char	*path;
2303 	int	name;
2304 };
2305 #endif
2306 int
2307 sys_pathconf(td, uap)
2308 	struct thread *td;
2309 	register struct pathconf_args /* {
2310 		char *path;
2311 		int name;
2312 	} */ *uap;
2313 {
2314 
2315 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW));
2316 }
2317 
2318 #ifndef _SYS_SYSPROTO_H_
2319 struct lpathconf_args {
2320 	char	*path;
2321 	int	name;
2322 };
2323 #endif
2324 int
2325 sys_lpathconf(td, uap)
2326 	struct thread *td;
2327 	register struct lpathconf_args /* {
2328 		char *path;
2329 		int name;
2330 	} */ *uap;
2331 {
2332 
2333 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name,
2334 	    NOFOLLOW));
2335 }
2336 
2337 int
2338 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
2339     u_long flags)
2340 {
2341 	struct nameidata nd;
2342 	int error;
2343 
2344 	NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags,
2345 	    pathseg, path, td);
2346 	if ((error = namei(&nd)) != 0)
2347 		return (error);
2348 	NDFREE(&nd, NDF_ONLY_PNBUF);
2349 
2350 	/* If asynchronous I/O is available, it works for all files. */
2351 	if (name == _PC_ASYNC_IO)
2352 		td->td_retval[0] = async_io_version;
2353 	else
2354 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2355 	vput(nd.ni_vp);
2356 	return (error);
2357 }
2358 
2359 /*
2360  * Return target name of a symbolic link.
2361  */
2362 #ifndef _SYS_SYSPROTO_H_
2363 struct readlink_args {
2364 	char	*path;
2365 	char	*buf;
2366 	size_t	count;
2367 };
2368 #endif
2369 int
2370 sys_readlink(td, uap)
2371 	struct thread *td;
2372 	register struct readlink_args /* {
2373 		char *path;
2374 		char *buf;
2375 		size_t count;
2376 	} */ *uap;
2377 {
2378 
2379 	return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
2380 	    uap->buf, UIO_USERSPACE, uap->count));
2381 }
2382 #ifndef _SYS_SYSPROTO_H_
2383 struct readlinkat_args {
2384 	int	fd;
2385 	char	*path;
2386 	char	*buf;
2387 	size_t	bufsize;
2388 };
2389 #endif
2390 int
2391 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
2392 {
2393 
2394 	return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
2395 	    uap->buf, UIO_USERSPACE, uap->bufsize));
2396 }
2397 
2398 int
2399 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
2400     char *buf, enum uio_seg bufseg, size_t count)
2401 {
2402 	struct vnode *vp;
2403 	struct iovec aiov;
2404 	struct uio auio;
2405 	struct nameidata nd;
2406 	int error;
2407 
2408 	if (count > IOSIZE_MAX)
2409 		return (EINVAL);
2410 
2411 	NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
2412 	    pathseg, path, fd, td);
2413 
2414 	if ((error = namei(&nd)) != 0)
2415 		return (error);
2416 	NDFREE(&nd, NDF_ONLY_PNBUF);
2417 	vp = nd.ni_vp;
2418 #ifdef MAC
2419 	error = mac_vnode_check_readlink(td->td_ucred, vp);
2420 	if (error != 0) {
2421 		vput(vp);
2422 		return (error);
2423 	}
2424 #endif
2425 	if (vp->v_type != VLNK)
2426 		error = EINVAL;
2427 	else {
2428 		aiov.iov_base = buf;
2429 		aiov.iov_len = count;
2430 		auio.uio_iov = &aiov;
2431 		auio.uio_iovcnt = 1;
2432 		auio.uio_offset = 0;
2433 		auio.uio_rw = UIO_READ;
2434 		auio.uio_segflg = bufseg;
2435 		auio.uio_td = td;
2436 		auio.uio_resid = count;
2437 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2438 		td->td_retval[0] = count - auio.uio_resid;
2439 	}
2440 	vput(vp);
2441 	return (error);
2442 }
2443 
2444 /*
2445  * Common implementation code for chflags() and fchflags().
2446  */
2447 static int
2448 setfflags(td, vp, flags)
2449 	struct thread *td;
2450 	struct vnode *vp;
2451 	u_long flags;
2452 {
2453 	struct mount *mp;
2454 	struct vattr vattr;
2455 	int error;
2456 
2457 	/* We can't support the value matching VNOVAL. */
2458 	if (flags == VNOVAL)
2459 		return (EOPNOTSUPP);
2460 
2461 	/*
2462 	 * Prevent non-root users from setting flags on devices.  When
2463 	 * a device is reused, users can retain ownership of the device
2464 	 * if they are allowed to set flags and programs assume that
2465 	 * chown can't fail when done as root.
2466 	 */
2467 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2468 		error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
2469 		if (error != 0)
2470 			return (error);
2471 	}
2472 
2473 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2474 		return (error);
2475 	VATTR_NULL(&vattr);
2476 	vattr.va_flags = flags;
2477 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2478 #ifdef MAC
2479 	error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
2480 	if (error == 0)
2481 #endif
2482 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
2483 	VOP_UNLOCK(vp, 0);
2484 	vn_finished_write(mp);
2485 	return (error);
2486 }
2487 
2488 /*
2489  * Change flags of a file given a path name.
2490  */
2491 #ifndef _SYS_SYSPROTO_H_
2492 struct chflags_args {
2493 	const char *path;
2494 	u_long	flags;
2495 };
2496 #endif
2497 int
2498 sys_chflags(td, uap)
2499 	struct thread *td;
2500 	register struct chflags_args /* {
2501 		const char *path;
2502 		u_long flags;
2503 	} */ *uap;
2504 {
2505 
2506 	return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
2507 	    uap->flags, 0));
2508 }
2509 
2510 #ifndef _SYS_SYSPROTO_H_
2511 struct chflagsat_args {
2512 	int	fd;
2513 	const char *path;
2514 	u_long	flags;
2515 	int	atflag;
2516 }
2517 #endif
2518 int
2519 sys_chflagsat(struct thread *td, struct chflagsat_args *uap)
2520 {
2521 	int fd = uap->fd;
2522 	const char *path = uap->path;
2523 	u_long flags = uap->flags;
2524 	int atflag = uap->atflag;
2525 
2526 	if (atflag & ~AT_SYMLINK_NOFOLLOW)
2527 		return (EINVAL);
2528 
2529 	return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag));
2530 }
2531 
2532 /*
2533  * Same as chflags() but doesn't follow symlinks.
2534  */
2535 int
2536 sys_lchflags(td, uap)
2537 	struct thread *td;
2538 	register struct lchflags_args /* {
2539 		const char *path;
2540 		u_long flags;
2541 	} */ *uap;
2542 {
2543 
2544 	return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
2545 	    uap->flags, AT_SYMLINK_NOFOLLOW));
2546 }
2547 
2548 static int
2549 kern_chflagsat(struct thread *td, int fd, const char *path,
2550     enum uio_seg pathseg, u_long flags, int atflag)
2551 {
2552 	struct nameidata nd;
2553 	cap_rights_t rights;
2554 	int error, follow;
2555 
2556 	AUDIT_ARG_FFLAGS(flags);
2557 	follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2558 	NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
2559 	    cap_rights_init(&rights, CAP_FCHFLAGS), td);
2560 	if ((error = namei(&nd)) != 0)
2561 		return (error);
2562 	NDFREE(&nd, NDF_ONLY_PNBUF);
2563 	error = setfflags(td, nd.ni_vp, flags);
2564 	vrele(nd.ni_vp);
2565 	return (error);
2566 }
2567 
2568 /*
2569  * Change flags of a file given a file descriptor.
2570  */
2571 #ifndef _SYS_SYSPROTO_H_
2572 struct fchflags_args {
2573 	int	fd;
2574 	u_long	flags;
2575 };
2576 #endif
2577 int
2578 sys_fchflags(td, uap)
2579 	struct thread *td;
2580 	register struct fchflags_args /* {
2581 		int fd;
2582 		u_long flags;
2583 	} */ *uap;
2584 {
2585 	struct file *fp;
2586 	cap_rights_t rights;
2587 	int error;
2588 
2589 	AUDIT_ARG_FD(uap->fd);
2590 	AUDIT_ARG_FFLAGS(uap->flags);
2591 	error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS),
2592 	    &fp);
2593 	if (error != 0)
2594 		return (error);
2595 #ifdef AUDIT
2596 	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
2597 	AUDIT_ARG_VNODE1(fp->f_vnode);
2598 	VOP_UNLOCK(fp->f_vnode, 0);
2599 #endif
2600 	error = setfflags(td, fp->f_vnode, uap->flags);
2601 	fdrop(fp, td);
2602 	return (error);
2603 }
2604 
2605 /*
2606  * Common implementation code for chmod(), lchmod() and fchmod().
2607  */
2608 int
2609 setfmode(td, cred, vp, mode)
2610 	struct thread *td;
2611 	struct ucred *cred;
2612 	struct vnode *vp;
2613 	int mode;
2614 {
2615 	struct mount *mp;
2616 	struct vattr vattr;
2617 	int error;
2618 
2619 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2620 		return (error);
2621 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2622 	VATTR_NULL(&vattr);
2623 	vattr.va_mode = mode & ALLPERMS;
2624 #ifdef MAC
2625 	error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
2626 	if (error == 0)
2627 #endif
2628 		error = VOP_SETATTR(vp, &vattr, cred);
2629 	VOP_UNLOCK(vp, 0);
2630 	vn_finished_write(mp);
2631 	return (error);
2632 }
2633 
2634 /*
2635  * Change mode of a file given path name.
2636  */
2637 #ifndef _SYS_SYSPROTO_H_
2638 struct chmod_args {
2639 	char	*path;
2640 	int	mode;
2641 };
2642 #endif
2643 int
2644 sys_chmod(td, uap)
2645 	struct thread *td;
2646 	register struct chmod_args /* {
2647 		char *path;
2648 		int mode;
2649 	} */ *uap;
2650 {
2651 
2652 	return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
2653 	    uap->mode, 0));
2654 }
2655 
2656 #ifndef _SYS_SYSPROTO_H_
2657 struct fchmodat_args {
2658 	int	dirfd;
2659 	char	*path;
2660 	mode_t	mode;
2661 	int	flag;
2662 }
2663 #endif
2664 int
2665 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
2666 {
2667 	int flag = uap->flag;
2668 	int fd = uap->fd;
2669 	char *path = uap->path;
2670 	mode_t mode = uap->mode;
2671 
2672 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2673 		return (EINVAL);
2674 
2675 	return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
2676 }
2677 
2678 /*
2679  * Change mode of a file given path name (don't follow links.)
2680  */
2681 #ifndef _SYS_SYSPROTO_H_
2682 struct lchmod_args {
2683 	char	*path;
2684 	int	mode;
2685 };
2686 #endif
2687 int
2688 sys_lchmod(td, uap)
2689 	struct thread *td;
2690 	register struct lchmod_args /* {
2691 		char *path;
2692 		int mode;
2693 	} */ *uap;
2694 {
2695 
2696 	return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
2697 	    uap->mode, AT_SYMLINK_NOFOLLOW));
2698 }
2699 
2700 int
2701 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
2702     mode_t mode, int flag)
2703 {
2704 	struct nameidata nd;
2705 	cap_rights_t rights;
2706 	int error, follow;
2707 
2708 	AUDIT_ARG_MODE(mode);
2709 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2710 	NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
2711 	    cap_rights_init(&rights, CAP_FCHMOD), td);
2712 	if ((error = namei(&nd)) != 0)
2713 		return (error);
2714 	NDFREE(&nd, NDF_ONLY_PNBUF);
2715 	error = setfmode(td, td->td_ucred, nd.ni_vp, mode);
2716 	vrele(nd.ni_vp);
2717 	return (error);
2718 }
2719 
2720 /*
2721  * Change mode of a file given a file descriptor.
2722  */
2723 #ifndef _SYS_SYSPROTO_H_
2724 struct fchmod_args {
2725 	int	fd;
2726 	int	mode;
2727 };
2728 #endif
2729 int
2730 sys_fchmod(struct thread *td, struct fchmod_args *uap)
2731 {
2732 	struct file *fp;
2733 	cap_rights_t rights;
2734 	int error;
2735 
2736 	AUDIT_ARG_FD(uap->fd);
2737 	AUDIT_ARG_MODE(uap->mode);
2738 
2739 	error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp);
2740 	if (error != 0)
2741 		return (error);
2742 	error = fo_chmod(fp, uap->mode, td->td_ucred, td);
2743 	fdrop(fp, td);
2744 	return (error);
2745 }
2746 
2747 /*
2748  * Common implementation for chown(), lchown(), and fchown()
2749  */
2750 int
2751 setfown(td, cred, vp, uid, gid)
2752 	struct thread *td;
2753 	struct ucred *cred;
2754 	struct vnode *vp;
2755 	uid_t uid;
2756 	gid_t gid;
2757 {
2758 	struct mount *mp;
2759 	struct vattr vattr;
2760 	int error;
2761 
2762 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2763 		return (error);
2764 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2765 	VATTR_NULL(&vattr);
2766 	vattr.va_uid = uid;
2767 	vattr.va_gid = gid;
2768 #ifdef MAC
2769 	error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
2770 	    vattr.va_gid);
2771 	if (error == 0)
2772 #endif
2773 		error = VOP_SETATTR(vp, &vattr, cred);
2774 	VOP_UNLOCK(vp, 0);
2775 	vn_finished_write(mp);
2776 	return (error);
2777 }
2778 
2779 /*
2780  * Set ownership given a path name.
2781  */
2782 #ifndef _SYS_SYSPROTO_H_
2783 struct chown_args {
2784 	char	*path;
2785 	int	uid;
2786 	int	gid;
2787 };
2788 #endif
2789 int
2790 sys_chown(td, uap)
2791 	struct thread *td;
2792 	register struct chown_args /* {
2793 		char *path;
2794 		int uid;
2795 		int gid;
2796 	} */ *uap;
2797 {
2798 
2799 	return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid,
2800 	    uap->gid, 0));
2801 }
2802 
2803 #ifndef _SYS_SYSPROTO_H_
2804 struct fchownat_args {
2805 	int fd;
2806 	const char * path;
2807 	uid_t uid;
2808 	gid_t gid;
2809 	int flag;
2810 };
2811 #endif
2812 int
2813 sys_fchownat(struct thread *td, struct fchownat_args *uap)
2814 {
2815 	int flag;
2816 
2817 	flag = uap->flag;
2818 	if (flag & ~AT_SYMLINK_NOFOLLOW)
2819 		return (EINVAL);
2820 
2821 	return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
2822 	    uap->gid, uap->flag));
2823 }
2824 
2825 int
2826 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
2827     int uid, int gid, int flag)
2828 {
2829 	struct nameidata nd;
2830 	cap_rights_t rights;
2831 	int error, follow;
2832 
2833 	AUDIT_ARG_OWNER(uid, gid);
2834 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
2835 	NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
2836 	    cap_rights_init(&rights, CAP_FCHOWN), td);
2837 
2838 	if ((error = namei(&nd)) != 0)
2839 		return (error);
2840 	NDFREE(&nd, NDF_ONLY_PNBUF);
2841 	error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
2842 	vrele(nd.ni_vp);
2843 	return (error);
2844 }
2845 
2846 /*
2847  * Set ownership given a path name, do not cross symlinks.
2848  */
2849 #ifndef _SYS_SYSPROTO_H_
2850 struct lchown_args {
2851 	char	*path;
2852 	int	uid;
2853 	int	gid;
2854 };
2855 #endif
2856 int
2857 sys_lchown(td, uap)
2858 	struct thread *td;
2859 	register struct lchown_args /* {
2860 		char *path;
2861 		int uid;
2862 		int gid;
2863 	} */ *uap;
2864 {
2865 
2866 	return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
2867 	    uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW));
2868 }
2869 
2870 /*
2871  * Set ownership given a file descriptor.
2872  */
2873 #ifndef _SYS_SYSPROTO_H_
2874 struct fchown_args {
2875 	int	fd;
2876 	int	uid;
2877 	int	gid;
2878 };
2879 #endif
2880 int
2881 sys_fchown(td, uap)
2882 	struct thread *td;
2883 	register struct fchown_args /* {
2884 		int fd;
2885 		int uid;
2886 		int gid;
2887 	} */ *uap;
2888 {
2889 	struct file *fp;
2890 	cap_rights_t rights;
2891 	int error;
2892 
2893 	AUDIT_ARG_FD(uap->fd);
2894 	AUDIT_ARG_OWNER(uap->uid, uap->gid);
2895 	error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp);
2896 	if (error != 0)
2897 		return (error);
2898 	error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
2899 	fdrop(fp, td);
2900 	return (error);
2901 }
2902 
2903 /*
2904  * Common implementation code for utimes(), lutimes(), and futimes().
2905  */
2906 static int
2907 getutimes(usrtvp, tvpseg, tsp)
2908 	const struct timeval *usrtvp;
2909 	enum uio_seg tvpseg;
2910 	struct timespec *tsp;
2911 {
2912 	struct timeval tv[2];
2913 	const struct timeval *tvp;
2914 	int error;
2915 
2916 	if (usrtvp == NULL) {
2917 		vfs_timestamp(&tsp[0]);
2918 		tsp[1] = tsp[0];
2919 	} else {
2920 		if (tvpseg == UIO_SYSSPACE) {
2921 			tvp = usrtvp;
2922 		} else {
2923 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2924 				return (error);
2925 			tvp = tv;
2926 		}
2927 
2928 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2929 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2930 			return (EINVAL);
2931 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2932 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2933 	}
2934 	return (0);
2935 }
2936 
2937 /*
2938  * Common implementation code for futimens(), utimensat().
2939  */
2940 #define	UTIMENS_NULL	0x1
2941 #define	UTIMENS_EXIT	0x2
2942 static int
2943 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg,
2944     struct timespec *tsp, int *retflags)
2945 {
2946 	struct timespec tsnow;
2947 	int error;
2948 
2949 	vfs_timestamp(&tsnow);
2950 	*retflags = 0;
2951 	if (usrtsp == NULL) {
2952 		tsp[0] = tsnow;
2953 		tsp[1] = tsnow;
2954 		*retflags |= UTIMENS_NULL;
2955 		return (0);
2956 	}
2957 	if (tspseg == UIO_SYSSPACE) {
2958 		tsp[0] = usrtsp[0];
2959 		tsp[1] = usrtsp[1];
2960 	} else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0)
2961 		return (error);
2962 	if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT)
2963 		*retflags |= UTIMENS_EXIT;
2964 	if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW)
2965 		*retflags |= UTIMENS_NULL;
2966 	if (tsp[0].tv_nsec == UTIME_OMIT)
2967 		tsp[0].tv_sec = VNOVAL;
2968 	else if (tsp[0].tv_nsec == UTIME_NOW)
2969 		tsp[0] = tsnow;
2970 	else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L)
2971 		return (EINVAL);
2972 	if (tsp[1].tv_nsec == UTIME_OMIT)
2973 		tsp[1].tv_sec = VNOVAL;
2974 	else if (tsp[1].tv_nsec == UTIME_NOW)
2975 		tsp[1] = tsnow;
2976 	else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L)
2977 		return (EINVAL);
2978 
2979 	return (0);
2980 }
2981 
2982 /*
2983  * Common implementation code for utimes(), lutimes(), futimes(), futimens(),
2984  * and utimensat().
2985  */
2986 static int
2987 setutimes(td, vp, ts, numtimes, nullflag)
2988 	struct thread *td;
2989 	struct vnode *vp;
2990 	const struct timespec *ts;
2991 	int numtimes;
2992 	int nullflag;
2993 {
2994 	struct mount *mp;
2995 	struct vattr vattr;
2996 	int error, setbirthtime;
2997 
2998 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2999 		return (error);
3000 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3001 	setbirthtime = 0;
3002 	if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
3003 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
3004 		setbirthtime = 1;
3005 	VATTR_NULL(&vattr);
3006 	vattr.va_atime = ts[0];
3007 	vattr.va_mtime = ts[1];
3008 	if (setbirthtime)
3009 		vattr.va_birthtime = ts[1];
3010 	if (numtimes > 2)
3011 		vattr.va_birthtime = ts[2];
3012 	if (nullflag)
3013 		vattr.va_vaflags |= VA_UTIMES_NULL;
3014 #ifdef MAC
3015 	error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
3016 	    vattr.va_mtime);
3017 #endif
3018 	if (error == 0)
3019 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
3020 	VOP_UNLOCK(vp, 0);
3021 	vn_finished_write(mp);
3022 	return (error);
3023 }
3024 
3025 /*
3026  * Set the access and modification times of a file.
3027  */
3028 #ifndef _SYS_SYSPROTO_H_
3029 struct utimes_args {
3030 	char	*path;
3031 	struct	timeval *tptr;
3032 };
3033 #endif
3034 int
3035 sys_utimes(td, uap)
3036 	struct thread *td;
3037 	register struct utimes_args /* {
3038 		char *path;
3039 		struct timeval *tptr;
3040 	} */ *uap;
3041 {
3042 
3043 	return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
3044 	    uap->tptr, UIO_USERSPACE));
3045 }
3046 
3047 #ifndef _SYS_SYSPROTO_H_
3048 struct futimesat_args {
3049 	int fd;
3050 	const char * path;
3051 	const struct timeval * times;
3052 };
3053 #endif
3054 int
3055 sys_futimesat(struct thread *td, struct futimesat_args *uap)
3056 {
3057 
3058 	return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
3059 	    uap->times, UIO_USERSPACE));
3060 }
3061 
3062 int
3063 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
3064     struct timeval *tptr, enum uio_seg tptrseg)
3065 {
3066 	struct nameidata nd;
3067 	struct timespec ts[2];
3068 	cap_rights_t rights;
3069 	int error;
3070 
3071 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
3072 		return (error);
3073 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
3074 	    cap_rights_init(&rights, CAP_FUTIMES), td);
3075 
3076 	if ((error = namei(&nd)) != 0)
3077 		return (error);
3078 	NDFREE(&nd, NDF_ONLY_PNBUF);
3079 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
3080 	vrele(nd.ni_vp);
3081 	return (error);
3082 }
3083 
3084 /*
3085  * Set the access and modification times of a file.
3086  */
3087 #ifndef _SYS_SYSPROTO_H_
3088 struct lutimes_args {
3089 	char	*path;
3090 	struct	timeval *tptr;
3091 };
3092 #endif
3093 int
3094 sys_lutimes(td, uap)
3095 	struct thread *td;
3096 	register struct lutimes_args /* {
3097 		char *path;
3098 		struct timeval *tptr;
3099 	} */ *uap;
3100 {
3101 
3102 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
3103 	    UIO_USERSPACE));
3104 }
3105 
3106 int
3107 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
3108     struct timeval *tptr, enum uio_seg tptrseg)
3109 {
3110 	struct timespec ts[2];
3111 	struct nameidata nd;
3112 	int error;
3113 
3114 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
3115 		return (error);
3116 	NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td);
3117 	if ((error = namei(&nd)) != 0)
3118 		return (error);
3119 	NDFREE(&nd, NDF_ONLY_PNBUF);
3120 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
3121 	vrele(nd.ni_vp);
3122 	return (error);
3123 }
3124 
3125 /*
3126  * Set the access and modification times of a file.
3127  */
3128 #ifndef _SYS_SYSPROTO_H_
3129 struct futimes_args {
3130 	int	fd;
3131 	struct	timeval *tptr;
3132 };
3133 #endif
3134 int
3135 sys_futimes(td, uap)
3136 	struct thread *td;
3137 	register struct futimes_args /* {
3138 		int  fd;
3139 		struct timeval *tptr;
3140 	} */ *uap;
3141 {
3142 
3143 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
3144 }
3145 
3146 int
3147 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
3148     enum uio_seg tptrseg)
3149 {
3150 	struct timespec ts[2];
3151 	struct file *fp;
3152 	cap_rights_t rights;
3153 	int error;
3154 
3155 	AUDIT_ARG_FD(fd);
3156 	error = getutimes(tptr, tptrseg, ts);
3157 	if (error != 0)
3158 		return (error);
3159 	error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp);
3160 	if (error != 0)
3161 		return (error);
3162 #ifdef AUDIT
3163 	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
3164 	AUDIT_ARG_VNODE1(fp->f_vnode);
3165 	VOP_UNLOCK(fp->f_vnode, 0);
3166 #endif
3167 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
3168 	fdrop(fp, td);
3169 	return (error);
3170 }
3171 
3172 int
3173 sys_futimens(struct thread *td, struct futimens_args *uap)
3174 {
3175 
3176 	return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE));
3177 }
3178 
3179 int
3180 kern_futimens(struct thread *td, int fd, struct timespec *tptr,
3181     enum uio_seg tptrseg)
3182 {
3183 	struct timespec ts[2];
3184 	struct file *fp;
3185 	cap_rights_t rights;
3186 	int error, flags;
3187 
3188 	AUDIT_ARG_FD(fd);
3189 	error = getutimens(tptr, tptrseg, ts, &flags);
3190 	if (error != 0)
3191 		return (error);
3192 	if (flags & UTIMENS_EXIT)
3193 		return (0);
3194 	error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp);
3195 	if (error != 0)
3196 		return (error);
3197 #ifdef AUDIT
3198 	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
3199 	AUDIT_ARG_VNODE1(fp->f_vnode);
3200 	VOP_UNLOCK(fp->f_vnode, 0);
3201 #endif
3202 	error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL);
3203 	fdrop(fp, td);
3204 	return (error);
3205 }
3206 
3207 int
3208 sys_utimensat(struct thread *td, struct utimensat_args *uap)
3209 {
3210 
3211 	return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE,
3212 	    uap->times, UIO_USERSPACE, uap->flag));
3213 }
3214 
3215 int
3216 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
3217     struct timespec *tptr, enum uio_seg tptrseg, int flag)
3218 {
3219 	struct nameidata nd;
3220 	struct timespec ts[2];
3221 	cap_rights_t rights;
3222 	int error, flags;
3223 
3224 	if (flag & ~AT_SYMLINK_NOFOLLOW)
3225 		return (EINVAL);
3226 
3227 	if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0)
3228 		return (error);
3229 	NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
3230 	    FOLLOW) | AUDITVNODE1, pathseg, path, fd,
3231 	    cap_rights_init(&rights, CAP_FUTIMES), td);
3232 	if ((error = namei(&nd)) != 0)
3233 		return (error);
3234 	/*
3235 	 * We are allowed to call namei() regardless of 2xUTIME_OMIT.
3236 	 * POSIX states:
3237 	 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected."
3238 	 * "Search permission is denied by a component of the path prefix."
3239 	 */
3240 	NDFREE(&nd, NDF_ONLY_PNBUF);
3241 	if ((flags & UTIMENS_EXIT) == 0)
3242 		error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL);
3243 	vrele(nd.ni_vp);
3244 	return (error);
3245 }
3246 
3247 /*
3248  * Truncate a file given its path name.
3249  */
3250 #ifndef _SYS_SYSPROTO_H_
3251 struct truncate_args {
3252 	char	*path;
3253 	int	pad;
3254 	off_t	length;
3255 };
3256 #endif
3257 int
3258 sys_truncate(td, uap)
3259 	struct thread *td;
3260 	register struct truncate_args /* {
3261 		char *path;
3262 		int pad;
3263 		off_t length;
3264 	} */ *uap;
3265 {
3266 
3267 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3268 }
3269 
3270 int
3271 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3272 {
3273 	struct mount *mp;
3274 	struct vnode *vp;
3275 	void *rl_cookie;
3276 	struct vattr vattr;
3277 	struct nameidata nd;
3278 	int error;
3279 
3280 	if (length < 0)
3281 		return(EINVAL);
3282 	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td);
3283 	if ((error = namei(&nd)) != 0)
3284 		return (error);
3285 	vp = nd.ni_vp;
3286 	rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
3287 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3288 		vn_rangelock_unlock(vp, rl_cookie);
3289 		vrele(vp);
3290 		return (error);
3291 	}
3292 	NDFREE(&nd, NDF_ONLY_PNBUF);
3293 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3294 	if (vp->v_type == VDIR)
3295 		error = EISDIR;
3296 #ifdef MAC
3297 	else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
3298 	}
3299 #endif
3300 	else if ((error = vn_writechk(vp)) == 0 &&
3301 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3302 		VATTR_NULL(&vattr);
3303 		vattr.va_size = length;
3304 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
3305 	}
3306 	VOP_UNLOCK(vp, 0);
3307 	vn_finished_write(mp);
3308 	vn_rangelock_unlock(vp, rl_cookie);
3309 	vrele(vp);
3310 	return (error);
3311 }
3312 
#if defined(COMPAT_43)
/*
 * Old truncate() entry point taking a 'long' length: repackages the path
 * and length into a truncate_args structure and calls sys_truncate().
 */
#ifndef _SYS_SYSPROTO_H_
struct otruncate_args {
	char	*path;
	long	length;
};
#endif
int
otruncate(struct thread *td, struct otruncate_args *uap)
{
	struct truncate_args targs;
	int error;

	/* The 'pad' member is left unset; sys_truncate() does not read it. */
	targs.path = uap->path;
	targs.length = uap->length;
	error = sys_truncate(td, &targs);
	return (error);
}
#endif /* COMPAT_43 */
3342 
3343 #if defined(COMPAT_FREEBSD6)
3344 /* Versions with the pad argument */
3345 int
3346 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
3347 {
3348 	struct truncate_args ouap;
3349 
3350 	ouap.path = uap->path;
3351 	ouap.length = uap->length;
3352 	return (sys_truncate(td, &ouap));
3353 }
3354 
3355 int
3356 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
3357 {
3358 	struct ftruncate_args ouap;
3359 
3360 	ouap.fd = uap->fd;
3361 	ouap.length = uap->length;
3362 	return (sys_ftruncate(td, &ouap));
3363 }
3364 #endif
3365 
3366 /*
3367  * Sync an open file.
3368  */
3369 #ifndef _SYS_SYSPROTO_H_
3370 struct fsync_args {
3371 	int	fd;
3372 };
3373 #endif
3374 int
3375 sys_fsync(td, uap)
3376 	struct thread *td;
3377 	struct fsync_args /* {
3378 		int fd;
3379 	} */ *uap;
3380 {
3381 	struct vnode *vp;
3382 	struct mount *mp;
3383 	struct file *fp;
3384 	cap_rights_t rights;
3385 	int error, lock_flags;
3386 
3387 	AUDIT_ARG_FD(uap->fd);
3388 	error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FSYNC), &fp);
3389 	if (error != 0)
3390 		return (error);
3391 	vp = fp->f_vnode;
3392 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
3393 	if (error != 0)
3394 		goto drop;
3395 	if (MNT_SHARED_WRITES(mp) ||
3396 	    ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
3397 		lock_flags = LK_SHARED;
3398 	} else {
3399 		lock_flags = LK_EXCLUSIVE;
3400 	}
3401 	vn_lock(vp, lock_flags | LK_RETRY);
3402 	AUDIT_ARG_VNODE1(vp);
3403 	if (vp->v_object != NULL) {
3404 		VM_OBJECT_WLOCK(vp->v_object);
3405 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3406 		VM_OBJECT_WUNLOCK(vp->v_object);
3407 	}
3408 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3409 
3410 	VOP_UNLOCK(vp, 0);
3411 	vn_finished_write(mp);
3412 drop:
3413 	fdrop(fp, td);
3414 	return (error);
3415 }
3416 
3417 /*
3418  * Rename files.  Source and destination must either both be directories, or
3419  * both not be directories.  If target is a directory, it must be empty.
3420  */
3421 #ifndef _SYS_SYSPROTO_H_
3422 struct rename_args {
3423 	char	*from;
3424 	char	*to;
3425 };
3426 #endif
3427 int
3428 sys_rename(td, uap)
3429 	struct thread *td;
3430 	register struct rename_args /* {
3431 		char *from;
3432 		char *to;
3433 	} */ *uap;
3434 {
3435 
3436 	return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD,
3437 	    uap->to, UIO_USERSPACE));
3438 }
3439 
3440 #ifndef _SYS_SYSPROTO_H_
3441 struct renameat_args {
3442 	int	oldfd;
3443 	char	*old;
3444 	int	newfd;
3445 	char	*new;
3446 };
3447 #endif
3448 int
3449 sys_renameat(struct thread *td, struct renameat_args *uap)
3450 {
3451 
3452 	return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
3453 	    UIO_USERSPACE));
3454 }
3455 
3456 int
3457 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
3458     enum uio_seg pathseg)
3459 {
3460 	struct mount *mp = NULL;
3461 	struct vnode *tvp, *fvp, *tdvp;
3462 	struct nameidata fromnd, tond;
3463 	cap_rights_t rights;
3464 	int error;
3465 
3466 again:
3467 	bwillwrite();
3468 #ifdef MAC
3469 	NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
3470 	    AUDITVNODE1, pathseg, old, oldfd,
3471 	    cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td);
3472 #else
3473 	NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1,
3474 	    pathseg, old, oldfd,
3475 	    cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td);
3476 #endif
3477 
3478 	if ((error = namei(&fromnd)) != 0)
3479 		return (error);
3480 #ifdef MAC
3481 	error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
3482 	    fromnd.ni_vp, &fromnd.ni_cnd);
3483 	VOP_UNLOCK(fromnd.ni_dvp, 0);
3484 	if (fromnd.ni_dvp != fromnd.ni_vp)
3485 		VOP_UNLOCK(fromnd.ni_vp, 0);
3486 #endif
3487 	fvp = fromnd.ni_vp;
3488 	NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
3489 	    SAVESTART | AUDITVNODE2, pathseg, new, newfd,
3490 	    cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td);
3491 	if (fromnd.ni_vp->v_type == VDIR)
3492 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3493 	if ((error = namei(&tond)) != 0) {
3494 		/* Translate error code for rename("dir1", "dir2/."). */
3495 		if (error == EISDIR && fvp->v_type == VDIR)
3496 			error = EINVAL;
3497 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3498 		vrele(fromnd.ni_dvp);
3499 		vrele(fvp);
3500 		goto out1;
3501 	}
3502 	tdvp = tond.ni_dvp;
3503 	tvp = tond.ni_vp;
3504 	error = vn_start_write(fvp, &mp, V_NOWAIT);
3505 	if (error != 0) {
3506 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3507 		NDFREE(&tond, NDF_ONLY_PNBUF);
3508 		if (tvp != NULL)
3509 			vput(tvp);
3510 		if (tdvp == tvp)
3511 			vrele(tdvp);
3512 		else
3513 			vput(tdvp);
3514 		vrele(fromnd.ni_dvp);
3515 		vrele(fvp);
3516 		vrele(tond.ni_startdir);
3517 		if (fromnd.ni_startdir != NULL)
3518 			vrele(fromnd.ni_startdir);
3519 		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
3520 		if (error != 0)
3521 			return (error);
3522 		goto again;
3523 	}
3524 	if (tvp != NULL) {
3525 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3526 			error = ENOTDIR;
3527 			goto out;
3528 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3529 			error = EISDIR;
3530 			goto out;
3531 		}
3532 #ifdef CAPABILITIES
3533 		if (newfd != AT_FDCWD) {
3534 			/*
3535 			 * If the target already exists we require CAP_UNLINKAT
3536 			 * from 'newfd'.
3537 			 */
3538 			error = cap_check(&tond.ni_filecaps.fc_rights,
3539 			    cap_rights_init(&rights, CAP_UNLINKAT));
3540 			if (error != 0)
3541 				goto out;
3542 		}
3543 #endif
3544 	}
3545 	if (fvp == tdvp) {
3546 		error = EINVAL;
3547 		goto out;
3548 	}
3549 	/*
3550 	 * If the source is the same as the destination (that is, if they
3551 	 * are links to the same vnode), then there is nothing to do.
3552 	 */
3553 	if (fvp == tvp)
3554 		error = -1;
3555 #ifdef MAC
3556 	else
3557 		error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
3558 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3559 #endif
3560 out:
3561 	if (error == 0) {
3562 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3563 		    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3564 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3565 		NDFREE(&tond, NDF_ONLY_PNBUF);
3566 	} else {
3567 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3568 		NDFREE(&tond, NDF_ONLY_PNBUF);
3569 		if (tvp != NULL)
3570 			vput(tvp);
3571 		if (tdvp == tvp)
3572 			vrele(tdvp);
3573 		else
3574 			vput(tdvp);
3575 		vrele(fromnd.ni_dvp);
3576 		vrele(fvp);
3577 	}
3578 	vrele(tond.ni_startdir);
3579 	vn_finished_write(mp);
3580 out1:
3581 	if (fromnd.ni_startdir)
3582 		vrele(fromnd.ni_startdir);
3583 	if (error == -1)
3584 		return (0);
3585 	return (error);
3586 }
3587 
3588 /*
3589  * Make a directory file.
3590  */
3591 #ifndef _SYS_SYSPROTO_H_
3592 struct mkdir_args {
3593 	char	*path;
3594 	int	mode;
3595 };
3596 #endif
3597 int
3598 sys_mkdir(td, uap)
3599 	struct thread *td;
3600 	register struct mkdir_args /* {
3601 		char *path;
3602 		int mode;
3603 	} */ *uap;
3604 {
3605 
3606 	return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
3607 	    uap->mode));
3608 }
3609 
3610 #ifndef _SYS_SYSPROTO_H_
3611 struct mkdirat_args {
3612 	int	fd;
3613 	char	*path;
3614 	mode_t	mode;
3615 };
3616 #endif
3617 int
3618 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
3619 {
3620 
3621 	return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
3622 }
3623 
3624 int
3625 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
3626     int mode)
3627 {
3628 	struct mount *mp;
3629 	struct vnode *vp;
3630 	struct vattr vattr;
3631 	struct nameidata nd;
3632 	cap_rights_t rights;
3633 	int error;
3634 
3635 	AUDIT_ARG_MODE(mode);
3636 restart:
3637 	bwillwrite();
3638 	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
3639 	    NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT),
3640 	    td);
3641 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3642 	if ((error = namei(&nd)) != 0)
3643 		return (error);
3644 	vp = nd.ni_vp;
3645 	if (vp != NULL) {
3646 		NDFREE(&nd, NDF_ONLY_PNBUF);
3647 		/*
3648 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3649 		 * the strange behaviour of leaving the vnode unlocked
3650 		 * if the target is the same vnode as the parent.
3651 		 */
3652 		if (vp == nd.ni_dvp)
3653 			vrele(nd.ni_dvp);
3654 		else
3655 			vput(nd.ni_dvp);
3656 		vrele(vp);
3657 		return (EEXIST);
3658 	}
3659 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3660 		NDFREE(&nd, NDF_ONLY_PNBUF);
3661 		vput(nd.ni_dvp);
3662 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3663 			return (error);
3664 		goto restart;
3665 	}
3666 	VATTR_NULL(&vattr);
3667 	vattr.va_type = VDIR;
3668 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3669 #ifdef MAC
3670 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3671 	    &vattr);
3672 	if (error != 0)
3673 		goto out;
3674 #endif
3675 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3676 #ifdef MAC
3677 out:
3678 #endif
3679 	NDFREE(&nd, NDF_ONLY_PNBUF);
3680 	vput(nd.ni_dvp);
3681 	if (error == 0)
3682 		vput(nd.ni_vp);
3683 	vn_finished_write(mp);
3684 	return (error);
3685 }
3686 
3687 /*
3688  * Remove a directory file.
3689  */
3690 #ifndef _SYS_SYSPROTO_H_
3691 struct rmdir_args {
3692 	char	*path;
3693 };
3694 #endif
3695 int
3696 sys_rmdir(td, uap)
3697 	struct thread *td;
3698 	struct rmdir_args /* {
3699 		char *path;
3700 	} */ *uap;
3701 {
3702 
3703 	return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE));
3704 }
3705 
3706 int
3707 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
3708 {
3709 	struct mount *mp;
3710 	struct vnode *vp;
3711 	struct nameidata nd;
3712 	cap_rights_t rights;
3713 	int error;
3714 
3715 restart:
3716 	bwillwrite();
3717 	NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
3718 	    pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
3719 	if ((error = namei(&nd)) != 0)
3720 		return (error);
3721 	vp = nd.ni_vp;
3722 	if (vp->v_type != VDIR) {
3723 		error = ENOTDIR;
3724 		goto out;
3725 	}
3726 	/*
3727 	 * No rmdir "." please.
3728 	 */
3729 	if (nd.ni_dvp == vp) {
3730 		error = EINVAL;
3731 		goto out;
3732 	}
3733 	/*
3734 	 * The root of a mounted filesystem cannot be deleted.
3735 	 */
3736 	if (vp->v_vflag & VV_ROOT) {
3737 		error = EBUSY;
3738 		goto out;
3739 	}
3740 #ifdef MAC
3741 	error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
3742 	    &nd.ni_cnd);
3743 	if (error != 0)
3744 		goto out;
3745 #endif
3746 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3747 		NDFREE(&nd, NDF_ONLY_PNBUF);
3748 		vput(vp);
3749 		if (nd.ni_dvp == vp)
3750 			vrele(nd.ni_dvp);
3751 		else
3752 			vput(nd.ni_dvp);
3753 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3754 			return (error);
3755 		goto restart;
3756 	}
3757 	vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
3758 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3759 	vn_finished_write(mp);
3760 out:
3761 	NDFREE(&nd, NDF_ONLY_PNBUF);
3762 	vput(vp);
3763 	if (nd.ni_dvp == vp)
3764 		vrele(nd.ni_dvp);
3765 	else
3766 		vput(nd.ni_dvp);
3767 	return (error);
3768 }
3769 
3770 #ifdef COMPAT_43
3771 /*
3772  * Read a block of directory entries in a filesystem independent format.
3773  */
3774 #ifndef _SYS_SYSPROTO_H_
3775 struct ogetdirentries_args {
3776 	int	fd;
3777 	char	*buf;
3778 	u_int	count;
3779 	long	*basep;
3780 };
3781 #endif
3782 int
3783 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
3784 {
3785 	long loff;
3786 	int error;
3787 
3788 	error = kern_ogetdirentries(td, uap, &loff);
3789 	if (error == 0)
3790 		error = copyout(&loff, uap->basep, sizeof(long));
3791 	return (error);
3792 }
3793 
3794 int
3795 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
3796     long *ploff)
3797 {
3798 	struct vnode *vp;
3799 	struct file *fp;
3800 	struct uio auio, kuio;
3801 	struct iovec aiov, kiov;
3802 	struct dirent *dp, *edp;
3803 	cap_rights_t rights;
3804 	caddr_t dirbuf;
3805 	int error, eofflag, readcnt;
3806 	long loff;
3807 	off_t foffset;
3808 
3809 	/* XXX arbitrary sanity limit on `count'. */
3810 	if (uap->count > 64 * 1024)
3811 		return (EINVAL);
3812 	error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp);
3813 	if (error != 0)
3814 		return (error);
3815 	if ((fp->f_flag & FREAD) == 0) {
3816 		fdrop(fp, td);
3817 		return (EBADF);
3818 	}
3819 	vp = fp->f_vnode;
3820 	foffset = foffset_lock(fp, 0);
3821 unionread:
3822 	if (vp->v_type != VDIR) {
3823 		foffset_unlock(fp, foffset, 0);
3824 		fdrop(fp, td);
3825 		return (EINVAL);
3826 	}
3827 	aiov.iov_base = uap->buf;
3828 	aiov.iov_len = uap->count;
3829 	auio.uio_iov = &aiov;
3830 	auio.uio_iovcnt = 1;
3831 	auio.uio_rw = UIO_READ;
3832 	auio.uio_segflg = UIO_USERSPACE;
3833 	auio.uio_td = td;
3834 	auio.uio_resid = uap->count;
3835 	vn_lock(vp, LK_SHARED | LK_RETRY);
3836 	loff = auio.uio_offset = foffset;
3837 #ifdef MAC
3838 	error = mac_vnode_check_readdir(td->td_ucred, vp);
3839 	if (error != 0) {
3840 		VOP_UNLOCK(vp, 0);
3841 		foffset_unlock(fp, foffset, FOF_NOUPDATE);
3842 		fdrop(fp, td);
3843 		return (error);
3844 	}
3845 #endif
3846 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3847 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3848 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3849 			    NULL, NULL);
3850 			foffset = auio.uio_offset;
3851 		} else
3852 #	endif
3853 	{
3854 		kuio = auio;
3855 		kuio.uio_iov = &kiov;
3856 		kuio.uio_segflg = UIO_SYSSPACE;
3857 		kiov.iov_len = uap->count;
3858 		dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
3859 		kiov.iov_base = dirbuf;
3860 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3861 			    NULL, NULL);
3862 		foffset = kuio.uio_offset;
3863 		if (error == 0) {
3864 			readcnt = uap->count - kuio.uio_resid;
3865 			edp = (struct dirent *)&dirbuf[readcnt];
3866 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3867 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3868 					/*
3869 					 * The expected low byte of
3870 					 * dp->d_namlen is our dp->d_type.
3871 					 * The high MBZ byte of dp->d_namlen
3872 					 * is our dp->d_namlen.
3873 					 */
3874 					dp->d_type = dp->d_namlen;
3875 					dp->d_namlen = 0;
3876 #				else
3877 					/*
3878 					 * The dp->d_type is the high byte
3879 					 * of the expected dp->d_namlen,
3880 					 * so must be zero'ed.
3881 					 */
3882 					dp->d_type = 0;
3883 #				endif
3884 				if (dp->d_reclen > 0) {
3885 					dp = (struct dirent *)
3886 					    ((char *)dp + dp->d_reclen);
3887 				} else {
3888 					error = EIO;
3889 					break;
3890 				}
3891 			}
3892 			if (dp >= edp)
3893 				error = uiomove(dirbuf, readcnt, &auio);
3894 		}
3895 		free(dirbuf, M_TEMP);
3896 	}
3897 	if (error != 0) {
3898 		VOP_UNLOCK(vp, 0);
3899 		foffset_unlock(fp, foffset, 0);
3900 		fdrop(fp, td);
3901 		return (error);
3902 	}
3903 	if (uap->count == auio.uio_resid &&
3904 	    (vp->v_vflag & VV_ROOT) &&
3905 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
3906 		struct vnode *tvp = vp;
3907 		vp = vp->v_mount->mnt_vnodecovered;
3908 		VREF(vp);
3909 		fp->f_vnode = vp;
3910 		fp->f_data = vp;
3911 		foffset = 0;
3912 		vput(tvp);
3913 		goto unionread;
3914 	}
3915 	VOP_UNLOCK(vp, 0);
3916 	foffset_unlock(fp, foffset, 0);
3917 	fdrop(fp, td);
3918 	td->td_retval[0] = uap->count - auio.uio_resid;
3919 	if (error == 0)
3920 		*ploff = loff;
3921 	return (error);
3922 }
3923 #endif /* COMPAT_43 */
3924 
3925 /*
3926  * Read a block of directory entries in a filesystem independent format.
3927  */
3928 #ifndef _SYS_SYSPROTO_H_
3929 struct getdirentries_args {
3930 	int	fd;
3931 	char	*buf;
3932 	u_int	count;
3933 	long	*basep;
3934 };
3935 #endif
3936 int
3937 sys_getdirentries(td, uap)
3938 	struct thread *td;
3939 	register struct getdirentries_args /* {
3940 		int fd;
3941 		char *buf;
3942 		u_int count;
3943 		long *basep;
3944 	} */ *uap;
3945 {
3946 	long base;
3947 	int error;
3948 
3949 	error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
3950 	    NULL, UIO_USERSPACE);
3951 	if (error != 0)
3952 		return (error);
3953 	if (uap->basep != NULL)
3954 		error = copyout(&base, uap->basep, sizeof(long));
3955 	return (error);
3956 }
3957 
3958 int
3959 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
3960     long *basep, ssize_t *residp, enum uio_seg bufseg)
3961 {
3962 	struct vnode *vp;
3963 	struct file *fp;
3964 	struct uio auio;
3965 	struct iovec aiov;
3966 	cap_rights_t rights;
3967 	long loff;
3968 	int error, eofflag;
3969 	off_t foffset;
3970 
3971 	AUDIT_ARG_FD(fd);
3972 	if (count > IOSIZE_MAX)
3973 		return (EINVAL);
3974 	auio.uio_resid = count;
3975 	error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp);
3976 	if (error != 0)
3977 		return (error);
3978 	if ((fp->f_flag & FREAD) == 0) {
3979 		fdrop(fp, td);
3980 		return (EBADF);
3981 	}
3982 	vp = fp->f_vnode;
3983 	foffset = foffset_lock(fp, 0);
3984 unionread:
3985 	if (vp->v_type != VDIR) {
3986 		error = EINVAL;
3987 		goto fail;
3988 	}
3989 	aiov.iov_base = buf;
3990 	aiov.iov_len = count;
3991 	auio.uio_iov = &aiov;
3992 	auio.uio_iovcnt = 1;
3993 	auio.uio_rw = UIO_READ;
3994 	auio.uio_segflg = bufseg;
3995 	auio.uio_td = td;
3996 	vn_lock(vp, LK_SHARED | LK_RETRY);
3997 	AUDIT_ARG_VNODE1(vp);
3998 	loff = auio.uio_offset = foffset;
3999 #ifdef MAC
4000 	error = mac_vnode_check_readdir(td->td_ucred, vp);
4001 	if (error == 0)
4002 #endif
4003 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
4004 		    NULL);
4005 	foffset = auio.uio_offset;
4006 	if (error != 0) {
4007 		VOP_UNLOCK(vp, 0);
4008 		goto fail;
4009 	}
4010 	if (count == auio.uio_resid &&
4011 	    (vp->v_vflag & VV_ROOT) &&
4012 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
4013 		struct vnode *tvp = vp;
4014 
4015 		vp = vp->v_mount->mnt_vnodecovered;
4016 		VREF(vp);
4017 		fp->f_vnode = vp;
4018 		fp->f_data = vp;
4019 		foffset = 0;
4020 		vput(tvp);
4021 		goto unionread;
4022 	}
4023 	VOP_UNLOCK(vp, 0);
4024 	*basep = loff;
4025 	if (residp != NULL)
4026 		*residp = auio.uio_resid;
4027 	td->td_retval[0] = count - auio.uio_resid;
4028 fail:
4029 	foffset_unlock(fp, foffset, 0);
4030 	fdrop(fp, td);
4031 	return (error);
4032 }
4033 
4034 #ifndef _SYS_SYSPROTO_H_
4035 struct getdents_args {
4036 	int fd;
4037 	char *buf;
4038 	size_t count;
4039 };
4040 #endif
4041 int
4042 sys_getdents(td, uap)
4043 	struct thread *td;
4044 	register struct getdents_args /* {
4045 		int fd;
4046 		char *buf;
4047 		u_int count;
4048 	} */ *uap;
4049 {
4050 	struct getdirentries_args ap;
4051 
4052 	ap.fd = uap->fd;
4053 	ap.buf = uap->buf;
4054 	ap.count = uap->count;
4055 	ap.basep = NULL;
4056 	return (sys_getdirentries(td, &ap));
4057 }
4058 
4059 /*
4060  * Set the mode mask for creation of filesystem nodes.
4061  */
4062 #ifndef _SYS_SYSPROTO_H_
4063 struct umask_args {
4064 	int	newmask;
4065 };
4066 #endif
4067 int
4068 sys_umask(td, uap)
4069 	struct thread *td;
4070 	struct umask_args /* {
4071 		int newmask;
4072 	} */ *uap;
4073 {
4074 	struct filedesc *fdp;
4075 
4076 	fdp = td->td_proc->p_fd;
4077 	FILEDESC_XLOCK(fdp);
4078 	td->td_retval[0] = fdp->fd_cmask;
4079 	fdp->fd_cmask = uap->newmask & ALLPERMS;
4080 	FILEDESC_XUNLOCK(fdp);
4081 	return (0);
4082 }
4083 
4084 /*
4085  * Void all references to file by ripping underlying filesystem away from
4086  * vnode.
4087  */
4088 #ifndef _SYS_SYSPROTO_H_
4089 struct revoke_args {
4090 	char	*path;
4091 };
4092 #endif
4093 int
4094 sys_revoke(td, uap)
4095 	struct thread *td;
4096 	register struct revoke_args /* {
4097 		char *path;
4098 	} */ *uap;
4099 {
4100 	struct vnode *vp;
4101 	struct vattr vattr;
4102 	struct nameidata nd;
4103 	int error;
4104 
4105 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
4106 	    uap->path, td);
4107 	if ((error = namei(&nd)) != 0)
4108 		return (error);
4109 	vp = nd.ni_vp;
4110 	NDFREE(&nd, NDF_ONLY_PNBUF);
4111 	if (vp->v_type != VCHR || vp->v_rdev == NULL) {
4112 		error = EINVAL;
4113 		goto out;
4114 	}
4115 #ifdef MAC
4116 	error = mac_vnode_check_revoke(td->td_ucred, vp);
4117 	if (error != 0)
4118 		goto out;
4119 #endif
4120 	error = VOP_GETATTR(vp, &vattr, td->td_ucred);
4121 	if (error != 0)
4122 		goto out;
4123 	if (td->td_ucred->cr_uid != vattr.va_uid) {
4124 		error = priv_check(td, PRIV_VFS_ADMIN);
4125 		if (error != 0)
4126 			goto out;
4127 	}
4128 	if (vcount(vp) > 1)
4129 		VOP_REVOKE(vp, REVOKEALL);
4130 out:
4131 	vput(vp);
4132 	return (error);
4133 }
4134 
4135 /*
4136  * Convert a user file descriptor to a kernel file entry and check that, if it
4137  * is a capability, the correct rights are present. A reference on the file
4138  * entry is held upon returning.
4139  */
4140 int
4141 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
4142 {
4143 	struct file *fp;
4144 	int error;
4145 
4146 	error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL);
4147 	if (error != 0)
4148 		return (error);
4149 
4150 	/*
4151 	 * The file could be not of the vnode type, or it may be not
4152 	 * yet fully initialized, in which case the f_vnode pointer
4153 	 * may be set, but f_ops is still badfileops.  E.g.,
4154 	 * devfs_open() transiently create such situation to
4155 	 * facilitate csw d_fdopen().
4156 	 *
4157 	 * Dupfdopen() handling in kern_openat() installs the
4158 	 * half-baked file into the process descriptor table, allowing
4159 	 * other thread to dereference it. Guard against the race by
4160 	 * checking f_ops.
4161 	 */
4162 	if (fp->f_vnode == NULL || fp->f_ops == &badfileops) {
4163 		fdrop(fp, td);
4164 		return (EINVAL);
4165 	}
4166 	*fpp = fp;
4167 	return (0);
4168 }
4169 
4170 
4171 /*
4172  * Get an (NFS) file handle.
4173  */
4174 #ifndef _SYS_SYSPROTO_H_
4175 struct lgetfh_args {
4176 	char	*fname;
4177 	fhandle_t *fhp;
4178 };
4179 #endif
4180 int
4181 sys_lgetfh(td, uap)
4182 	struct thread *td;
4183 	register struct lgetfh_args *uap;
4184 {
4185 	struct nameidata nd;
4186 	fhandle_t fh;
4187 	register struct vnode *vp;
4188 	int error;
4189 
4190 	error = priv_check(td, PRIV_VFS_GETFH);
4191 	if (error != 0)
4192 		return (error);
4193 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
4194 	    uap->fname, td);
4195 	error = namei(&nd);
4196 	if (error != 0)
4197 		return (error);
4198 	NDFREE(&nd, NDF_ONLY_PNBUF);
4199 	vp = nd.ni_vp;
4200 	bzero(&fh, sizeof(fh));
4201 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4202 	error = VOP_VPTOFH(vp, &fh.fh_fid);
4203 	vput(vp);
4204 	if (error == 0)
4205 		error = copyout(&fh, uap->fhp, sizeof (fh));
4206 	return (error);
4207 }
4208 
4209 #ifndef _SYS_SYSPROTO_H_
4210 struct getfh_args {
4211 	char	*fname;
4212 	fhandle_t *fhp;
4213 };
4214 #endif
4215 int
4216 sys_getfh(td, uap)
4217 	struct thread *td;
4218 	register struct getfh_args *uap;
4219 {
4220 	struct nameidata nd;
4221 	fhandle_t fh;
4222 	register struct vnode *vp;
4223 	int error;
4224 
4225 	error = priv_check(td, PRIV_VFS_GETFH);
4226 	if (error != 0)
4227 		return (error);
4228 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
4229 	    uap->fname, td);
4230 	error = namei(&nd);
4231 	if (error != 0)
4232 		return (error);
4233 	NDFREE(&nd, NDF_ONLY_PNBUF);
4234 	vp = nd.ni_vp;
4235 	bzero(&fh, sizeof(fh));
4236 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4237 	error = VOP_VPTOFH(vp, &fh.fh_fid);
4238 	vput(vp);
4239 	if (error == 0)
4240 		error = copyout(&fh, uap->fhp, sizeof (fh));
4241 	return (error);
4242 }
4243 
4244 /*
4245  * syscall for the rpc.lockd to use to translate a NFS file handle into an
4246  * open descriptor.
4247  *
4248  * warning: do not remove the priv_check() call or this becomes one giant
4249  * security hole.
4250  */
4251 #ifndef _SYS_SYSPROTO_H_
4252 struct fhopen_args {
4253 	const struct fhandle *u_fhp;
4254 	int flags;
4255 };
4256 #endif
4257 int
4258 sys_fhopen(td, uap)
4259 	struct thread *td;
4260 	struct fhopen_args /* {
4261 		const struct fhandle *u_fhp;
4262 		int flags;
4263 	} */ *uap;
4264 {
4265 	struct mount *mp;
4266 	struct vnode *vp;
4267 	struct fhandle fhp;
4268 	struct file *fp;
4269 	int fmode, error;
4270 	int indx;
4271 
4272 	error = priv_check(td, PRIV_VFS_FHOPEN);
4273 	if (error != 0)
4274 		return (error);
4275 	indx = -1;
4276 	fmode = FFLAGS(uap->flags);
4277 	/* why not allow a non-read/write open for our lockd? */
4278 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4279 		return (EINVAL);
4280 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4281 	if (error != 0)
4282 		return(error);
4283 	/* find the mount point */
4284 	mp = vfs_busyfs(&fhp.fh_fsid);
4285 	if (mp == NULL)
4286 		return (ESTALE);
4287 	/* now give me my vnode, it gets returned to me locked */
4288 	error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
4289 	vfs_unbusy(mp);
4290 	if (error != 0)
4291 		return (error);
4292 
4293 	error = falloc_noinstall(td, &fp);
4294 	if (error != 0) {
4295 		vput(vp);
4296 		return (error);
4297 	}
4298 	/*
4299 	 * An extra reference on `fp' has been held for us by
4300 	 * falloc_noinstall().
4301 	 */
4302 
4303 #ifdef INVARIANTS
4304 	td->td_dupfd = -1;
4305 #endif
4306 	error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp);
4307 	if (error != 0) {
4308 		KASSERT(fp->f_ops == &badfileops,
4309 		    ("VOP_OPEN in fhopen() set f_ops"));
4310 		KASSERT(td->td_dupfd < 0,
4311 		    ("fhopen() encountered fdopen()"));
4312 
4313 		vput(vp);
4314 		goto bad;
4315 	}
4316 #ifdef INVARIANTS
4317 	td->td_dupfd = 0;
4318 #endif
4319 	fp->f_vnode = vp;
4320 	fp->f_seqcount = 1;
4321 	finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp,
4322 	    &vnops);
4323 	VOP_UNLOCK(vp, 0);
4324 	if ((fmode & O_TRUNC) != 0) {
4325 		error = fo_truncate(fp, 0, td->td_ucred, td);
4326 		if (error != 0)
4327 			goto bad;
4328 	}
4329 
4330 	error = finstall(td, fp, &indx, fmode, NULL);
4331 bad:
4332 	fdrop(fp, td);
4333 	td->td_retval[0] = indx;
4334 	return (error);
4335 }
4336 
4337 /*
4338  * Stat an (NFS) file handle.
4339  */
4340 #ifndef _SYS_SYSPROTO_H_
4341 struct fhstat_args {
4342 	struct fhandle *u_fhp;
4343 	struct stat *sb;
4344 };
4345 #endif
4346 int
4347 sys_fhstat(td, uap)
4348 	struct thread *td;
4349 	register struct fhstat_args /* {
4350 		struct fhandle *u_fhp;
4351 		struct stat *sb;
4352 	} */ *uap;
4353 {
4354 	struct stat sb;
4355 	struct fhandle fh;
4356 	int error;
4357 
4358 	error = copyin(uap->u_fhp, &fh, sizeof(fh));
4359 	if (error != 0)
4360 		return (error);
4361 	error = kern_fhstat(td, fh, &sb);
4362 	if (error == 0)
4363 		error = copyout(&sb, uap->sb, sizeof(sb));
4364 	return (error);
4365 }
4366 
4367 int
4368 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
4369 {
4370 	struct mount *mp;
4371 	struct vnode *vp;
4372 	int error;
4373 
4374 	error = priv_check(td, PRIV_VFS_FHSTAT);
4375 	if (error != 0)
4376 		return (error);
4377 	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
4378 		return (ESTALE);
4379 	error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
4380 	vfs_unbusy(mp);
4381 	if (error != 0)
4382 		return (error);
4383 	error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
4384 	vput(vp);
4385 	return (error);
4386 }
4387 
4388 /*
4389  * Implement fstatfs() for (NFS) file handles.
4390  */
4391 #ifndef _SYS_SYSPROTO_H_
4392 struct fhstatfs_args {
4393 	struct fhandle *u_fhp;
4394 	struct statfs *buf;
4395 };
4396 #endif
4397 int
4398 sys_fhstatfs(td, uap)
4399 	struct thread *td;
4400 	struct fhstatfs_args /* {
4401 		struct fhandle *u_fhp;
4402 		struct statfs *buf;
4403 	} */ *uap;
4404 {
4405 	struct statfs sf;
4406 	fhandle_t fh;
4407 	int error;
4408 
4409 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4410 	if (error != 0)
4411 		return (error);
4412 	error = kern_fhstatfs(td, fh, &sf);
4413 	if (error != 0)
4414 		return (error);
4415 	return (copyout(&sf, uap->buf, sizeof(sf)));
4416 }
4417 
4418 int
4419 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4420 {
4421 	struct statfs *sp;
4422 	struct mount *mp;
4423 	struct vnode *vp;
4424 	int error;
4425 
4426 	error = priv_check(td, PRIV_VFS_FHSTATFS);
4427 	if (error != 0)
4428 		return (error);
4429 	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
4430 		return (ESTALE);
4431 	error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
4432 	if (error != 0) {
4433 		vfs_unbusy(mp);
4434 		return (error);
4435 	}
4436 	vput(vp);
4437 	error = prison_canseemount(td->td_ucred, mp);
4438 	if (error != 0)
4439 		goto out;
4440 #ifdef MAC
4441 	error = mac_mount_check_stat(td->td_ucred, mp);
4442 	if (error != 0)
4443 		goto out;
4444 #endif
4445 	/*
4446 	 * Set these in case the underlying filesystem fails to do so.
4447 	 */
4448 	sp = &mp->mnt_stat;
4449 	sp->f_version = STATFS_VERSION;
4450 	sp->f_namemax = NAME_MAX;
4451 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4452 	error = VFS_STATFS(mp, sp);
4453 	if (error == 0)
4454 		*buf = *sp;
4455 out:
4456 	vfs_unbusy(mp);
4457 	return (error);
4458 }
4459 
4460 int
4461 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
4462 {
4463 	struct file *fp;
4464 	struct mount *mp;
4465 	struct vnode *vp;
4466 	cap_rights_t rights;
4467 	off_t olen, ooffset;
4468 	int error;
4469 
4470 	if (offset < 0 || len <= 0)
4471 		return (EINVAL);
4472 	/* Check for wrap. */
4473 	if (offset > OFF_MAX - len)
4474 		return (EFBIG);
4475 	error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp);
4476 	if (error != 0)
4477 		return (error);
4478 	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
4479 		error = ESPIPE;
4480 		goto out;
4481 	}
4482 	if ((fp->f_flag & FWRITE) == 0) {
4483 		error = EBADF;
4484 		goto out;
4485 	}
4486 	if (fp->f_type != DTYPE_VNODE) {
4487 		error = ENODEV;
4488 		goto out;
4489 	}
4490 	vp = fp->f_vnode;
4491 	if (vp->v_type != VREG) {
4492 		error = ENODEV;
4493 		goto out;
4494 	}
4495 
4496 	/* Allocating blocks may take a long time, so iterate. */
4497 	for (;;) {
4498 		olen = len;
4499 		ooffset = offset;
4500 
4501 		bwillwrite();
4502 		mp = NULL;
4503 		error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4504 		if (error != 0)
4505 			break;
4506 		error = vn_lock(vp, LK_EXCLUSIVE);
4507 		if (error != 0) {
4508 			vn_finished_write(mp);
4509 			break;
4510 		}
4511 #ifdef MAC
4512 		error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
4513 		if (error == 0)
4514 #endif
4515 			error = VOP_ALLOCATE(vp, &offset, &len);
4516 		VOP_UNLOCK(vp, 0);
4517 		vn_finished_write(mp);
4518 
4519 		if (olen + ooffset != offset + len) {
4520 			panic("offset + len changed from %jx/%jx to %jx/%jx",
4521 			    ooffset, olen, offset, len);
4522 		}
4523 		if (error != 0 || len == 0)
4524 			break;
4525 		KASSERT(olen > len, ("Iteration did not make progress?"));
4526 		maybe_yield();
4527 	}
4528  out:
4529 	fdrop(fp, td);
4530 	return (error);
4531 }
4532 
4533 int
4534 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
4535 {
4536 
4537 	td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset,
4538 	    uap->len);
4539 	return (0);
4540 }
4541 
4542 /*
4543  * Unlike madvise(2), we do not make a best effort to remember every
4544  * possible caching hint.  Instead, we remember the last setting with
4545  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
4546  * region of any current setting.
4547  */
4548 int
4549 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
4550     int advice)
4551 {
4552 	struct fadvise_info *fa, *new;
4553 	struct file *fp;
4554 	struct vnode *vp;
4555 	cap_rights_t rights;
4556 	off_t end;
4557 	int error;
4558 
4559 	if (offset < 0 || len < 0 || offset > OFF_MAX - len)
4560 		return (EINVAL);
4561 	switch (advice) {
4562 	case POSIX_FADV_SEQUENTIAL:
4563 	case POSIX_FADV_RANDOM:
4564 	case POSIX_FADV_NOREUSE:
4565 		new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
4566 		break;
4567 	case POSIX_FADV_NORMAL:
4568 	case POSIX_FADV_WILLNEED:
4569 	case POSIX_FADV_DONTNEED:
4570 		new = NULL;
4571 		break;
4572 	default:
4573 		return (EINVAL);
4574 	}
4575 	/* XXX: CAP_POSIX_FADVISE? */
4576 	error = fget(td, fd, cap_rights_init(&rights), &fp);
4577 	if (error != 0)
4578 		goto out;
4579 	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
4580 		error = ESPIPE;
4581 		goto out;
4582 	}
4583 	if (fp->f_type != DTYPE_VNODE) {
4584 		error = ENODEV;
4585 		goto out;
4586 	}
4587 	vp = fp->f_vnode;
4588 	if (vp->v_type != VREG) {
4589 		error = ENODEV;
4590 		goto out;
4591 	}
4592 	if (len == 0)
4593 		end = OFF_MAX;
4594 	else
4595 		end = offset + len - 1;
4596 	switch (advice) {
4597 	case POSIX_FADV_SEQUENTIAL:
4598 	case POSIX_FADV_RANDOM:
4599 	case POSIX_FADV_NOREUSE:
4600 		/*
4601 		 * Try to merge any existing non-standard region with
4602 		 * this new region if possible, otherwise create a new
4603 		 * non-standard region for this request.
4604 		 */
4605 		mtx_pool_lock(mtxpool_sleep, fp);
4606 		fa = fp->f_advice;
4607 		if (fa != NULL && fa->fa_advice == advice &&
4608 		    ((fa->fa_start <= end && fa->fa_end >= offset) ||
4609 		    (end != OFF_MAX && fa->fa_start == end + 1) ||
4610 		    (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
4611 			if (offset < fa->fa_start)
4612 				fa->fa_start = offset;
4613 			if (end > fa->fa_end)
4614 				fa->fa_end = end;
4615 		} else {
4616 			new->fa_advice = advice;
4617 			new->fa_start = offset;
4618 			new->fa_end = end;
4619 			fp->f_advice = new;
4620 			new = fa;
4621 		}
4622 		mtx_pool_unlock(mtxpool_sleep, fp);
4623 		break;
4624 	case POSIX_FADV_NORMAL:
4625 		/*
4626 		 * If a the "normal" region overlaps with an existing
4627 		 * non-standard region, trim or remove the
4628 		 * non-standard region.
4629 		 */
4630 		mtx_pool_lock(mtxpool_sleep, fp);
4631 		fa = fp->f_advice;
4632 		if (fa != NULL) {
4633 			if (offset <= fa->fa_start && end >= fa->fa_end) {
4634 				new = fa;
4635 				fp->f_advice = NULL;
4636 			} else if (offset <= fa->fa_start &&
4637 			    end >= fa->fa_start)
4638 				fa->fa_start = end + 1;
4639 			else if (offset <= fa->fa_end && end >= fa->fa_end)
4640 				fa->fa_end = offset - 1;
4641 			else if (offset >= fa->fa_start && end <= fa->fa_end) {
4642 				/*
4643 				 * If the "normal" region is a middle
4644 				 * portion of the existing
4645 				 * non-standard region, just remove
4646 				 * the whole thing rather than picking
4647 				 * one side or the other to
4648 				 * preserve.
4649 				 */
4650 				new = fa;
4651 				fp->f_advice = NULL;
4652 			}
4653 		}
4654 		mtx_pool_unlock(mtxpool_sleep, fp);
4655 		break;
4656 	case POSIX_FADV_WILLNEED:
4657 	case POSIX_FADV_DONTNEED:
4658 		error = VOP_ADVISE(vp, offset, end, advice);
4659 		break;
4660 	}
4661 out:
4662 	if (fp != NULL)
4663 		fdrop(fp, td);
4664 	free(new, M_FADVISE);
4665 	return (error);
4666 }
4667 
4668 int
4669 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
4670 {
4671 
4672 	td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset,
4673 	    uap->len, uap->advice);
4674 	return (0);
4675 }
4676