xref: /freebsd/sys/kern/vfs_syscalls.c (revision 7afc53b8dfcc7d5897920ce6cc7e842fbb4ab813)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/sysent.h>
48 #include <sys/mac.h>
49 #include <sys/malloc.h>
50 #include <sys/mount.h>
51 #include <sys/mutex.h>
52 #include <sys/sysproto.h>
53 #include <sys/namei.h>
54 #include <sys/filedesc.h>
55 #include <sys/kernel.h>
56 #include <sys/fcntl.h>
57 #include <sys/file.h>
58 #include <sys/limits.h>
59 #include <sys/linker.h>
60 #include <sys/stat.h>
61 #include <sys/sx.h>
62 #include <sys/unistd.h>
63 #include <sys/vnode.h>
64 #include <sys/proc.h>
65 #include <sys/dirent.h>
66 #include <sys/extattr.h>
67 #include <sys/jail.h>
68 #include <sys/syscallsubr.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/stdarg.h>
72 
73 #include <vm/vm.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_page.h>
76 #include <vm/uma.h>
77 
78 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
79 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
80 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
81 static int setfmode(struct thread *td, struct vnode *, int);
82 static int setfflags(struct thread *td, struct vnode *, int);
83 static int setutimes(struct thread *td, struct vnode *,
84     const struct timespec *, int, int);
85 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
86     struct thread *td);
87 
88 static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
89     size_t nbytes, struct thread *td);
90 
91 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
92 
93 /*
94  * The module initialization routine for POSIX asynchronous I/O will
95  * set this to the version of AIO that it implements.  (Zero means
96  * that it is not implemented.)  This value is used here by pathconf()
97  * and in kern_descrip.c by fpathconf().
98  */
99 int async_io_version;
100 
101 /*
102  * Sync each mounted filesystem.
103  */
104 #ifndef _SYS_SYSPROTO_H_
105 struct sync_args {
106 	int     dummy;
107 };
108 #endif
109 
110 #ifdef DEBUG
111 static int syncprt = 0;
112 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
113 #endif
114 
115 /* ARGSUSED */
116 int
117 sync(td, uap)
118 	struct thread *td;
119 	struct sync_args *uap;
120 {
121 	struct mount *mp, *nmp;
122 	int asyncflag;
123 
124 	mtx_lock(&Giant);
125 	mtx_lock(&mountlist_mtx);
126 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
127 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
128 			nmp = TAILQ_NEXT(mp, mnt_list);
129 			continue;
130 		}
131 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
132 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
133 			asyncflag = mp->mnt_flag & MNT_ASYNC;
134 			mp->mnt_flag &= ~MNT_ASYNC;
135 			vfs_msync(mp, MNT_NOWAIT);
136 			VFS_SYNC(mp, MNT_NOWAIT, td);
137 			mp->mnt_flag |= asyncflag;
138 			vn_finished_write(mp);
139 		}
140 		mtx_lock(&mountlist_mtx);
141 		nmp = TAILQ_NEXT(mp, mnt_list);
142 		vfs_unbusy(mp, td);
143 	}
144 	mtx_unlock(&mountlist_mtx);
145 #if 0
146 /*
147  * XXX don't call vfs_bufstats() yet because that routine
148  * was not imported in the Lite2 merge.
149  */
150 #ifdef DIAGNOSTIC
151 	if (syncprt)
152 		vfs_bufstats();
153 #endif /* DIAGNOSTIC */
154 #endif
155 	mtx_unlock(&Giant);
156 	return (0);
157 }
158 
159 /* XXX PRISON: could be per prison flag */
160 static int prison_quotas;
161 #if 0
162 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
163 #endif
164 
165 /*
166  * Change filesystem quotas.
167  *
168  * MP SAFE
169  */
170 #ifndef _SYS_SYSPROTO_H_
171 struct quotactl_args {
172 	char *path;
173 	int cmd;
174 	int uid;
175 	caddr_t arg;
176 };
177 #endif
178 int
179 quotactl(td, uap)
180 	struct thread *td;
181 	register struct quotactl_args /* {
182 		char *path;
183 		int cmd;
184 		int uid;
185 		caddr_t arg;
186 	} */ *uap;
187 {
188 	struct mount *mp, *vmp;
189 	int error;
190 	struct nameidata nd;
191 
192 	if (jailed(td->td_ucred) && !prison_quotas)
193 		return (EPERM);
194 	mtx_lock(&Giant);
195 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
196 	if ((error = namei(&nd)) != 0) {
197 		mtx_unlock(&Giant);
198 		return (error);
199 	}
200 	NDFREE(&nd, NDF_ONLY_PNBUF);
201 	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
202 	mp = nd.ni_vp->v_mount;
203 	vrele(nd.ni_vp);
204 	if (error) {
205 		mtx_unlock(&Giant);
206 		return (error);
207 	}
208 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
209 	vn_finished_write(vmp);
210 	mtx_unlock(&Giant);
211 	return (error);
212 }
213 
214 /*
215  * Get filesystem statistics.
216  */
217 #ifndef _SYS_SYSPROTO_H_
218 struct statfs_args {
219 	char *path;
220 	struct statfs *buf;
221 };
222 #endif
223 int
224 statfs(td, uap)
225 	struct thread *td;
226 	register struct statfs_args /* {
227 		char *path;
228 		struct statfs *buf;
229 	} */ *uap;
230 {
231 	struct statfs sf;
232 	int error;
233 
234 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
235 	if (error == 0)
236 		error = copyout(&sf, uap->buf, sizeof(sf));
237 	return (error);
238 }
239 
240 int
241 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
242     struct statfs *buf)
243 {
244 	struct mount *mp;
245 	struct statfs *sp, sb;
246 	int error;
247 	struct nameidata nd;
248 
249 	mtx_lock(&Giant);
250 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
251 	error = namei(&nd);
252 	if (error) {
253 		mtx_unlock(&Giant);
254 		return (error);
255 	}
256 	mp = nd.ni_vp->v_mount;
257 	sp = &mp->mnt_stat;
258 	NDFREE(&nd, NDF_ONLY_PNBUF);
259 	vrele(nd.ni_vp);
260 #ifdef MAC
261 	error = mac_check_mount_stat(td->td_ucred, mp);
262 	if (error) {
263 		mtx_unlock(&Giant);
264 		return (error);
265 	}
266 #endif
267 	/*
268 	 * Set these in case the underlying filesystem fails to do so.
269 	 */
270 	sp->f_version = STATFS_VERSION;
271 	sp->f_namemax = NAME_MAX;
272 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
273 	error = VFS_STATFS(mp, sp, td);
274 	mtx_unlock(&Giant);
275 	if (error)
276 		return (error);
277 	if (suser(td)) {
278 		bcopy(sp, &sb, sizeof(sb));
279 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
280 		sp = &sb;
281 	}
282 	*buf = *sp;
283 	return (0);
284 }
285 
286 /*
287  * Get filesystem statistics.
288  */
289 #ifndef _SYS_SYSPROTO_H_
290 struct fstatfs_args {
291 	int fd;
292 	struct statfs *buf;
293 };
294 #endif
295 int
296 fstatfs(td, uap)
297 	struct thread *td;
298 	register struct fstatfs_args /* {
299 		int fd;
300 		struct statfs *buf;
301 	} */ *uap;
302 {
303 	struct statfs sf;
304 	int error;
305 
306 	error = kern_fstatfs(td, uap->fd, &sf);
307 	if (error == 0)
308 		error = copyout(&sf, uap->buf, sizeof(sf));
309 	return (error);
310 }
311 
312 int
313 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
314 {
315 	struct file *fp;
316 	struct mount *mp;
317 	struct statfs *sp, sb;
318 	int error;
319 
320 	error = getvnode(td->td_proc->p_fd, fd, &fp);
321 	if (error)
322 		return (error);
323 	mtx_lock(&Giant);
324 	mp = fp->f_vnode->v_mount;
325 	fdrop(fp, td);
326 	if (mp == NULL) {
327 		mtx_unlock(&Giant);
328 		return (EBADF);
329 	}
330 #ifdef MAC
331 	error = mac_check_mount_stat(td->td_ucred, mp);
332 	if (error) {
333 		mtx_unlock(&Giant);
334 		return (error);
335 	}
336 #endif
337 	sp = &mp->mnt_stat;
338 	/*
339 	 * Set these in case the underlying filesystem fails to do so.
340 	 */
341 	sp->f_version = STATFS_VERSION;
342 	sp->f_namemax = NAME_MAX;
343 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
344 	error = VFS_STATFS(mp, sp, td);
345 	mtx_unlock(&Giant);
346 	if (error)
347 		return (error);
348 	if (suser(td)) {
349 		bcopy(sp, &sb, sizeof(sb));
350 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
351 		sp = &sb;
352 	}
353 	*buf = *sp;
354 	return (0);
355 }
356 
357 /*
358  * Get statistics on all filesystems.
359  */
360 #ifndef _SYS_SYSPROTO_H_
361 struct getfsstat_args {
362 	struct statfs *buf;
363 	long bufsize;
364 	int flags;
365 };
366 #endif
367 int
368 getfsstat(td, uap)
369 	struct thread *td;
370 	register struct getfsstat_args /* {
371 		struct statfs *buf;
372 		long bufsize;
373 		int flags;
374 	} */ *uap;
375 {
376 	struct mount *mp, *nmp;
377 	struct statfs *sp, sb;
378 	caddr_t sfsp;
379 	long count, maxcount, error;
380 
381 	maxcount = uap->bufsize / sizeof(struct statfs);
382 	sfsp = (caddr_t)uap->buf;
383 	count = 0;
384 	mtx_lock(&Giant);
385 	mtx_lock(&mountlist_mtx);
386 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
387 		if (!prison_check_mount(td->td_ucred, mp)) {
388 			nmp = TAILQ_NEXT(mp, mnt_list);
389 			continue;
390 		}
391 #ifdef MAC
392 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
393 			nmp = TAILQ_NEXT(mp, mnt_list);
394 			continue;
395 		}
396 #endif
397 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
398 			nmp = TAILQ_NEXT(mp, mnt_list);
399 			continue;
400 		}
401 		if (sfsp && count < maxcount) {
402 			sp = &mp->mnt_stat;
403 			/*
404 			 * Set these in case the underlying filesystem
405 			 * fails to do so.
406 			 */
407 			sp->f_version = STATFS_VERSION;
408 			sp->f_namemax = NAME_MAX;
409 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
410 			/*
411 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
412 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
413 			 * overrides MNT_WAIT.
414 			 */
415 			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
416 			    (uap->flags & MNT_WAIT)) &&
417 			    (error = VFS_STATFS(mp, sp, td))) {
418 				mtx_lock(&mountlist_mtx);
419 				nmp = TAILQ_NEXT(mp, mnt_list);
420 				vfs_unbusy(mp, td);
421 				continue;
422 			}
423 			if (suser(td)) {
424 				bcopy(sp, &sb, sizeof(sb));
425 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
426 				sp = &sb;
427 			}
428 			error = copyout(sp, sfsp, sizeof(*sp));
429 			if (error) {
430 				vfs_unbusy(mp, td);
431 				mtx_unlock(&Giant);
432 				return (error);
433 			}
434 			sfsp += sizeof(*sp);
435 		}
436 		count++;
437 		mtx_lock(&mountlist_mtx);
438 		nmp = TAILQ_NEXT(mp, mnt_list);
439 		vfs_unbusy(mp, td);
440 	}
441 	mtx_unlock(&mountlist_mtx);
442 	mtx_unlock(&Giant);
443 	if (sfsp && count > maxcount)
444 		td->td_retval[0] = maxcount;
445 	else
446 		td->td_retval[0] = count;
447 	return (0);
448 }
449 
450 #ifdef COMPAT_FREEBSD4
451 /*
452  * Get old format filesystem statistics.
453  */
454 static void cvtstatfs(struct statfs *, struct ostatfs *);
455 
456 #ifndef _SYS_SYSPROTO_H_
457 struct freebsd4_statfs_args {
458 	char *path;
459 	struct ostatfs *buf;
460 };
461 #endif
462 int
463 freebsd4_statfs(td, uap)
464 	struct thread *td;
465 	struct freebsd4_statfs_args /* {
466 		char *path;
467 		struct ostatfs *buf;
468 	} */ *uap;
469 {
470 	struct ostatfs osb;
471 	struct statfs sf;
472 	int error;
473 
474 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
475 	if (error)
476 		return (error);
477 	cvtstatfs(&sf, &osb);
478 	return (copyout(&osb, uap->buf, sizeof(osb)));
479 }
480 
481 /*
482  * Get filesystem statistics.
483  */
484 #ifndef _SYS_SYSPROTO_H_
485 struct freebsd4_fstatfs_args {
486 	int fd;
487 	struct ostatfs *buf;
488 };
489 #endif
490 int
491 freebsd4_fstatfs(td, uap)
492 	struct thread *td;
493 	struct freebsd4_fstatfs_args /* {
494 		int fd;
495 		struct ostatfs *buf;
496 	} */ *uap;
497 {
498 	struct ostatfs osb;
499 	struct statfs sf;
500 	int error;
501 
502 	error = kern_fstatfs(td, uap->fd, &sf);
503 	if (error)
504 		return (error);
505 	cvtstatfs(&sf, &osb);
506 	return (copyout(&osb, uap->buf, sizeof(osb)));
507 }
508 
509 /*
510  * Get statistics on all filesystems.
511  */
512 #ifndef _SYS_SYSPROTO_H_
513 struct freebsd4_getfsstat_args {
514 	struct ostatfs *buf;
515 	long bufsize;
516 	int flags;
517 };
518 #endif
519 int
520 freebsd4_getfsstat(td, uap)
521 	struct thread *td;
522 	register struct freebsd4_getfsstat_args /* {
523 		struct ostatfs *buf;
524 		long bufsize;
525 		int flags;
526 	} */ *uap;
527 {
528 	struct mount *mp, *nmp;
529 	struct statfs *sp, sb;
530 	struct ostatfs osb;
531 	caddr_t sfsp;
532 	long count, maxcount, error;
533 
534 	maxcount = uap->bufsize / sizeof(struct ostatfs);
535 	sfsp = (caddr_t)uap->buf;
536 	count = 0;
537 	mtx_lock(&Giant);
538 	mtx_lock(&mountlist_mtx);
539 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
540 		if (!prison_check_mount(td->td_ucred, mp)) {
541 			nmp = TAILQ_NEXT(mp, mnt_list);
542 			continue;
543 		}
544 #ifdef MAC
545 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
546 			nmp = TAILQ_NEXT(mp, mnt_list);
547 			continue;
548 		}
549 #endif
550 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
551 			nmp = TAILQ_NEXT(mp, mnt_list);
552 			continue;
553 		}
554 		if (sfsp && count < maxcount) {
555 			sp = &mp->mnt_stat;
556 			/*
557 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
558 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
559 			 * overrides MNT_WAIT.
560 			 */
561 			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
562 			    (uap->flags & MNT_WAIT)) &&
563 			    (error = VFS_STATFS(mp, sp, td))) {
564 				mtx_lock(&mountlist_mtx);
565 				nmp = TAILQ_NEXT(mp, mnt_list);
566 				vfs_unbusy(mp, td);
567 				continue;
568 			}
569 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
570 			if (suser(td)) {
571 				bcopy(sp, &sb, sizeof(sb));
572 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
573 				sp = &sb;
574 			}
575 			cvtstatfs(sp, &osb);
576 			error = copyout(&osb, sfsp, sizeof(osb));
577 			if (error) {
578 				vfs_unbusy(mp, td);
579 				mtx_unlock(&Giant);
580 				return (error);
581 			}
582 			sfsp += sizeof(osb);
583 		}
584 		count++;
585 		mtx_lock(&mountlist_mtx);
586 		nmp = TAILQ_NEXT(mp, mnt_list);
587 		vfs_unbusy(mp, td);
588 	}
589 	mtx_unlock(&mountlist_mtx);
590 	mtx_unlock(&Giant);
591 	if (sfsp && count > maxcount)
592 		td->td_retval[0] = maxcount;
593 	else
594 		td->td_retval[0] = count;
595 	return (0);
596 }
597 
598 /*
599  * Implement fstatfs() for (NFS) file handles.
600  */
601 #ifndef _SYS_SYSPROTO_H_
602 struct freebsd4_fhstatfs_args {
603 	struct fhandle *u_fhp;
604 	struct ostatfs *buf;
605 };
606 #endif
607 int
608 freebsd4_fhstatfs(td, uap)
609 	struct thread *td;
610 	struct freebsd4_fhstatfs_args /* {
611 		struct fhandle *u_fhp;
612 		struct ostatfs *buf;
613 	} */ *uap;
614 {
615 	struct ostatfs osb;
616 	struct statfs sf;
617 	fhandle_t fh;
618 	int error;
619 
620 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
621 	if (error)
622 		return (error);
623 	error = kern_fhstatfs(td, fh, &sf);
624 	if (error)
625 		return (error);
626 	cvtstatfs(&sf, &osb);
627 	return (copyout(&osb, uap->buf, sizeof(osb)));
628 }
629 
630 /*
631  * Convert a new format statfs structure to an old format statfs structure.
632  */
633 static void
634 cvtstatfs(nsp, osp)
635 	struct statfs *nsp;
636 	struct ostatfs *osp;
637 {
638 
639 	bzero(osp, sizeof(*osp));
640 	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
641 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
642 	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
643 	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
644 	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
645 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
646 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
647 	osp->f_owner = nsp->f_owner;
648 	osp->f_type = nsp->f_type;
649 	osp->f_flags = nsp->f_flags;
650 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
651 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
652 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
653 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
654 	bcopy(nsp->f_fstypename, osp->f_fstypename,
655 	    MIN(MFSNAMELEN, OMNAMELEN));
656 	bcopy(nsp->f_mntonname, osp->f_mntonname,
657 	    MIN(MFSNAMELEN, OMNAMELEN));
658 	bcopy(nsp->f_mntfromname, osp->f_mntfromname,
659 	    MIN(MFSNAMELEN, OMNAMELEN));
660 	osp->f_fsid = nsp->f_fsid;
661 }
662 #endif /* COMPAT_FREEBSD4 */
663 
664 /*
665  * Change current working directory to a given file descriptor.
666  */
667 #ifndef _SYS_SYSPROTO_H_
668 struct fchdir_args {
669 	int	fd;
670 };
671 #endif
672 int
673 fchdir(td, uap)
674 	struct thread *td;
675 	struct fchdir_args /* {
676 		int fd;
677 	} */ *uap;
678 {
679 	register struct filedesc *fdp = td->td_proc->p_fd;
680 	struct vnode *vp, *tdp, *vpold;
681 	struct mount *mp;
682 	struct file *fp;
683 	int vfslocked;
684 	int error;
685 
686 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
687 		return (error);
688 	vp = fp->f_vnode;
689 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
690 	VREF(vp);
691 	fdrop(fp, td);
692 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
693 	if (vp->v_type != VDIR)
694 		error = ENOTDIR;
695 #ifdef MAC
696 	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
697 	}
698 #endif
699 	else
700 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
701 	while (!error && (mp = vp->v_mountedhere) != NULL) {
702 		int tvfslocked;
703 		if (vfs_busy(mp, 0, 0, td))
704 			continue;
705 		tvfslocked = VFS_LOCK_GIANT(mp);
706 		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
707 		vfs_unbusy(mp, td);
708 		if (error) {
709 			VFS_UNLOCK_GIANT(tvfslocked);
710 			break;
711 		}
712 		vput(vp);
713 		VFS_UNLOCK_GIANT(vfslocked);
714 		vp = tdp;
715 		vfslocked = tvfslocked;
716 	}
717 	if (error) {
718 		vput(vp);
719 		VFS_UNLOCK_GIANT(vfslocked);
720 		return (error);
721 	}
722 	VOP_UNLOCK(vp, 0, td);
723 	FILEDESC_LOCK_FAST(fdp);
724 	vpold = fdp->fd_cdir;
725 	fdp->fd_cdir = vp;
726 	FILEDESC_UNLOCK_FAST(fdp);
727 	vrele(vpold);
728 	VFS_UNLOCK_GIANT(vfslocked);
729 	return (0);
730 }
731 
732 /*
733  * Change current working directory (``.'').
734  */
735 #ifndef _SYS_SYSPROTO_H_
736 struct chdir_args {
737 	char	*path;
738 };
739 #endif
740 int
741 chdir(td, uap)
742 	struct thread *td;
743 	struct chdir_args /* {
744 		char *path;
745 	} */ *uap;
746 {
747 
748 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
749 }
750 
751 int
752 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
753 {
754 	register struct filedesc *fdp = td->td_proc->p_fd;
755 	int error;
756 	struct nameidata nd;
757 	struct vnode *vp;
758 	int vfslocked;
759 
760 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
761 	if ((error = namei(&nd)) != 0)
762 		return (error);
763 	vfslocked = NDHASGIANT(&nd);
764 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
765 		vput(nd.ni_vp);
766 		VFS_UNLOCK_GIANT(vfslocked);
767 		NDFREE(&nd, NDF_ONLY_PNBUF);
768 		return (error);
769 	}
770 	VOP_UNLOCK(nd.ni_vp, 0, td);
771 	NDFREE(&nd, NDF_ONLY_PNBUF);
772 	FILEDESC_LOCK_FAST(fdp);
773 	vp = fdp->fd_cdir;
774 	fdp->fd_cdir = nd.ni_vp;
775 	FILEDESC_UNLOCK_FAST(fdp);
776 	vrele(vp);
777 	VFS_UNLOCK_GIANT(vfslocked);
778 	return (0);
779 }
780 
781 /*
782  * Helper function for raised chroot(2) security function:  Refuse if
783  * any filedescriptors are open directories.
784  */
785 static int
786 chroot_refuse_vdir_fds(fdp)
787 	struct filedesc *fdp;
788 {
789 	struct vnode *vp;
790 	struct file *fp;
791 	int fd;
792 
793 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
794 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
795 		fp = fget_locked(fdp, fd);
796 		if (fp == NULL)
797 			continue;
798 		if (fp->f_type == DTYPE_VNODE) {
799 			vp = fp->f_vnode;
800 			if (vp->v_type == VDIR)
801 				return (EPERM);
802 		}
803 	}
804 	return (0);
805 }
806 
807 /*
808  * This sysctl determines if we will allow a process to chroot(2) if it
809  * has a directory open:
810  *	0: disallowed for all processes.
811  *	1: allowed for processes that were not already chroot(2)'ed.
812  *	2: allowed for all processes.
813  */
814 
815 static int chroot_allow_open_directories = 1;
816 
817 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
818      &chroot_allow_open_directories, 0, "");
819 
820 /*
821  * Change notion of root (``/'') directory.
822  */
823 #ifndef _SYS_SYSPROTO_H_
824 struct chroot_args {
825 	char	*path;
826 };
827 #endif
828 int
829 chroot(td, uap)
830 	struct thread *td;
831 	struct chroot_args /* {
832 		char *path;
833 	} */ *uap;
834 {
835 	int error;
836 	struct nameidata nd;
837 	int vfslocked;
838 
839 	error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
840 	if (error)
841 		return (error);
842 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE,
843 	    UIO_USERSPACE, uap->path, td);
844 	error = namei(&nd);
845 	if (error)
846 		goto error;
847 	vfslocked = NDHASGIANT(&nd);
848 	if ((error = change_dir(nd.ni_vp, td)) != 0)
849 		goto e_vunlock;
850 #ifdef MAC
851 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
852 		goto e_vunlock;
853 #endif
854 	VOP_UNLOCK(nd.ni_vp, 0, td);
855 	error = change_root(nd.ni_vp, td);
856 	vrele(nd.ni_vp);
857 	VFS_UNLOCK_GIANT(vfslocked);
858 	NDFREE(&nd, NDF_ONLY_PNBUF);
859 	return (error);
860 e_vunlock:
861 	vput(nd.ni_vp);
862 	VFS_UNLOCK_GIANT(vfslocked);
863 error:
864 	NDFREE(&nd, NDF_ONLY_PNBUF);
865 	return (error);
866 }
867 
868 /*
869  * Common routine for chroot and chdir.  Callers must provide a locked vnode
870  * instance.
871  */
872 int
873 change_dir(vp, td)
874 	struct vnode *vp;
875 	struct thread *td;
876 {
877 	int error;
878 
879 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
880 	if (vp->v_type != VDIR)
881 		return (ENOTDIR);
882 #ifdef MAC
883 	error = mac_check_vnode_chdir(td->td_ucred, vp);
884 	if (error)
885 		return (error);
886 #endif
887 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
888 	return (error);
889 }
890 
891 /*
892  * Common routine for kern_chroot() and jail_attach().  The caller is
893  * responsible for invoking suser() and mac_check_chroot() to authorize this
894  * operation.
895  */
896 int
897 change_root(vp, td)
898 	struct vnode *vp;
899 	struct thread *td;
900 {
901 	struct filedesc *fdp;
902 	struct vnode *oldvp;
903 	int error;
904 
905 	VFS_ASSERT_GIANT(vp->v_mount);
906 	fdp = td->td_proc->p_fd;
907 	FILEDESC_LOCK(fdp);
908 	if (chroot_allow_open_directories == 0 ||
909 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
910 		error = chroot_refuse_vdir_fds(fdp);
911 		if (error) {
912 			FILEDESC_UNLOCK(fdp);
913 			return (error);
914 		}
915 	}
916 	oldvp = fdp->fd_rdir;
917 	fdp->fd_rdir = vp;
918 	VREF(fdp->fd_rdir);
919 	if (!fdp->fd_jdir) {
920 		fdp->fd_jdir = vp;
921 		VREF(fdp->fd_jdir);
922 	}
923 	FILEDESC_UNLOCK(fdp);
924 	vrele(oldvp);
925 	return (0);
926 }
927 
928 /*
929  * Check permissions, allocate an open file structure,
930  * and call the device open routine if any.
931  *
932  * MP SAFE
933  */
934 #ifndef _SYS_SYSPROTO_H_
935 struct open_args {
936 	char	*path;
937 	int	flags;
938 	int	mode;
939 };
940 #endif
941 int
942 open(td, uap)
943 	struct thread *td;
944 	register struct open_args /* {
945 		char *path;
946 		int flags;
947 		int mode;
948 	} */ *uap;
949 {
950 	int error;
951 
952 	error = kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
953 	if (mtx_owned(&Giant))
954 		printf("open: %s: %d\n", uap->path, error);
955 	return (error);
956 }
957 
958 int
959 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
960     int mode)
961 {
962 	struct proc *p = td->td_proc;
963 	struct filedesc *fdp = p->p_fd;
964 	struct file *fp;
965 	struct vnode *vp;
966 	struct vattr vat;
967 	struct mount *mp;
968 	int cmode;
969 	struct file *nfp;
970 	int type, indx, error;
971 	struct flock lf;
972 	struct nameidata nd;
973 	int vfslocked;
974 
975 	if ((flags & O_ACCMODE) == O_ACCMODE)
976 		return (EINVAL);
977 	flags = FFLAGS(flags);
978 	error = falloc(td, &nfp, &indx);
979 	if (error)
980 		return (error);
981 	/* An extra reference on `nfp' has been held for us by falloc(). */
982 	fp = nfp;
983 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
984 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
985 	td->td_dupfd = -1;		/* XXX check for fdopen */
986 	error = vn_open(&nd, &flags, cmode, indx);
987 	if (error) {
988 		/*
989 		 * If the vn_open replaced the method vector, something
990 		 * wonderous happened deep below and we just pass it up
991 		 * pretending we know what we do.
992 		 */
993 		if (error == ENXIO && fp->f_ops != &badfileops) {
994 			fdrop(fp, td);
995 			td->td_retval[0] = indx;
996 			return (0);
997 		}
998 
999 		/*
1000 		 * release our own reference
1001 		 */
1002 		fdrop(fp, td);
1003 
1004 		/*
1005 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1006 		 * responsible for dropping the old contents of ofiles[indx]
1007 		 * if it succeeds.
1008 		 */
1009 		if ((error == ENODEV || error == ENXIO) &&
1010 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1011 		    (error =
1012 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1013 			td->td_retval[0] = indx;
1014 			return (0);
1015 		}
1016 		/*
1017 		 * Clean up the descriptor, but only if another thread hadn't
1018 		 * replaced or closed it.
1019 		 */
1020 		fdclose(fdp, fp, indx, td);
1021 
1022 		if (error == ERESTART)
1023 			error = EINTR;
1024 		return (error);
1025 	}
1026 	td->td_dupfd = 0;
1027 	vfslocked = NDHASGIANT(&nd);
1028 	NDFREE(&nd, NDF_ONLY_PNBUF);
1029 	vp = nd.ni_vp;
1030 
1031 	/*
1032 	 * There should be 2 references on the file, one from the descriptor
1033 	 * table, and one for us.
1034 	 *
1035 	 * Handle the case where someone closed the file (via its file
1036 	 * descriptor) while we were blocked.  The end result should look
1037 	 * like opening the file succeeded but it was immediately closed.
1038 	 * We call vn_close() manually because we haven't yet hooked up
1039 	 * the various 'struct file' fields.
1040 	 */
1041 	FILEDESC_LOCK(fdp);
1042 	FILE_LOCK(fp);
1043 	if (fp->f_count == 1) {
1044 		mp = vp->v_mount;
1045 		KASSERT(fdp->fd_ofiles[indx] != fp,
1046 		    ("Open file descriptor lost all refs"));
1047 		FILE_UNLOCK(fp);
1048 		FILEDESC_UNLOCK(fdp);
1049 		VOP_UNLOCK(vp, 0, td);
1050 		vn_close(vp, flags & FMASK, fp->f_cred, td);
1051 		VFS_UNLOCK_GIANT(vfslocked);
1052 		fdrop(fp, td);
1053 		td->td_retval[0] = indx;
1054 		return (0);
1055 	}
1056 	fp->f_vnode = vp;
1057 	if (fp->f_data == NULL)
1058 		fp->f_data = vp;
1059 	fp->f_flag = flags & FMASK;
1060 	if (fp->f_ops == &badfileops)
1061 		fp->f_ops = &vnops;
1062 	fp->f_seqcount = 1;
1063 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1064 	FILE_UNLOCK(fp);
1065 	FILEDESC_UNLOCK(fdp);
1066 
1067 	VOP_UNLOCK(vp, 0, td);
1068 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1069 		lf.l_whence = SEEK_SET;
1070 		lf.l_start = 0;
1071 		lf.l_len = 0;
1072 		if (flags & O_EXLOCK)
1073 			lf.l_type = F_WRLCK;
1074 		else
1075 			lf.l_type = F_RDLCK;
1076 		type = F_FLOCK;
1077 		if ((flags & FNONBLOCK) == 0)
1078 			type |= F_WAIT;
1079 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1080 			    type)) != 0)
1081 			goto bad;
1082 		fp->f_flag |= FHASLOCK;
1083 	}
1084 	if (flags & O_TRUNC) {
1085 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1086 			goto bad;
1087 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1088 		VATTR_NULL(&vat);
1089 		vat.va_size = 0;
1090 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1091 #ifdef MAC
1092 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1093 		if (error == 0)
1094 #endif
1095 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1096 		VOP_UNLOCK(vp, 0, td);
1097 		vn_finished_write(mp);
1098 		if (error)
1099 			goto bad;
1100 	}
1101 	VFS_UNLOCK_GIANT(vfslocked);
1102 	/*
1103 	 * Release our private reference, leaving the one associated with
1104 	 * the descriptor table intact.
1105 	 */
1106 	fdrop(fp, td);
1107 	td->td_retval[0] = indx;
1108 	return (0);
1109 bad:
1110 	VFS_UNLOCK_GIANT(vfslocked);
1111 	fdclose(fdp, fp, indx, td);
1112 	fdrop(fp, td);
1113 	return (error);
1114 }
1115 
#ifdef COMPAT_43
/*
 * Old creat() system call: create a file.  Implemented as kern_open() on
 * the user-space path with O_WRONLY | O_CREAT | O_TRUNC and the supplied
 * mode.
 *
 * MP SAFE
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	int error;

	error = kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode);
	return (error);
}
#endif /* COMPAT_43 */
1141 
1142 /*
1143  * Create a special file.
1144  */
1145 #ifndef _SYS_SYSPROTO_H_
1146 struct mknod_args {
1147 	char	*path;
1148 	int	mode;
1149 	int	dev;
1150 };
1151 #endif
1152 int
1153 mknod(td, uap)
1154 	struct thread *td;
1155 	register struct mknod_args /* {
1156 		char *path;
1157 		int mode;
1158 		int dev;
1159 	} */ *uap;
1160 {
1161 
1162 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1163 }
1164 
/*
 * Create a special file (or a whiteout entry) named by `path'.
 *
 * td       calling thread; its credentials and umask are used.
 * path     name of the node to create, located in address space `pathseg'.
 * mode     file type (S_IFMT bits) plus permission bits.
 * dev      value stored in va_rdev for the new node.
 *
 * Returns 0 on success or an errno value: the result of the privilege
 * check, a namei() failure, EEXIST if the name already exists, EINVAL for
 * an unsupported file type, or whatever the MAC check / VOP returns.
 */
int
kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
    int dev)
{
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	int error;
	int whiteout = 0;
	struct nameidata nd;
	int vfslocked;

	/*
	 * Privilege check first: character and block devices go through
	 * suser(), every other type through suser_cred() with
	 * SUSER_ALLOWJAIL.
	 */
	switch (mode & S_IFMT) {
	case S_IFCHR:
	case S_IFBLK:
		error = suser(td);
		break;
	default:
		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
		break;
	}
	if (error)
		return (error);
restart:
	bwillwrite();
	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, pathseg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* The name already exists: drop everything and fail. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vrele(vp);
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (EEXIST);
	} else {
		/* Build the attributes; permissions are filtered by the umask. */
		VATTR_NULL(&vattr);
		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
		vattr.va_mode = (mode & ALLPERMS) &
		    ~td->td_proc->p_fd->fd_cmask;
		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
		vattr.va_rdev = dev;
		whiteout = 0;

		/*
		 * Map the requested file type to a vnode type.  An unsupported
		 * type only records EINVAL here; the common cleanup at the
		 * bottom of the function still runs.
		 */
		switch (mode & S_IFMT) {
		case S_IFMT:	/* used by badsect to flag bad sectors */
			vattr.va_type = VBAD;
			break;
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			whiteout = 1;
			break;
		default:
			error = EINVAL;
			break;
		}
	}
	/*
	 * If write access cannot be started without sleeping, release the
	 * directory, wait (V_XSLEEP) and redo the lookup from scratch.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
#ifdef MAC
	/* The MAC create check is skipped for whiteouts. */
	if (error == 0 && !whiteout)
		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
		    &nd.ni_cnd, &vattr);
#endif
	if (!error) {
		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
		if (whiteout)
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
		else {
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
						&nd.ni_cnd, &vattr);
			/* On success the new vnode is released right away. */
			if (error == 0)
				vput(nd.ni_vp);
		}
	}
	/* Common exit: free the name buffer, release the parent directory. */
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
1263 
1264 /*
1265  * Create a named pipe.
1266  */
1267 #ifndef _SYS_SYSPROTO_H_
1268 struct mkfifo_args {
1269 	char	*path;
1270 	int	mode;
1271 };
1272 #endif
1273 int
1274 mkfifo(td, uap)
1275 	struct thread *td;
1276 	register struct mkfifo_args /* {
1277 		char *path;
1278 		int mode;
1279 	} */ *uap;
1280 {
1281 
1282 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1283 }
1284 
/*
 * Create a FIFO named by `path' (located in address space `pathseg').
 *
 * The permission bits of `mode' are filtered through the process umask.
 * Returns 0 on success, EEXIST if the name already exists, or the error
 * reported by namei(), vn_start_write(), the MAC check or VOP_MKNOD().
 */
int
kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
{
	struct mount *mp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	int vfslocked;

restart:
	bwillwrite();
	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, pathseg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	if (nd.ni_vp != NULL) {
		/* Something already has this name. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vrele(nd.ni_vp);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (EEXIST);
	}
	/*
	 * Could not start the write without sleeping: let go of the
	 * directory, wait, then repeat the lookup.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
#ifdef MAC
	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error)
		goto out;
#endif
	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	/* The freshly created vnode is not needed by the caller. */
	if (error == 0)
		vput(nd.ni_vp);
#ifdef MAC
out:
#endif
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	return (error);
}
1342 
1343 /*
1344  * Make a hard file link.
1345  */
1346 #ifndef _SYS_SYSPROTO_H_
1347 struct link_args {
1348 	char	*path;
1349 	char	*link;
1350 };
1351 #endif
1352 int
1353 link(td, uap)
1354 	struct thread *td;
1355 	register struct link_args /* {
1356 		char *path;
1357 		char *link;
1358 	} */ *uap;
1359 {
1360 	int error;
1361 
1362 	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1363 	return (error);
1364 }
1365 
/* The two knobs below are attached to the _security_bsd sysctl node. */
SYSCTL_DECL(_security_bsd);

/*
 * Hard link policy switches read by can_hardlink().  Both start out as 0
 * and are exported read/write (CTLFLAG_RW).
 */
static int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");
1378 
1379 static int
1380 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1381 {
1382 	struct vattr va;
1383 	int error;
1384 
1385 	if (suser_cred(cred, SUSER_ALLOWJAIL) == 0)
1386 		return (0);
1387 
1388 	if (!hardlink_check_uid && !hardlink_check_gid)
1389 		return (0);
1390 
1391 	error = VOP_GETATTR(vp, &va, cred, td);
1392 	if (error != 0)
1393 		return (error);
1394 
1395 	if (hardlink_check_uid) {
1396 		if (cred->cr_uid != va.va_uid)
1397 			return (EPERM);
1398 	}
1399 
1400 	if (hardlink_check_gid) {
1401 		if (!groupmember(va.va_gid, cred))
1402 			return (EPERM);
1403 	}
1404 
1405 	return (0);
1406 }
1407 
1408 int
1409 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1410 {
1411 	struct vnode *vp;
1412 	struct mount *mp;
1413 	struct nameidata nd;
1414 	int vfslocked;
1415 	int lvfslocked;
1416 	int error;
1417 
1418 	bwillwrite();
1419 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, segflg, path, td);
1420 	if ((error = namei(&nd)) != 0)
1421 		return (error);
1422 	vfslocked = NDHASGIANT(&nd);
1423 	NDFREE(&nd, NDF_ONLY_PNBUF);
1424 	vp = nd.ni_vp;
1425 	if (vp->v_type == VDIR) {
1426 		vrele(vp);
1427 		VFS_UNLOCK_GIANT(vfslocked);
1428 		return (EPERM);		/* POSIX */
1429 	}
1430 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1431 		vrele(vp);
1432 		VFS_UNLOCK_GIANT(vfslocked);
1433 		return (error);
1434 	}
1435 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, link, td);
1436 	if ((error = namei(&nd)) == 0) {
1437 		lvfslocked = NDHASGIANT(&nd);
1438 		if (nd.ni_vp != NULL) {
1439 			vrele(nd.ni_vp);
1440 			if (nd.ni_dvp == nd.ni_vp)
1441 				vrele(nd.ni_dvp);
1442 			else
1443 				vput(nd.ni_dvp);
1444 			error = EEXIST;
1445 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1446 		    == 0) {
1447 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1448 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1449 			error = can_hardlink(vp, td, td->td_ucred);
1450 			if (error == 0)
1451 #ifdef MAC
1452 				error = mac_check_vnode_link(td->td_ucred,
1453 				    nd.ni_dvp, vp, &nd.ni_cnd);
1454 			if (error == 0)
1455 #endif
1456 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1457 			VOP_UNLOCK(vp, 0, td);
1458 			vput(nd.ni_dvp);
1459 		}
1460 		NDFREE(&nd, NDF_ONLY_PNBUF);
1461 		VFS_UNLOCK_GIANT(lvfslocked);
1462 	}
1463 	vrele(vp);
1464 	vn_finished_write(mp);
1465 	VFS_UNLOCK_GIANT(vfslocked);
1466 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1467 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1468 	return (error);
1469 }
1470 
1471 /*
1472  * Make a symbolic link.
1473  */
1474 #ifndef _SYS_SYSPROTO_H_
1475 struct symlink_args {
1476 	char	*path;
1477 	char	*link;
1478 };
1479 #endif
1480 int
1481 symlink(td, uap)
1482 	struct thread *td;
1483 	register struct symlink_args /* {
1484 		char *path;
1485 		char *link;
1486 	} */ *uap;
1487 {
1488 
1489 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1490 }
1491 
/*
 * Create a symbolic link named `link' whose contents are the string
 * `path'.  Both strings are located in address space `segflg'.
 *
 * Returns 0 on success or an errno value: a copyinstr() or namei()
 * failure, EEXIST if `link' already exists, or the error reported by
 * vn_start_write(), the MAC check or VOP_SYMLINK().
 */
int
kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
{
	struct mount *mp;
	struct vattr vattr;
	char *syspath;
	int error;
	struct nameidata nd;
	int vfslocked;

	/*
	 * The link contents must be in kernel memory for VOP_SYMLINK().
	 * A user-space string is copied into a buffer from namei_zone,
	 * which is freed at "out".
	 */
	if (segflg == UIO_SYSSPACE) {
		syspath = path;
	} else {
		syspath = uma_zalloc(namei_zone, M_WAITOK);
		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
			goto out;
	}
restart:
	bwillwrite();
	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE,
	    segflg, link, td);
	if ((error = namei(&nd)) != 0)
		goto out;
	vfslocked = NDHASGIANT(&nd);
	if (nd.ni_vp) {
		/* The link name already exists. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vrele(nd.ni_vp);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		error = EEXIST;
		goto out;
	}
	/*
	 * Could not start the write without sleeping: release the
	 * directory, wait, then repeat the lookup.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			goto out;
		goto restart;
	}
	VATTR_NULL(&vattr);
	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
#ifdef MAC
	/* va_type is only filled in here, for the MAC check. */
	vattr.va_type = VLNK;
	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error)
		goto out2;
#endif
	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
	/* The new vnode is not needed by the caller. */
	if (error == 0)
		vput(nd.ni_vp);
#ifdef MAC
out2:
#endif
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
out:
	/* Only a buffer allocated above is freed, never the caller's string. */
	if (segflg != UIO_SYSSPACE)
		uma_zfree(namei_zone, syspath);
	return (error);
}
1564 
/*
 * Delete a whiteout from the filesystem.
 *
 * The user-space path must name a whiteout entry: if the lookup finds a
 * real vnode, or the ISWHITEOUT flag is not set on the component, the call
 * fails with EEXIST.  Otherwise the result of VOP_WHITEOUT(..., DELETE) is
 * returned.
 */
int
undelete(td, uap)
	struct thread *td;
	register struct undelete_args /* {
		char *path;
	} */ *uap;
{
	int error;
	struct mount *mp;
	struct nameidata nd;
	int vfslocked;

restart:
	bwillwrite();
	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE, UIO_USERSPACE,
	    uap->path, td);
	error = namei(&nd);
	if (error)
		return (error);
	vfslocked = NDHASGIANT(&nd);

	/* Reject anything that is not a bare whiteout entry. */
	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_vp)
			vrele(nd.ni_vp);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (EEXIST);
	}
	/*
	 * Could not start the write without sleeping: release the
	 * directory, wait, then repeat the lookup.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
	return (error);
}
1618 
1619 /*
1620  * Delete a name from the filesystem.
1621  */
1622 #ifndef _SYS_SYSPROTO_H_
1623 struct unlink_args {
1624 	char	*path;
1625 };
1626 #endif
1627 int
1628 unlink(td, uap)
1629 	struct thread *td;
1630 	struct unlink_args /* {
1631 		char *path;
1632 	} */ *uap;
1633 {
1634 	int error;
1635 
1636 	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1637 	return (error);
1638 }
1639 
/*
 * Remove the directory entry named by `path' (located in address space
 * `pathseg').
 *
 * Returns 0 on success or an errno value: a namei() failure, EPERM if the
 * name refers to a directory, EBUSY if it is the root of a mounted
 * filesystem, or the error reported by vn_start_write(), the MAC check or
 * VOP_REMOVE().
 */
int
kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
{
	struct mount *mp;
	struct vnode *vp;
	int error;
	struct nameidata nd;
	int vfslocked;

restart:
	bwillwrite();
	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE, pathseg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;
	/* error is 0 here; the checks below may set it before any work. */
	if (vp->v_type == VDIR)
		error = EPERM;		/* POSIX */
	else {
		/*
		 * The root of a mounted filesystem cannot be deleted.
		 *
		 * XXX: can this only be a VDIR case?
		 */
		if (vp->v_vflag & VV_ROOT)
			error = EBUSY;
	}
	if (error == 0) {
		/*
		 * Could not start the write without sleeping: release both
		 * vnodes, wait, then repeat the lookup.
		 */
		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
			NDFREE(&nd, NDF_ONLY_PNBUF);
			if (vp == nd.ni_dvp)
				vrele(vp);
			else
				vput(vp);
			vput(nd.ni_dvp);
			VFS_UNLOCK_GIANT(vfslocked);
			if ((error = vn_start_write(NULL, &mp,
			    V_XSLEEP | PCATCH)) != 0)
				return (error);
			goto restart;
		}
#ifdef MAC
		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
		    &nd.ni_cnd);
		if (error)
			goto out;
#endif
		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
#ifdef MAC
out:
#endif
		vn_finished_write(mp);
	}
	/*
	 * Common cleanup.  When the leaf and its parent are the same vnode
	 * only the extra reference is dropped for the leaf; the vput() of the
	 * parent releases the lock.
	 */
	NDFREE(&nd, NDF_ONLY_PNBUF);
	if (vp == nd.ni_dvp)
		vrele(vp);
	else
		vput(vp);
	vput(nd.ni_dvp);
	VFS_UNLOCK_GIANT(vfslocked);
	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
	return (error);
}
1705 
/*
 * Reposition read/write file offset.
 *
 * uap->whence selects the base of the new offset: L_SET (absolute),
 * L_INCR (relative to the current file offset) or L_XTND (relative to the
 * file size obtained with VOP_GETATTR()).
 *
 * On success the new offset is stored in the file and returned through
 * td->td_retval as an off_t.  Errors: the fget() failure, ESPIPE if the
 * file's operations lack DFLAG_SEEKABLE, EOVERFLOW if the sum would exceed
 * OFF_MAX, EINVAL for an unknown whence or a negative result, or the
 * VOP_GETATTR() error.  The overflow and negative-offset checks are
 * skipped for character devices (VCHR).
 */
#ifndef _SYS_SYSPROTO_H_
struct lseek_args {
	int	fd;
	int	pad;
	off_t	offset;
	int	whence;
};
#endif
int
lseek(td, uap)
	struct thread *td;
	register struct lseek_args /* {
		int fd;
		int pad;
		off_t offset;
		int whence;
	} */ *uap;
{
	struct ucred *cred = td->td_ucred;
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	off_t offset;
	int error, noneg;
	int vfslocked;

	if ((error = fget(td, uap->fd, &fp)) != 0)
		return (error);
	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
		fdrop(fp, td);
		return (ESPIPE);
	}
	vp = fp->f_vnode;
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	/* noneg: range checks apply to everything except character devices. */
	noneg = (vp->v_type != VCHR);
	offset = uap->offset;
	switch (uap->whence) {
	case L_INCR:
		/* Refuse a sum that would overflow before computing it. */
		if (noneg &&
		    (fp->f_offset < 0 ||
		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
			error = EOVERFLOW;
			break;
		}
		offset += fp->f_offset;
		break;
	case L_XTND:
		/* The vnode is locked only around the attribute fetch. */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		error = VOP_GETATTR(vp, &vattr, cred, td);
		VOP_UNLOCK(vp, 0, td);
		if (error)
			break;
		if (noneg &&
		    (vattr.va_size > OFF_MAX ||
		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
			error = EOVERFLOW;
			break;
		}
		offset += vattr.va_size;
		break;
	case L_SET:
		break;
	default:
		error = EINVAL;
	}
	if (error == 0 && noneg && offset < 0)
		error = EINVAL;
	if (error != 0)
		goto drop;
	fp->f_offset = offset;
	/* The result is wider than one retval slot; store it as an off_t. */
	*(off_t *)(td->td_retval) = fp->f_offset;
drop:
	fdrop(fp, td);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
1785 
1786 #if defined(COMPAT_43)
1787 /*
1788  * Reposition read/write file offset.
1789  */
1790 #ifndef _SYS_SYSPROTO_H_
1791 struct olseek_args {
1792 	int	fd;
1793 	long	offset;
1794 	int	whence;
1795 };
1796 #endif
1797 int
1798 olseek(td, uap)
1799 	struct thread *td;
1800 	register struct olseek_args /* {
1801 		int fd;
1802 		long offset;
1803 		int whence;
1804 	} */ *uap;
1805 {
1806 	struct lseek_args /* {
1807 		int fd;
1808 		int pad;
1809 		off_t offset;
1810 		int whence;
1811 	} */ nuap;
1812 	int error;
1813 
1814 	nuap.fd = uap->fd;
1815 	nuap.offset = uap->offset;
1816 	nuap.whence = uap->whence;
1817 	error = lseek(td, &nuap);
1818 	return (error);
1819 }
1820 #endif /* COMPAT_43 */
1821 
1822 /*
1823  * Check access permissions using passed credentials.
1824  */
1825 static int
1826 vn_access(vp, user_flags, cred, td)
1827 	struct vnode	*vp;
1828 	int		user_flags;
1829 	struct ucred	*cred;
1830 	struct thread	*td;
1831 {
1832 	int error, flags;
1833 
1834 	/* Flags == 0 means only check for existence. */
1835 	error = 0;
1836 	if (user_flags) {
1837 		flags = 0;
1838 		if (user_flags & R_OK)
1839 			flags |= VREAD;
1840 		if (user_flags & W_OK)
1841 			flags |= VWRITE;
1842 		if (user_flags & X_OK)
1843 			flags |= VEXEC;
1844 #ifdef MAC
1845 		error = mac_check_vnode_access(cred, vp, flags);
1846 		if (error)
1847 			return (error);
1848 #endif
1849 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1850 			error = VOP_ACCESS(vp, flags, cred, td);
1851 	}
1852 	return (error);
1853 }
1854 
1855 /*
1856  * Check access permissions using "real" credentials.
1857  */
1858 #ifndef _SYS_SYSPROTO_H_
1859 struct access_args {
1860 	char	*path;
1861 	int	flags;
1862 };
1863 #endif
1864 int
1865 access(td, uap)
1866 	struct thread *td;
1867 	register struct access_args /* {
1868 		char *path;
1869 		int flags;
1870 	} */ *uap;
1871 {
1872 
1873 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1874 }
1875 
/*
 * Check whether the file named by `path' (in address space `pathseg') may
 * be accessed as described by `flags', using the real user and group IDs
 * of the calling thread instead of the effective ones.
 *
 * Returns 0 if access is permitted, otherwise the error from namei() or
 * vn_access().
 */
int
kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
{
	struct ucred *cred, *tmpcred;
	register struct vnode *vp;
	struct nameidata nd;
	int vfslocked;
	int error;

	/*
	 * Create and modify a temporary credential instead of one that
	 * is potentially shared.  This could also mess up socket
	 * buffer accounting which can run in an interrupt context.
	 */
	cred = td->td_ucred;
	tmpcred = crdup(cred);
	tmpcred->cr_uid = cred->cr_ruid;
	tmpcred->cr_groups[0] = cred->cr_rgid;
	/* The lookup itself also runs with the temporary credential. */
	td->td_ucred = tmpcred;
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
	if ((error = namei(&nd)) != 0)
		goto out1;
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;

	error = vn_access(vp, flags, tmpcred, td);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(vp);
	VFS_UNLOCK_GIANT(vfslocked);
out1:
	/* Put the original credential back before freeing the copy. */
	td->td_ucred = cred;
	crfree(tmpcred);
	return (error);
}
1910 
1911 /*
1912  * Check access permissions using "effective" credentials.
1913  */
1914 #ifndef _SYS_SYSPROTO_H_
1915 struct eaccess_args {
1916 	char	*path;
1917 	int	flags;
1918 };
1919 #endif
1920 int
1921 eaccess(td, uap)
1922 	struct thread *td;
1923 	register struct eaccess_args /* {
1924 		char *path;
1925 		int flags;
1926 	} */ *uap;
1927 {
1928 	struct nameidata nd;
1929 	struct vnode *vp;
1930 	int vfslocked;
1931 	int error;
1932 
1933 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE,
1934 	    uap->path, td);
1935 	if ((error = namei(&nd)) != 0)
1936 		return (error);
1937 	vp = nd.ni_vp;
1938 	vfslocked = NDHASGIANT(&nd);
1939 	error = vn_access(vp, uap->flags, td->td_ucred, td);
1940 	NDFREE(&nd, NDF_ONLY_PNBUF);
1941 	vput(vp);
1942 	VFS_UNLOCK_GIANT(vfslocked);
1943 	return (error);
1944 }
1945 
1946 #if defined(COMPAT_43)
1947 /*
1948  * Get file status; this version follows links.
1949  */
1950 #ifndef _SYS_SYSPROTO_H_
1951 struct ostat_args {
1952 	char	*path;
1953 	struct ostat *ub;
1954 };
1955 #endif
1956 int
1957 ostat(td, uap)
1958 	struct thread *td;
1959 	register struct ostat_args /* {
1960 		char *path;
1961 		struct ostat *ub;
1962 	} */ *uap;
1963 {
1964 	struct stat sb;
1965 	struct ostat osb;
1966 	int error;
1967 
1968 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1969 	if (error)
1970 		return (error);
1971 	cvtstat(&sb, &osb);
1972 	error = copyout(&osb, uap->ub, sizeof (osb));
1973 	return (error);
1974 }
1975 
1976 /*
1977  * Get file status; this version does not follow links.
1978  */
1979 #ifndef _SYS_SYSPROTO_H_
1980 struct olstat_args {
1981 	char	*path;
1982 	struct ostat *ub;
1983 };
1984 #endif
1985 int
1986 olstat(td, uap)
1987 	struct thread *td;
1988 	register struct olstat_args /* {
1989 		char *path;
1990 		struct ostat *ub;
1991 	} */ *uap;
1992 {
1993 	struct stat sb;
1994 	struct ostat osb;
1995 	int error;
1996 
1997 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
1998 	if (error)
1999 		return (error);
2000 	cvtstat(&sb, &osb);
2001 	error = copyout(&osb, uap->ub, sizeof (osb));
2002 	return (error);
2003 }
2004 
2005 /*
2006  * Convert from an old to a new stat structure.
2007  */
2008 void
2009 cvtstat(st, ost)
2010 	struct stat *st;
2011 	struct ostat *ost;
2012 {
2013 
2014 	ost->st_dev = st->st_dev;
2015 	ost->st_ino = st->st_ino;
2016 	ost->st_mode = st->st_mode;
2017 	ost->st_nlink = st->st_nlink;
2018 	ost->st_uid = st->st_uid;
2019 	ost->st_gid = st->st_gid;
2020 	ost->st_rdev = st->st_rdev;
2021 	if (st->st_size < (quad_t)1 << 32)
2022 		ost->st_size = st->st_size;
2023 	else
2024 		ost->st_size = -2;
2025 	ost->st_atime = st->st_atime;
2026 	ost->st_mtime = st->st_mtime;
2027 	ost->st_ctime = st->st_ctime;
2028 	ost->st_blksize = st->st_blksize;
2029 	ost->st_blocks = st->st_blocks;
2030 	ost->st_flags = st->st_flags;
2031 	ost->st_gen = st->st_gen;
2032 }
2033 #endif /* COMPAT_43 */
2034 
2035 /*
2036  * Get file status; this version follows links.
2037  */
2038 #ifndef _SYS_SYSPROTO_H_
2039 struct stat_args {
2040 	char	*path;
2041 	struct stat *ub;
2042 };
2043 #endif
2044 int
2045 stat(td, uap)
2046 	struct thread *td;
2047 	register struct stat_args /* {
2048 		char *path;
2049 		struct stat *ub;
2050 	} */ *uap;
2051 {
2052 	struct stat sb;
2053 	int error;
2054 
2055 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2056 	if (error == 0)
2057 		error = copyout(&sb, uap->ub, sizeof (sb));
2058 	return (error);
2059 }
2060 
2061 int
2062 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2063 {
2064 	struct nameidata nd;
2065 	struct stat sb;
2066 	int error, vfslocked;
2067 
2068 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE,
2069 	    pathseg, path, td);
2070 	if ((error = namei(&nd)) != 0)
2071 		return (error);
2072 	vfslocked = NDHASGIANT(&nd);
2073 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2074 	NDFREE(&nd, NDF_ONLY_PNBUF);
2075 	vput(nd.ni_vp);
2076 	VFS_UNLOCK_GIANT(vfslocked);
2077 	if (error)
2078 		return (error);
2079 	*sbp = sb;
2080 	return (0);
2081 }
2082 
2083 /*
2084  * Get file status; this version does not follow links.
2085  */
2086 #ifndef _SYS_SYSPROTO_H_
2087 struct lstat_args {
2088 	char	*path;
2089 	struct stat *ub;
2090 };
2091 #endif
2092 int
2093 lstat(td, uap)
2094 	struct thread *td;
2095 	register struct lstat_args /* {
2096 		char *path;
2097 		struct stat *ub;
2098 	} */ *uap;
2099 {
2100 	struct stat sb;
2101 	int error;
2102 
2103 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2104 	if (error == 0)
2105 		error = copyout(&sb, uap->ub, sizeof (sb));
2106 	return (error);
2107 }
2108 
2109 int
2110 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2111 {
2112 	struct vnode *vp;
2113 	struct stat sb;
2114 	struct nameidata nd;
2115 	int error, vfslocked;
2116 
2117 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE,
2118 	    pathseg, path, td);
2119 	if ((error = namei(&nd)) != 0)
2120 		return (error);
2121 	vfslocked = NDHASGIANT(&nd);
2122 	vp = nd.ni_vp;
2123 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2124 	NDFREE(&nd, NDF_ONLY_PNBUF);
2125 	vput(vp);
2126 	VFS_UNLOCK_GIANT(vfslocked);
2127 	if (error)
2128 		return (error);
2129 	*sbp = sb;
2130 	return (0);
2131 }
2132 
2133 /*
2134  * Implementation of the NetBSD [l]stat() functions.
2135  */
2136 void
2137 cvtnstat(sb, nsb)
2138 	struct stat *sb;
2139 	struct nstat *nsb;
2140 {
2141 	bzero(nsb, sizeof *nsb);
2142 	nsb->st_dev = sb->st_dev;
2143 	nsb->st_ino = sb->st_ino;
2144 	nsb->st_mode = sb->st_mode;
2145 	nsb->st_nlink = sb->st_nlink;
2146 	nsb->st_uid = sb->st_uid;
2147 	nsb->st_gid = sb->st_gid;
2148 	nsb->st_rdev = sb->st_rdev;
2149 	nsb->st_atimespec = sb->st_atimespec;
2150 	nsb->st_mtimespec = sb->st_mtimespec;
2151 	nsb->st_ctimespec = sb->st_ctimespec;
2152 	nsb->st_size = sb->st_size;
2153 	nsb->st_blocks = sb->st_blocks;
2154 	nsb->st_blksize = sb->st_blksize;
2155 	nsb->st_flags = sb->st_flags;
2156 	nsb->st_gen = sb->st_gen;
2157 	nsb->st_birthtimespec = sb->st_birthtimespec;
2158 }
2159 
2160 #ifndef _SYS_SYSPROTO_H_
2161 struct nstat_args {
2162 	char	*path;
2163 	struct nstat *ub;
2164 };
2165 #endif
2166 int
2167 nstat(td, uap)
2168 	struct thread *td;
2169 	register struct nstat_args /* {
2170 		char *path;
2171 		struct nstat *ub;
2172 	} */ *uap;
2173 {
2174 	struct stat sb;
2175 	struct nstat nsb;
2176 	int error;
2177 
2178 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2179 	if (error)
2180 		return (error);
2181 	cvtnstat(&sb, &nsb);
2182 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2183 	return (error);
2184 }
2185 
2186 /*
2187  * NetBSD lstat.  Get file status; this version does not follow links.
2188  */
2189 #ifndef _SYS_SYSPROTO_H_
2190 struct lstat_args {
2191 	char	*path;
2192 	struct stat *ub;
2193 };
2194 #endif
2195 int
2196 nlstat(td, uap)
2197 	struct thread *td;
2198 	register struct nlstat_args /* {
2199 		char *path;
2200 		struct nstat *ub;
2201 	} */ *uap;
2202 {
2203 	struct stat sb;
2204 	struct nstat nsb;
2205 	int error;
2206 
2207 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2208 	if (error)
2209 		return (error);
2210 	cvtnstat(&sb, &nsb);
2211 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2212 	return (error);
2213 }
2214 
2215 /*
2216  * Get configurable pathname variables.
2217  */
2218 #ifndef _SYS_SYSPROTO_H_
2219 struct pathconf_args {
2220 	char	*path;
2221 	int	name;
2222 };
2223 #endif
2224 int
2225 pathconf(td, uap)
2226 	struct thread *td;
2227 	register struct pathconf_args /* {
2228 		char *path;
2229 		int name;
2230 	} */ *uap;
2231 {
2232 
2233 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2234 }
2235 
2236 int
2237 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2238 {
2239 	struct nameidata nd;
2240 	int error, vfslocked;
2241 
2242 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
2243 	if ((error = namei(&nd)) != 0)
2244 		return (error);
2245 	vfslocked = NDHASGIANT(&nd);
2246 	NDFREE(&nd, NDF_ONLY_PNBUF);
2247 
2248 	/* If asynchronous I/O is available, it works for all files. */
2249 	if (name == _PC_ASYNC_IO)
2250 		td->td_retval[0] = async_io_version;
2251 	else
2252 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2253 	vput(nd.ni_vp);
2254 	VFS_UNLOCK_GIANT(vfslocked);
2255 	return (error);
2256 }
2257 
2258 /*
2259  * Return target name of a symbolic link.
2260  */
2261 #ifndef _SYS_SYSPROTO_H_
2262 struct readlink_args {
2263 	char	*path;
2264 	char	*buf;
2265 	int	count;
2266 };
2267 #endif
2268 int
2269 readlink(td, uap)
2270 	struct thread *td;
2271 	register struct readlink_args /* {
2272 		char *path;
2273 		char *buf;
2274 		int count;
2275 	} */ *uap;
2276 {
2277 
2278 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2279 	    UIO_USERSPACE, uap->count));
2280 }
2281 
2282 int
2283 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2284     enum uio_seg bufseg, int count)
2285 {
2286 	register struct vnode *vp;
2287 	struct iovec aiov;
2288 	struct uio auio;
2289 	int error;
2290 	struct nameidata nd;
2291 	int vfslocked;
2292 
2293 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
2294 	if ((error = namei(&nd)) != 0)
2295 		return (error);
2296 	NDFREE(&nd, NDF_ONLY_PNBUF);
2297 	vfslocked = NDHASGIANT(&nd);
2298 	vp = nd.ni_vp;
2299 #ifdef MAC
2300 	error = mac_check_vnode_readlink(td->td_ucred, vp);
2301 	if (error) {
2302 		vput(vp);
2303 		VFS_UNLOCK_GIANT(vfslocked);
2304 		return (error);
2305 	}
2306 #endif
2307 	if (vp->v_type != VLNK)
2308 		error = EINVAL;
2309 	else {
2310 		aiov.iov_base = buf;
2311 		aiov.iov_len = count;
2312 		auio.uio_iov = &aiov;
2313 		auio.uio_iovcnt = 1;
2314 		auio.uio_offset = 0;
2315 		auio.uio_rw = UIO_READ;
2316 		auio.uio_segflg = bufseg;
2317 		auio.uio_td = td;
2318 		auio.uio_resid = count;
2319 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2320 	}
2321 	vput(vp);
2322 	VFS_UNLOCK_GIANT(vfslocked);
2323 	td->td_retval[0] = count - auio.uio_resid;
2324 	return (error);
2325 }
2326 
2327 /*
2328  * Common implementation code for chflags() and fchflags().
2329  */
2330 static int
2331 setfflags(td, vp, flags)
2332 	struct thread *td;
2333 	struct vnode *vp;
2334 	int flags;
2335 {
2336 	int error;
2337 	struct mount *mp;
2338 	struct vattr vattr;
2339 
2340 	/*
2341 	 * Prevent non-root users from setting flags on devices.  When
2342 	 * a device is reused, users can retain ownership of the device
2343 	 * if they are allowed to set flags and programs assume that
2344 	 * chown can't fail when done as root.
2345 	 */
2346 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2347 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
2348 		if (error)
2349 			return (error);
2350 	}
2351 
2352 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2353 		return (error);
2354 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2355 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2356 	VATTR_NULL(&vattr);
2357 	vattr.va_flags = flags;
2358 #ifdef MAC
2359 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2360 	if (error == 0)
2361 #endif
2362 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2363 	VOP_UNLOCK(vp, 0, td);
2364 	vn_finished_write(mp);
2365 	return (error);
2366 }
2367 
2368 /*
2369  * Change flags of a file given a path name.
2370  */
2371 #ifndef _SYS_SYSPROTO_H_
2372 struct chflags_args {
2373 	char	*path;
2374 	int	flags;
2375 };
2376 #endif
2377 int
2378 chflags(td, uap)
2379 	struct thread *td;
2380 	register struct chflags_args /* {
2381 		char *path;
2382 		int flags;
2383 	} */ *uap;
2384 {
2385 	int error;
2386 	struct nameidata nd;
2387 	int vfslocked;
2388 
2389 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td);
2390 	if ((error = namei(&nd)) != 0)
2391 		return (error);
2392 	NDFREE(&nd, NDF_ONLY_PNBUF);
2393 	vfslocked = NDHASGIANT(&nd);
2394 	error = setfflags(td, nd.ni_vp, uap->flags);
2395 	vrele(nd.ni_vp);
2396 	VFS_UNLOCK_GIANT(vfslocked);
2397 	return (error);
2398 }
2399 
2400 /*
2401  * Same as chflags() but doesn't follow symlinks.
2402  */
2403 int
2404 lchflags(td, uap)
2405 	struct thread *td;
2406 	register struct lchflags_args /* {
2407 		char *path;
2408 		int flags;
2409 	} */ *uap;
2410 {
2411 	int error;
2412 	struct nameidata nd;
2413 	int vfslocked;
2414 
2415 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td);
2416 	if ((error = namei(&nd)) != 0)
2417 		return (error);
2418 	vfslocked = NDHASGIANT(&nd);
2419 	NDFREE(&nd, NDF_ONLY_PNBUF);
2420 	error = setfflags(td, nd.ni_vp, uap->flags);
2421 	vrele(nd.ni_vp);
2422 	VFS_UNLOCK_GIANT(vfslocked);
2423 	return (error);
2424 }
2425 
2426 /*
2427  * Change flags of a file given a file descriptor.
2428  */
2429 #ifndef _SYS_SYSPROTO_H_
2430 struct fchflags_args {
2431 	int	fd;
2432 	int	flags;
2433 };
2434 #endif
2435 int
2436 fchflags(td, uap)
2437 	struct thread *td;
2438 	register struct fchflags_args /* {
2439 		int fd;
2440 		int flags;
2441 	} */ *uap;
2442 {
2443 	struct file *fp;
2444 	int vfslocked;
2445 	int error;
2446 
2447 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2448 		return (error);
2449 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2450 	error = setfflags(td, fp->f_vnode, uap->flags);
2451 	fdrop(fp, td);
2452 	VFS_UNLOCK_GIANT(vfslocked);
2453 	return (error);
2454 }
2455 
2456 /*
2457  * Common implementation code for chmod(), lchmod() and fchmod().
2458  */
2459 static int
2460 setfmode(td, vp, mode)
2461 	struct thread *td;
2462 	struct vnode *vp;
2463 	int mode;
2464 {
2465 	int error;
2466 	struct mount *mp;
2467 	struct vattr vattr;
2468 
2469 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2470 		return (error);
2471 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2472 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2473 	VATTR_NULL(&vattr);
2474 	vattr.va_mode = mode & ALLPERMS;
2475 #ifdef MAC
2476 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2477 	if (error == 0)
2478 #endif
2479 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2480 	VOP_UNLOCK(vp, 0, td);
2481 	vn_finished_write(mp);
2482 	return (error);
2483 }
2484 
2485 /*
2486  * Change mode of a file given path name.
2487  */
2488 #ifndef _SYS_SYSPROTO_H_
2489 struct chmod_args {
2490 	char	*path;
2491 	int	mode;
2492 };
2493 #endif
2494 int
2495 chmod(td, uap)
2496 	struct thread *td;
2497 	register struct chmod_args /* {
2498 		char *path;
2499 		int mode;
2500 	} */ *uap;
2501 {
2502 
2503 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2504 }
2505 
2506 int
2507 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2508 {
2509 	int error;
2510 	struct nameidata nd;
2511 	int vfslocked;
2512 
2513 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2514 	if ((error = namei(&nd)) != 0)
2515 		return (error);
2516 	vfslocked = NDHASGIANT(&nd);
2517 	NDFREE(&nd, NDF_ONLY_PNBUF);
2518 	error = setfmode(td, nd.ni_vp, mode);
2519 	vrele(nd.ni_vp);
2520 	VFS_UNLOCK_GIANT(vfslocked);
2521 	return (error);
2522 }
2523 
2524 /*
2525  * Change mode of a file given path name (don't follow links.)
2526  */
2527 #ifndef _SYS_SYSPROTO_H_
2528 struct lchmod_args {
2529 	char	*path;
2530 	int	mode;
2531 };
2532 #endif
2533 int
2534 lchmod(td, uap)
2535 	struct thread *td;
2536 	register struct lchmod_args /* {
2537 		char *path;
2538 		int mode;
2539 	} */ *uap;
2540 {
2541 	int error;
2542 	struct nameidata nd;
2543 	int vfslocked;
2544 
2545 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td);
2546 	if ((error = namei(&nd)) != 0)
2547 		return (error);
2548 	vfslocked = NDHASGIANT(&nd);
2549 	NDFREE(&nd, NDF_ONLY_PNBUF);
2550 	error = setfmode(td, nd.ni_vp, uap->mode);
2551 	vrele(nd.ni_vp);
2552 	VFS_UNLOCK_GIANT(vfslocked);
2553 	return (error);
2554 }
2555 
2556 /*
2557  * Change mode of a file given a file descriptor.
2558  */
2559 #ifndef _SYS_SYSPROTO_H_
2560 struct fchmod_args {
2561 	int	fd;
2562 	int	mode;
2563 };
2564 #endif
2565 int
2566 fchmod(td, uap)
2567 	struct thread *td;
2568 	register struct fchmod_args /* {
2569 		int fd;
2570 		int mode;
2571 	} */ *uap;
2572 {
2573 	struct file *fp;
2574 	int vfslocked;
2575 	int error;
2576 
2577 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2578 		return (error);
2579 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2580 	error = setfmode(td, fp->f_vnode, uap->mode);
2581 	fdrop(fp, td);
2582 	VFS_UNLOCK_GIANT(vfslocked);
2583 	return (error);
2584 }
2585 
2586 /*
2587  * Common implementation for chown(), lchown(), and fchown()
2588  */
2589 static int
2590 setfown(td, vp, uid, gid)
2591 	struct thread *td;
2592 	struct vnode *vp;
2593 	uid_t uid;
2594 	gid_t gid;
2595 {
2596 	int error;
2597 	struct mount *mp;
2598 	struct vattr vattr;
2599 
2600 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2601 		return (error);
2602 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2603 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2604 	VATTR_NULL(&vattr);
2605 	vattr.va_uid = uid;
2606 	vattr.va_gid = gid;
2607 #ifdef MAC
2608 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2609 	    vattr.va_gid);
2610 	if (error == 0)
2611 #endif
2612 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2613 	VOP_UNLOCK(vp, 0, td);
2614 	vn_finished_write(mp);
2615 	return (error);
2616 }
2617 
2618 /*
2619  * Set ownership given a path name.
2620  */
2621 #ifndef _SYS_SYSPROTO_H_
2622 struct chown_args {
2623 	char	*path;
2624 	int	uid;
2625 	int	gid;
2626 };
2627 #endif
2628 int
2629 chown(td, uap)
2630 	struct thread *td;
2631 	register struct chown_args /* {
2632 		char *path;
2633 		int uid;
2634 		int gid;
2635 	} */ *uap;
2636 {
2637 
2638 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2639 }
2640 
2641 int
2642 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2643     int gid)
2644 {
2645 	int error;
2646 	struct nameidata nd;
2647 	int vfslocked;
2648 
2649 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2650 	if ((error = namei(&nd)) != 0)
2651 		return (error);
2652 	vfslocked = NDHASGIANT(&nd);
2653 	NDFREE(&nd, NDF_ONLY_PNBUF);
2654 	error = setfown(td, nd.ni_vp, uid, gid);
2655 	vrele(nd.ni_vp);
2656 	VFS_UNLOCK_GIANT(vfslocked);
2657 	return (error);
2658 }
2659 
2660 /*
2661  * Set ownership given a path name, do not cross symlinks.
2662  */
2663 #ifndef _SYS_SYSPROTO_H_
2664 struct lchown_args {
2665 	char	*path;
2666 	int	uid;
2667 	int	gid;
2668 };
2669 #endif
2670 int
2671 lchown(td, uap)
2672 	struct thread *td;
2673 	register struct lchown_args /* {
2674 		char *path;
2675 		int uid;
2676 		int gid;
2677 	} */ *uap;
2678 {
2679 
2680 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2681 }
2682 
2683 int
2684 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2685     int gid)
2686 {
2687 	int error;
2688 	struct nameidata nd;
2689 	int vfslocked;
2690 
2691 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, pathseg, path, td);
2692 	if ((error = namei(&nd)) != 0)
2693 		return (error);
2694 	vfslocked = NDHASGIANT(&nd);
2695 	NDFREE(&nd, NDF_ONLY_PNBUF);
2696 	error = setfown(td, nd.ni_vp, uid, gid);
2697 	vrele(nd.ni_vp);
2698 	VFS_UNLOCK_GIANT(vfslocked);
2699 	return (error);
2700 }
2701 
2702 /*
2703  * Set ownership given a file descriptor.
2704  */
2705 #ifndef _SYS_SYSPROTO_H_
2706 struct fchown_args {
2707 	int	fd;
2708 	int	uid;
2709 	int	gid;
2710 };
2711 #endif
2712 int
2713 fchown(td, uap)
2714 	struct thread *td;
2715 	register struct fchown_args /* {
2716 		int fd;
2717 		int uid;
2718 		int gid;
2719 	} */ *uap;
2720 {
2721 	struct file *fp;
2722 	int vfslocked;
2723 	int error;
2724 
2725 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2726 		return (error);
2727 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2728 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2729 	fdrop(fp, td);
2730 	VFS_UNLOCK_GIANT(vfslocked);
2731 	return (error);
2732 }
2733 
2734 /*
2735  * Common implementation code for utimes(), lutimes(), and futimes().
2736  */
2737 static int
2738 getutimes(usrtvp, tvpseg, tsp)
2739 	const struct timeval *usrtvp;
2740 	enum uio_seg tvpseg;
2741 	struct timespec *tsp;
2742 {
2743 	struct timeval tv[2];
2744 	const struct timeval *tvp;
2745 	int error;
2746 
2747 	if (usrtvp == NULL) {
2748 		microtime(&tv[0]);
2749 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2750 		tsp[1] = tsp[0];
2751 	} else {
2752 		if (tvpseg == UIO_SYSSPACE) {
2753 			tvp = usrtvp;
2754 		} else {
2755 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2756 				return (error);
2757 			tvp = tv;
2758 		}
2759 
2760 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2761 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2762 	}
2763 	return (0);
2764 }
2765 
2766 /*
2767  * Common implementation code for utimes(), lutimes(), and futimes().
2768  */
2769 static int
2770 setutimes(td, vp, ts, numtimes, nullflag)
2771 	struct thread *td;
2772 	struct vnode *vp;
2773 	const struct timespec *ts;
2774 	int numtimes;
2775 	int nullflag;
2776 {
2777 	int error, setbirthtime;
2778 	struct mount *mp;
2779 	struct vattr vattr;
2780 
2781 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2782 		return (error);
2783 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2784 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2785 	setbirthtime = 0;
2786 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2787 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2788 		setbirthtime = 1;
2789 	VATTR_NULL(&vattr);
2790 	vattr.va_atime = ts[0];
2791 	vattr.va_mtime = ts[1];
2792 	if (setbirthtime)
2793 		vattr.va_birthtime = ts[1];
2794 	if (numtimes > 2)
2795 		vattr.va_birthtime = ts[2];
2796 	if (nullflag)
2797 		vattr.va_vaflags |= VA_UTIMES_NULL;
2798 #ifdef MAC
2799 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2800 	    vattr.va_mtime);
2801 #endif
2802 	if (error == 0)
2803 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2804 	VOP_UNLOCK(vp, 0, td);
2805 	vn_finished_write(mp);
2806 	return (error);
2807 }
2808 
2809 /*
2810  * Set the access and modification times of a file.
2811  */
2812 #ifndef _SYS_SYSPROTO_H_
2813 struct utimes_args {
2814 	char	*path;
2815 	struct	timeval *tptr;
2816 };
2817 #endif
2818 int
2819 utimes(td, uap)
2820 	struct thread *td;
2821 	register struct utimes_args /* {
2822 		char *path;
2823 		struct timeval *tptr;
2824 	} */ *uap;
2825 {
2826 
2827 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2828 	    UIO_USERSPACE));
2829 }
2830 
2831 int
2832 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2833     struct timeval *tptr, enum uio_seg tptrseg)
2834 {
2835 	struct timespec ts[2];
2836 	int error;
2837 	struct nameidata nd;
2838 	int vfslocked;
2839 
2840 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2841 		return (error);
2842 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2843 	if ((error = namei(&nd)) != 0)
2844 		return (error);
2845 	vfslocked = NDHASGIANT(&nd);
2846 	NDFREE(&nd, NDF_ONLY_PNBUF);
2847 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2848 	vrele(nd.ni_vp);
2849 	VFS_UNLOCK_GIANT(vfslocked);
2850 	return (error);
2851 }
2852 
2853 /*
2854  * Set the access and modification times of a file.
2855  */
2856 #ifndef _SYS_SYSPROTO_H_
2857 struct lutimes_args {
2858 	char	*path;
2859 	struct	timeval *tptr;
2860 };
2861 #endif
2862 int
2863 lutimes(td, uap)
2864 	struct thread *td;
2865 	register struct lutimes_args /* {
2866 		char *path;
2867 		struct timeval *tptr;
2868 	} */ *uap;
2869 {
2870 
2871 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2872 	    UIO_USERSPACE));
2873 }
2874 
2875 int
2876 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2877     struct timeval *tptr, enum uio_seg tptrseg)
2878 {
2879 	struct timespec ts[2];
2880 	int error;
2881 	struct nameidata nd;
2882 	int vfslocked;
2883 
2884 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2885 		return (error);
2886 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, pathseg, path, td);
2887 	if ((error = namei(&nd)) != 0)
2888 		return (error);
2889 	vfslocked = NDHASGIANT(&nd);
2890 	NDFREE(&nd, NDF_ONLY_PNBUF);
2891 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2892 	vrele(nd.ni_vp);
2893 	VFS_UNLOCK_GIANT(vfslocked);
2894 	return (error);
2895 }
2896 
2897 /*
2898  * Set the access and modification times of a file.
2899  */
2900 #ifndef _SYS_SYSPROTO_H_
2901 struct futimes_args {
2902 	int	fd;
2903 	struct	timeval *tptr;
2904 };
2905 #endif
2906 int
2907 futimes(td, uap)
2908 	struct thread *td;
2909 	register struct futimes_args /* {
2910 		int  fd;
2911 		struct timeval *tptr;
2912 	} */ *uap;
2913 {
2914 
2915 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2916 }
2917 
2918 int
2919 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2920     enum uio_seg tptrseg)
2921 {
2922 	struct timespec ts[2];
2923 	struct file *fp;
2924 	int vfslocked;
2925 	int error;
2926 
2927 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2928 		return (error);
2929 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2930 		return (error);
2931 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2932 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2933 	fdrop(fp, td);
2934 	VFS_UNLOCK_GIANT(vfslocked);
2935 	return (error);
2936 }
2937 
2938 /*
2939  * Truncate a file given its path name.
2940  */
2941 #ifndef _SYS_SYSPROTO_H_
2942 struct truncate_args {
2943 	char	*path;
2944 	int	pad;
2945 	off_t	length;
2946 };
2947 #endif
2948 int
2949 truncate(td, uap)
2950 	struct thread *td;
2951 	register struct truncate_args /* {
2952 		char *path;
2953 		int pad;
2954 		off_t length;
2955 	} */ *uap;
2956 {
2957 
2958 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2959 }
2960 
2961 int
2962 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2963 {
2964 	struct mount *mp;
2965 	struct vnode *vp;
2966 	struct vattr vattr;
2967 	int error;
2968 	struct nameidata nd;
2969 	int vfslocked;
2970 
2971 	if (length < 0)
2972 		return(EINVAL);
2973 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2974 	if ((error = namei(&nd)) != 0)
2975 		return (error);
2976 	vfslocked = NDHASGIANT(&nd);
2977 	vp = nd.ni_vp;
2978 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2979 		vrele(vp);
2980 		VFS_UNLOCK_GIANT(vfslocked);
2981 		return (error);
2982 	}
2983 	NDFREE(&nd, NDF_ONLY_PNBUF);
2984 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2985 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2986 	if (vp->v_type == VDIR)
2987 		error = EISDIR;
2988 #ifdef MAC
2989 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2990 	}
2991 #endif
2992 	else if ((error = vn_writechk(vp)) == 0 &&
2993 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2994 		VATTR_NULL(&vattr);
2995 		vattr.va_size = length;
2996 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2997 	}
2998 	vput(vp);
2999 	vn_finished_write(mp);
3000 	VFS_UNLOCK_GIANT(vfslocked);
3001 	return (error);
3002 }
3003 
3004 /*
3005  * Truncate a file given a file descriptor.
3006  */
3007 #ifndef _SYS_SYSPROTO_H_
3008 struct ftruncate_args {
3009 	int	fd;
3010 	int	pad;
3011 	off_t	length;
3012 };
3013 #endif
3014 int
3015 ftruncate(td, uap)
3016 	struct thread *td;
3017 	register struct ftruncate_args /* {
3018 		int fd;
3019 		int pad;
3020 		off_t length;
3021 	} */ *uap;
3022 {
3023 	struct mount *mp;
3024 	struct vattr vattr;
3025 	struct vnode *vp;
3026 	struct file *fp;
3027 	int vfslocked;
3028 	int error;
3029 
3030 	if (uap->length < 0)
3031 		return(EINVAL);
3032 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3033 		return (error);
3034 	if ((fp->f_flag & FWRITE) == 0) {
3035 		fdrop(fp, td);
3036 		return (EINVAL);
3037 	}
3038 	vp = fp->f_vnode;
3039 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3040 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3041 		goto drop;
3042 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3043 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3044 	if (vp->v_type == VDIR)
3045 		error = EISDIR;
3046 #ifdef MAC
3047 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3048 	    vp))) {
3049 	}
3050 #endif
3051 	else if ((error = vn_writechk(vp)) == 0) {
3052 		VATTR_NULL(&vattr);
3053 		vattr.va_size = uap->length;
3054 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3055 	}
3056 	VOP_UNLOCK(vp, 0, td);
3057 	vn_finished_write(mp);
3058 drop:
3059 	VFS_UNLOCK_GIANT(vfslocked);
3060 	fdrop(fp, td);
3061 	return (error);
3062 }
3063 
3064 #if defined(COMPAT_43)
3065 /*
3066  * Truncate a file given its path name.
3067  */
3068 #ifndef _SYS_SYSPROTO_H_
3069 struct otruncate_args {
3070 	char	*path;
3071 	long	length;
3072 };
3073 #endif
3074 int
3075 otruncate(td, uap)
3076 	struct thread *td;
3077 	register struct otruncate_args /* {
3078 		char *path;
3079 		long length;
3080 	} */ *uap;
3081 {
3082 	struct truncate_args /* {
3083 		char *path;
3084 		int pad;
3085 		off_t length;
3086 	} */ nuap;
3087 
3088 	nuap.path = uap->path;
3089 	nuap.length = uap->length;
3090 	return (truncate(td, &nuap));
3091 }
3092 
3093 /*
3094  * Truncate a file given a file descriptor.
3095  */
3096 #ifndef _SYS_SYSPROTO_H_
3097 struct oftruncate_args {
3098 	int	fd;
3099 	long	length;
3100 };
3101 #endif
3102 int
3103 oftruncate(td, uap)
3104 	struct thread *td;
3105 	register struct oftruncate_args /* {
3106 		int fd;
3107 		long length;
3108 	} */ *uap;
3109 {
3110 	struct ftruncate_args /* {
3111 		int fd;
3112 		int pad;
3113 		off_t length;
3114 	} */ nuap;
3115 
3116 	nuap.fd = uap->fd;
3117 	nuap.length = uap->length;
3118 	return (ftruncate(td, &nuap));
3119 }
3120 #endif /* COMPAT_43 */
3121 
3122 /*
3123  * Sync an open file.
3124  */
3125 #ifndef _SYS_SYSPROTO_H_
3126 struct fsync_args {
3127 	int	fd;
3128 };
3129 #endif
3130 int
3131 fsync(td, uap)
3132 	struct thread *td;
3133 	struct fsync_args /* {
3134 		int fd;
3135 	} */ *uap;
3136 {
3137 	struct vnode *vp;
3138 	struct mount *mp;
3139 	struct file *fp;
3140 	int vfslocked;
3141 	int error;
3142 
3143 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3144 		return (error);
3145 	vp = fp->f_vnode;
3146 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3147 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3148 		goto drop;
3149 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3150 	if (vp->v_object != NULL) {
3151 		VM_OBJECT_LOCK(vp->v_object);
3152 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3153 		VM_OBJECT_UNLOCK(vp->v_object);
3154 	}
3155 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3156 
3157 	VOP_UNLOCK(vp, 0, td);
3158 	vn_finished_write(mp);
3159 drop:
3160 	VFS_UNLOCK_GIANT(vfslocked);
3161 	fdrop(fp, td);
3162 	return (error);
3163 }
3164 
3165 /*
3166  * Rename files.  Source and destination must either both be directories,
3167  * or both not be directories.  If target is a directory, it must be empty.
3168  */
3169 #ifndef _SYS_SYSPROTO_H_
3170 struct rename_args {
3171 	char	*from;
3172 	char	*to;
3173 };
3174 #endif
3175 int
3176 rename(td, uap)
3177 	struct thread *td;
3178 	register struct rename_args /* {
3179 		char *from;
3180 		char *to;
3181 	} */ *uap;
3182 {
3183 
3184 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3185 }
3186 
3187 int
3188 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3189 {
3190 	struct mount *mp = NULL;
3191 	struct vnode *tvp, *fvp, *tdvp;
3192 	struct nameidata fromnd, tond;
3193 	int tvfslocked;
3194 	int fvfslocked;
3195 	int error;
3196 
3197 	bwillwrite();
3198 #ifdef MAC
3199 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE,
3200 	    pathseg, from, td);
3201 #else
3202 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE,
3203 	    pathseg, from, td);
3204 #endif
3205 	if ((error = namei(&fromnd)) != 0)
3206 		return (error);
3207 	fvfslocked = NDHASGIANT(&fromnd);
3208 	tvfslocked = 0;
3209 #ifdef MAC
3210 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3211 	    fromnd.ni_vp, &fromnd.ni_cnd);
3212 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3213 	VOP_UNLOCK(fromnd.ni_vp, 0, td);
3214 #endif
3215 	fvp = fromnd.ni_vp;
3216 	if (error == 0)
3217 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3218 	if (error != 0) {
3219 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3220 		vrele(fromnd.ni_dvp);
3221 		vrele(fvp);
3222 		goto out1;
3223 	}
3224 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3225 	    MPSAFE, pathseg, to, td);
3226 	if (fromnd.ni_vp->v_type == VDIR)
3227 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3228 	if ((error = namei(&tond)) != 0) {
3229 		/* Translate error code for rename("dir1", "dir2/."). */
3230 		if (error == EISDIR && fvp->v_type == VDIR)
3231 			error = EINVAL;
3232 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3233 		vrele(fromnd.ni_dvp);
3234 		vrele(fvp);
3235 		goto out1;
3236 	}
3237 	tvfslocked = NDHASGIANT(&tond);
3238 	tdvp = tond.ni_dvp;
3239 	tvp = tond.ni_vp;
3240 	if (tvp != NULL) {
3241 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3242 			error = ENOTDIR;
3243 			goto out;
3244 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3245 			error = EISDIR;
3246 			goto out;
3247 		}
3248 	}
3249 	if (fvp == tdvp)
3250 		error = EINVAL;
3251 	/*
3252 	 * If the source is the same as the destination (that is, if they
3253 	 * are links to the same vnode), then there is nothing to do.
3254 	 */
3255 	if (fvp == tvp)
3256 		error = -1;
3257 #ifdef MAC
3258 	else
3259 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3260 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3261 #endif
3262 out:
3263 	if (!error) {
3264 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3265 		if (fromnd.ni_dvp != tdvp) {
3266 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3267 		}
3268 		if (tvp) {
3269 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3270 		}
3271 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3272 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3273 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3274 		NDFREE(&tond, NDF_ONLY_PNBUF);
3275 	} else {
3276 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3277 		NDFREE(&tond, NDF_ONLY_PNBUF);
3278 		if (tvp)
3279 			vput(tvp);
3280 		if (tdvp == tvp)
3281 			vrele(tdvp);
3282 		else
3283 			vput(tdvp);
3284 		vrele(fromnd.ni_dvp);
3285 		vrele(fvp);
3286 	}
3287 	vrele(tond.ni_startdir);
3288 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
3289 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
3290 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
3291 	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
3292 out1:
3293 	vn_finished_write(mp);
3294 	if (fromnd.ni_startdir)
3295 		vrele(fromnd.ni_startdir);
3296 	VFS_UNLOCK_GIANT(fvfslocked);
3297 	VFS_UNLOCK_GIANT(tvfslocked);
3298 	if (error == -1)
3299 		return (0);
3300 	return (error);
3301 }
3302 
3303 /*
3304  * Make a directory file.
3305  */
3306 #ifndef _SYS_SYSPROTO_H_
3307 struct mkdir_args {
3308 	char	*path;
3309 	int	mode;
3310 };
3311 #endif
3312 int
3313 mkdir(td, uap)
3314 	struct thread *td;
3315 	register struct mkdir_args /* {
3316 		char *path;
3317 		int mode;
3318 	} */ *uap;
3319 {
3320 
3321 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3322 }
3323 
3324 int
3325 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3326 {
3327 	struct mount *mp;
3328 	struct vnode *vp;
3329 	struct vattr vattr;
3330 	int error;
3331 	struct nameidata nd;
3332 	int vfslocked;
3333 
3334 restart:
3335 	bwillwrite();
3336 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, segflg, path, td);
3337 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3338 	if ((error = namei(&nd)) != 0)
3339 		return (error);
3340 	vfslocked = NDHASGIANT(&nd);
3341 	vp = nd.ni_vp;
3342 	if (vp != NULL) {
3343 		NDFREE(&nd, NDF_ONLY_PNBUF);
3344 		vrele(vp);
3345 		/*
3346 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3347 		 * the strange behaviour of leaving the vnode unlocked
3348 		 * if the target is the same vnode as the parent.
3349 		 */
3350 		if (vp == nd.ni_dvp)
3351 			vrele(nd.ni_dvp);
3352 		else
3353 			vput(nd.ni_dvp);
3354 		VFS_UNLOCK_GIANT(vfslocked);
3355 		return (EEXIST);
3356 	}
3357 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3358 		NDFREE(&nd, NDF_ONLY_PNBUF);
3359 		vput(nd.ni_dvp);
3360 		VFS_UNLOCK_GIANT(vfslocked);
3361 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3362 			return (error);
3363 		goto restart;
3364 	}
3365 	VATTR_NULL(&vattr);
3366 	vattr.va_type = VDIR;
3367 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3368 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3369 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3370 #ifdef MAC
3371 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3372 	    &vattr);
3373 	if (error)
3374 		goto out;
3375 #endif
3376 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3377 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3378 #ifdef MAC
3379 out:
3380 #endif
3381 	NDFREE(&nd, NDF_ONLY_PNBUF);
3382 	vput(nd.ni_dvp);
3383 	if (!error)
3384 		vput(nd.ni_vp);
3385 	vn_finished_write(mp);
3386 	VFS_UNLOCK_GIANT(vfslocked);
3387 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
3388 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
3389 	return (error);
3390 }
3391 
3392 /*
3393  * Remove a directory file.
3394  */
3395 #ifndef _SYS_SYSPROTO_H_
3396 struct rmdir_args {
3397 	char	*path;
3398 };
3399 #endif
3400 int
3401 rmdir(td, uap)
3402 	struct thread *td;
3403 	struct rmdir_args /* {
3404 		char *path;
3405 	} */ *uap;
3406 {
3407 
3408 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3409 }
3410 
3411 int
3412 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3413 {
3414 	struct mount *mp;
3415 	struct vnode *vp;
3416 	int error;
3417 	struct nameidata nd;
3418 	int vfslocked;
3419 
3420 restart:
3421 	bwillwrite();
3422 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE, pathseg, path, td);
3423 	if ((error = namei(&nd)) != 0)
3424 		return (error);
3425 	vfslocked = NDHASGIANT(&nd);
3426 	vp = nd.ni_vp;
3427 	if (vp->v_type != VDIR) {
3428 		error = ENOTDIR;
3429 		goto out;
3430 	}
3431 	/*
3432 	 * No rmdir "." please.
3433 	 */
3434 	if (nd.ni_dvp == vp) {
3435 		error = EINVAL;
3436 		goto out;
3437 	}
3438 	/*
3439 	 * The root of a mounted filesystem cannot be deleted.
3440 	 */
3441 	if (vp->v_vflag & VV_ROOT) {
3442 		error = EBUSY;
3443 		goto out;
3444 	}
3445 #ifdef MAC
3446 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3447 	    &nd.ni_cnd);
3448 	if (error)
3449 		goto out;
3450 #endif
3451 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3452 		NDFREE(&nd, NDF_ONLY_PNBUF);
3453 		if (nd.ni_dvp == vp)
3454 			vrele(nd.ni_dvp);
3455 		else
3456 			vput(nd.ni_dvp);
3457 		vput(vp);
3458 		VFS_UNLOCK_GIANT(vfslocked);
3459 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3460 			return (error);
3461 		goto restart;
3462 	}
3463 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3464 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3465 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3466 	vn_finished_write(mp);
3467 out:
3468 	NDFREE(&nd, NDF_ONLY_PNBUF);
3469 	if (nd.ni_dvp == vp)
3470 		vrele(nd.ni_dvp);
3471 	else
3472 		vput(nd.ni_dvp);
3473 	vput(vp);
3474 	VFS_UNLOCK_GIANT(vfslocked);
3475 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3476 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3477 	return (error);
3478 }
3479 
3480 #ifdef COMPAT_43
3481 /*
3482  * Read a block of directory entries in a filesystem independent format.
3483  */
3484 #ifndef _SYS_SYSPROTO_H_
3485 struct ogetdirentries_args {
3486 	int	fd;
3487 	char	*buf;
3488 	u_int	count;
3489 	long	*basep;
3490 };
3491 #endif
3492 int
3493 ogetdirentries(td, uap)
3494 	struct thread *td;
3495 	register struct ogetdirentries_args /* {
3496 		int fd;
3497 		char *buf;
3498 		u_int count;
3499 		long *basep;
3500 	} */ *uap;
3501 {
3502 	struct vnode *vp;
3503 	struct file *fp;
3504 	struct uio auio, kuio;
3505 	struct iovec aiov, kiov;
3506 	struct dirent *dp, *edp;
3507 	caddr_t dirbuf;
3508 	int error, eofflag, readcnt;
3509 	long loff;
3510 
3511 	/* XXX arbitrary sanity limit on `count'. */
3512 	if (uap->count > 64 * 1024)
3513 		return (EINVAL);
3514 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3515 		return (error);
3516 	if ((fp->f_flag & FREAD) == 0) {
3517 		fdrop(fp, td);
3518 		return (EBADF);
3519 	}
3520 	vp = fp->f_vnode;
3521 unionread:
3522 	if (vp->v_type != VDIR) {
3523 		fdrop(fp, td);
3524 		return (EINVAL);
3525 	}
3526 	aiov.iov_base = uap->buf;
3527 	aiov.iov_len = uap->count;
3528 	auio.uio_iov = &aiov;
3529 	auio.uio_iovcnt = 1;
3530 	auio.uio_rw = UIO_READ;
3531 	auio.uio_segflg = UIO_USERSPACE;
3532 	auio.uio_td = td;
3533 	auio.uio_resid = uap->count;
3534 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3535 	loff = auio.uio_offset = fp->f_offset;
3536 #ifdef MAC
3537 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3538 	if (error) {
3539 		VOP_UNLOCK(vp, 0, td);
3540 		fdrop(fp, td);
3541 		return (error);
3542 	}
3543 #endif
3544 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3545 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3546 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3547 			    NULL, NULL);
3548 			fp->f_offset = auio.uio_offset;
3549 		} else
3550 #	endif
3551 	{
3552 		kuio = auio;
3553 		kuio.uio_iov = &kiov;
3554 		kuio.uio_segflg = UIO_SYSSPACE;
3555 		kiov.iov_len = uap->count;
3556 		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3557 		kiov.iov_base = dirbuf;
3558 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3559 			    NULL, NULL);
3560 		fp->f_offset = kuio.uio_offset;
3561 		if (error == 0) {
3562 			readcnt = uap->count - kuio.uio_resid;
3563 			edp = (struct dirent *)&dirbuf[readcnt];
3564 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3565 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3566 					/*
3567 					 * The expected low byte of
3568 					 * dp->d_namlen is our dp->d_type.
3569 					 * The high MBZ byte of dp->d_namlen
3570 					 * is our dp->d_namlen.
3571 					 */
3572 					dp->d_type = dp->d_namlen;
3573 					dp->d_namlen = 0;
3574 #				else
3575 					/*
3576 					 * The dp->d_type is the high byte
3577 					 * of the expected dp->d_namlen,
3578 					 * so must be zero'ed.
3579 					 */
3580 					dp->d_type = 0;
3581 #				endif
3582 				if (dp->d_reclen > 0) {
3583 					dp = (struct dirent *)
3584 					    ((char *)dp + dp->d_reclen);
3585 				} else {
3586 					error = EIO;
3587 					break;
3588 				}
3589 			}
3590 			if (dp >= edp)
3591 				error = uiomove(dirbuf, readcnt, &auio);
3592 		}
3593 		FREE(dirbuf, M_TEMP);
3594 	}
3595 	VOP_UNLOCK(vp, 0, td);
3596 	if (error) {
3597 		fdrop(fp, td);
3598 		return (error);
3599 	}
3600 	if (uap->count == auio.uio_resid) {
3601 		if (union_dircheckp) {
3602 			error = union_dircheckp(td, &vp, fp);
3603 			if (error == -1)
3604 				goto unionread;
3605 			if (error) {
3606 				fdrop(fp, td);
3607 				return (error);
3608 			}
3609 		}
3610 		/*
3611 		 * XXX We could delay dropping the lock above but
3612 		 * union_dircheckp complicates things.
3613 		 */
3614 		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
3615 		if ((vp->v_vflag & VV_ROOT) &&
3616 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3617 			struct vnode *tvp = vp;
3618 			vp = vp->v_mount->mnt_vnodecovered;
3619 			VREF(vp);
3620 			fp->f_vnode = vp;
3621 			fp->f_data = vp;
3622 			fp->f_offset = 0;
3623 			vput(tvp);
3624 			goto unionread;
3625 		}
3626 		VOP_UNLOCK(vp, 0, td);
3627 	}
3628 	error = copyout(&loff, uap->basep, sizeof(long));
3629 	fdrop(fp, td);
3630 	td->td_retval[0] = uap->count - auio.uio_resid;
3631 	return (error);
3632 }
3633 #endif /* COMPAT_43 */
3634 
3635 /*
3636  * Read a block of directory entries in a filesystem independent format.
3637  */
3638 #ifndef _SYS_SYSPROTO_H_
3639 struct getdirentries_args {
3640 	int	fd;
3641 	char	*buf;
3642 	u_int	count;
3643 	long	*basep;
3644 };
3645 #endif
3646 int
3647 getdirentries(td, uap)
3648 	struct thread *td;
3649 	register struct getdirentries_args /* {
3650 		int fd;
3651 		char *buf;
3652 		u_int count;
3653 		long *basep;
3654 	} */ *uap;
3655 {
3656 	struct vnode *vp;
3657 	struct file *fp;
3658 	struct uio auio;
3659 	struct iovec aiov;
3660 	int vfslocked;
3661 	long loff;
3662 	int error, eofflag;
3663 
3664 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3665 		return (error);
3666 	if ((fp->f_flag & FREAD) == 0) {
3667 		fdrop(fp, td);
3668 		return (EBADF);
3669 	}
3670 	vp = fp->f_vnode;
3671 unionread:
3672 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3673 	if (vp->v_type != VDIR) {
3674 		error = EINVAL;
3675 		goto fail;
3676 	}
3677 	aiov.iov_base = uap->buf;
3678 	aiov.iov_len = uap->count;
3679 	auio.uio_iov = &aiov;
3680 	auio.uio_iovcnt = 1;
3681 	auio.uio_rw = UIO_READ;
3682 	auio.uio_segflg = UIO_USERSPACE;
3683 	auio.uio_td = td;
3684 	auio.uio_resid = uap->count;
3685 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3686 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3687 	loff = auio.uio_offset = fp->f_offset;
3688 #ifdef MAC
3689 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3690 	if (error == 0)
3691 #endif
3692 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3693 		    NULL);
3694 	fp->f_offset = auio.uio_offset;
3695 	VOP_UNLOCK(vp, 0, td);
3696 	if (error)
3697 		goto fail;
3698 	if (uap->count == auio.uio_resid) {
3699 		if (union_dircheckp) {
3700 			error = union_dircheckp(td, &vp, fp);
3701 			if (error == -1) {
3702 				VFS_UNLOCK_GIANT(vfslocked);
3703 				goto unionread;
3704 			}
3705 			if (error)
3706 				goto fail;
3707 		}
3708 		/*
3709 		 * XXX We could delay dropping the lock above but
3710 		 * union_dircheckp complicates things.
3711 		 */
3712 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3713 		if ((vp->v_vflag & VV_ROOT) &&
3714 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3715 			struct vnode *tvp = vp;
3716 			vp = vp->v_mount->mnt_vnodecovered;
3717 			VREF(vp);
3718 			fp->f_vnode = vp;
3719 			fp->f_data = vp;
3720 			fp->f_offset = 0;
3721 			vput(tvp);
3722 			VFS_UNLOCK_GIANT(vfslocked);
3723 			goto unionread;
3724 		}
3725 		VOP_UNLOCK(vp, 0, td);
3726 	}
3727 	if (uap->basep != NULL) {
3728 		error = copyout(&loff, uap->basep, sizeof(long));
3729 	}
3730 	td->td_retval[0] = uap->count - auio.uio_resid;
3731 fail:
3732 	VFS_UNLOCK_GIANT(vfslocked);
3733 	fdrop(fp, td);
3734 	return (error);
3735 }
3736 #ifndef _SYS_SYSPROTO_H_
3737 struct getdents_args {
3738 	int fd;
3739 	char *buf;
3740 	size_t count;
3741 };
3742 #endif
3743 int
3744 getdents(td, uap)
3745 	struct thread *td;
3746 	register struct getdents_args /* {
3747 		int fd;
3748 		char *buf;
3749 		u_int count;
3750 	} */ *uap;
3751 {
3752 	struct getdirentries_args ap;
3753 	ap.fd = uap->fd;
3754 	ap.buf = uap->buf;
3755 	ap.count = uap->count;
3756 	ap.basep = NULL;
3757 	return (getdirentries(td, &ap));
3758 }
3759 
3760 /*
3761  * Set the mode mask for creation of filesystem nodes.
3762  *
3763  * MP SAFE
3764  */
3765 #ifndef _SYS_SYSPROTO_H_
3766 struct umask_args {
3767 	int	newmask;
3768 };
3769 #endif
3770 int
3771 umask(td, uap)
3772 	struct thread *td;
3773 	struct umask_args /* {
3774 		int newmask;
3775 	} */ *uap;
3776 {
3777 	register struct filedesc *fdp;
3778 
3779 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3780 	fdp = td->td_proc->p_fd;
3781 	td->td_retval[0] = fdp->fd_cmask;
3782 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3783 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3784 	return (0);
3785 }
3786 
3787 /*
3788  * Void all references to file by ripping underlying filesystem
3789  * away from vnode.
3790  */
3791 #ifndef _SYS_SYSPROTO_H_
3792 struct revoke_args {
3793 	char	*path;
3794 };
3795 #endif
3796 int
3797 revoke(td, uap)
3798 	struct thread *td;
3799 	register struct revoke_args /* {
3800 		char *path;
3801 	} */ *uap;
3802 {
3803 	struct vnode *vp;
3804 	struct vattr vattr;
3805 	int error;
3806 	struct nameidata nd;
3807 	int vfslocked;
3808 
3809 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE,
3810 	    uap->path, td);
3811 	if ((error = namei(&nd)) != 0)
3812 		return (error);
3813 	vfslocked = NDHASGIANT(&nd);
3814 	vp = nd.ni_vp;
3815 	NDFREE(&nd, NDF_ONLY_PNBUF);
3816 	if (vp->v_type != VCHR) {
3817 		error = EINVAL;
3818 		goto out;
3819 	}
3820 #ifdef MAC
3821 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3822 	if (error)
3823 		goto out;
3824 #endif
3825 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3826 	if (error)
3827 		goto out;
3828 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3829 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
3830 		if (error)
3831 			goto out;
3832 	}
3833 	if (vcount(vp) > 1)
3834 		VOP_REVOKE(vp, REVOKEALL);
3835 out:
3836 	vput(vp);
3837 	VFS_UNLOCK_GIANT(vfslocked);
3838 	return (error);
3839 }
3840 
3841 /*
3842  * Convert a user file descriptor to a kernel file entry.
3843  * A reference on the file entry is held upon returning.
3844  */
3845 int
3846 getvnode(fdp, fd, fpp)
3847 	struct filedesc *fdp;
3848 	int fd;
3849 	struct file **fpp;
3850 {
3851 	int error;
3852 	struct file *fp;
3853 
3854 	fp = NULL;
3855 	if (fdp == NULL)
3856 		error = EBADF;
3857 	else {
3858 		FILEDESC_LOCK(fdp);
3859 		if ((u_int)fd >= fdp->fd_nfiles ||
3860 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3861 			error = EBADF;
3862 		else if (fp->f_vnode == NULL) {
3863 			fp = NULL;
3864 			error = EINVAL;
3865 		} else {
3866 			fhold(fp);
3867 			error = 0;
3868 		}
3869 		FILEDESC_UNLOCK(fdp);
3870 	}
3871 	*fpp = fp;
3872 	return (error);
3873 }
3874 
3875 /*
3876  * Get (NFS) file handle
3877  */
3878 #ifndef _SYS_SYSPROTO_H_
3879 struct lgetfh_args {
3880 	char	*fname;
3881 	fhandle_t *fhp;
3882 };
3883 #endif
3884 int
3885 lgetfh(td, uap)
3886 	struct thread *td;
3887 	register struct lgetfh_args *uap;
3888 {
3889 	struct nameidata nd;
3890 	fhandle_t fh;
3891 	register struct vnode *vp;
3892 	int vfslocked;
3893 	int error;
3894 
3895 	error = suser(td);
3896 	if (error)
3897 		return (error);
3898 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE,
3899 	    UIO_USERSPACE, uap->fname, td);
3900 	error = namei(&nd);
3901 	if (error)
3902 		return (error);
3903 	vfslocked = NDHASGIANT(&nd);
3904 	NDFREE(&nd, NDF_ONLY_PNBUF);
3905 	vp = nd.ni_vp;
3906 	bzero(&fh, sizeof(fh));
3907 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3908 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3909 	vput(vp);
3910 	VFS_UNLOCK_GIANT(vfslocked);
3911 	if (error)
3912 		return (error);
3913 	error = copyout(&fh, uap->fhp, sizeof (fh));
3914 	return (error);
3915 }
3916 
3917 #ifndef _SYS_SYSPROTO_H_
3918 struct getfh_args {
3919 	char	*fname;
3920 	fhandle_t *fhp;
3921 };
3922 #endif
3923 int
3924 getfh(td, uap)
3925 	struct thread *td;
3926 	register struct getfh_args *uap;
3927 {
3928 	struct nameidata nd;
3929 	fhandle_t fh;
3930 	register struct vnode *vp;
3931 	int vfslocked;
3932 	int error;
3933 
3934 	error = suser(td);
3935 	if (error)
3936 		return (error);
3937 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE,
3938 	    UIO_USERSPACE, uap->fname, td);
3939 	error = namei(&nd);
3940 	if (error)
3941 		return (error);
3942 	vfslocked = NDHASGIANT(&nd);
3943 	NDFREE(&nd, NDF_ONLY_PNBUF);
3944 	vp = nd.ni_vp;
3945 	bzero(&fh, sizeof(fh));
3946 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3947 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3948 	vput(vp);
3949 	VFS_UNLOCK_GIANT(vfslocked);
3950 	if (error)
3951 		return (error);
3952 	error = copyout(&fh, uap->fhp, sizeof (fh));
3953 	return (error);
3954 }
3955 
3956 /*
3957  * syscall for the rpc.lockd to use to translate a NFS file handle into
3958  * an open descriptor.
3959  *
3960  * warning: do not remove the suser() call or this becomes one giant
3961  * security hole.
3962  *
3963  * MP SAFE
3964  */
3965 #ifndef _SYS_SYSPROTO_H_
3966 struct fhopen_args {
3967 	const struct fhandle *u_fhp;
3968 	int flags;
3969 };
3970 #endif
3971 int
3972 fhopen(td, uap)
3973 	struct thread *td;
3974 	struct fhopen_args /* {
3975 		const struct fhandle *u_fhp;
3976 		int flags;
3977 	} */ *uap;
3978 {
3979 	struct proc *p = td->td_proc;
3980 	struct mount *mp;
3981 	struct vnode *vp;
3982 	struct fhandle fhp;
3983 	struct vattr vat;
3984 	struct vattr *vap = &vat;
3985 	struct flock lf;
3986 	struct file *fp;
3987 	register struct filedesc *fdp = p->p_fd;
3988 	int fmode, mode, error, type;
3989 	struct file *nfp;
3990 	int indx;
3991 
3992 	error = suser(td);
3993 	if (error)
3994 		return (error);
3995 	fmode = FFLAGS(uap->flags);
3996 	/* why not allow a non-read/write open for our lockd? */
3997 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3998 		return (EINVAL);
3999 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4000 	if (error)
4001 		return(error);
4002 	/* find the mount point */
4003 	mtx_lock(&Giant);
4004 	mp = vfs_getvfs(&fhp.fh_fsid);
4005 	if (mp == NULL) {
4006 		error = ESTALE;
4007 		goto out;
4008 	}
4009 	/* now give me my vnode, it gets returned to me locked */
4010 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4011 	if (error)
4012 		goto out;
4013 	/*
4014 	 * from now on we have to make sure not
4015 	 * to forget about the vnode
4016 	 * any error that causes an abort must vput(vp)
4017 	 * just set error = err and 'goto bad;'.
4018 	 */
4019 
4020 	/*
4021 	 * from vn_open
4022 	 */
4023 	if (vp->v_type == VLNK) {
4024 		error = EMLINK;
4025 		goto bad;
4026 	}
4027 	if (vp->v_type == VSOCK) {
4028 		error = EOPNOTSUPP;
4029 		goto bad;
4030 	}
4031 	mode = 0;
4032 	if (fmode & (FWRITE | O_TRUNC)) {
4033 		if (vp->v_type == VDIR) {
4034 			error = EISDIR;
4035 			goto bad;
4036 		}
4037 		error = vn_writechk(vp);
4038 		if (error)
4039 			goto bad;
4040 		mode |= VWRITE;
4041 	}
4042 	if (fmode & FREAD)
4043 		mode |= VREAD;
4044 	if (fmode & O_APPEND)
4045 		mode |= VAPPEND;
4046 #ifdef MAC
4047 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4048 	if (error)
4049 		goto bad;
4050 #endif
4051 	if (mode) {
4052 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4053 		if (error)
4054 			goto bad;
4055 	}
4056 	if (fmode & O_TRUNC) {
4057 		VOP_UNLOCK(vp, 0, td);				/* XXX */
4058 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4059 			vrele(vp);
4060 			goto out;
4061 		}
4062 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4063 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4064 #ifdef MAC
4065 		/*
4066 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4067 		 * should be right.
4068 		 */
4069 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4070 		if (error == 0) {
4071 #endif
4072 			VATTR_NULL(vap);
4073 			vap->va_size = 0;
4074 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4075 #ifdef MAC
4076 		}
4077 #endif
4078 		vn_finished_write(mp);
4079 		if (error)
4080 			goto bad;
4081 	}
4082 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
4083 	if (error)
4084 		goto bad;
4085 
4086 	if (fmode & FWRITE)
4087 		vp->v_writecount++;
4088 
4089 	/*
4090 	 * end of vn_open code
4091 	 */
4092 
4093 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4094 		if (fmode & FWRITE)
4095 			vp->v_writecount--;
4096 		goto bad;
4097 	}
4098 	/* An extra reference on `nfp' has been held for us by falloc(). */
4099 	fp = nfp;
4100 
4101 	nfp->f_vnode = vp;
4102 	nfp->f_data = vp;
4103 	nfp->f_flag = fmode & FMASK;
4104 	nfp->f_ops = &vnops;
4105 	nfp->f_type = DTYPE_VNODE;
4106 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4107 		lf.l_whence = SEEK_SET;
4108 		lf.l_start = 0;
4109 		lf.l_len = 0;
4110 		if (fmode & O_EXLOCK)
4111 			lf.l_type = F_WRLCK;
4112 		else
4113 			lf.l_type = F_RDLCK;
4114 		type = F_FLOCK;
4115 		if ((fmode & FNONBLOCK) == 0)
4116 			type |= F_WAIT;
4117 		VOP_UNLOCK(vp, 0, td);
4118 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4119 			    type)) != 0) {
4120 			/*
4121 			 * The lock request failed.  Normally close the
4122 			 * descriptor but handle the case where someone might
4123 			 * have dup()d or close()d it when we weren't looking.
4124 			 */
4125 			fdclose(fdp, fp, indx, td);
4126 
4127 			/*
4128 			 * release our private reference
4129 			 */
4130 			fdrop(fp, td);
4131 			goto out;
4132 		}
4133 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4134 		fp->f_flag |= FHASLOCK;
4135 	}
4136 
4137 	VOP_UNLOCK(vp, 0, td);
4138 	fdrop(fp, td);
4139 	mtx_unlock(&Giant);
4140 	td->td_retval[0] = indx;
4141 	return (0);
4142 
4143 bad:
4144 	vput(vp);
4145 out:
4146 	mtx_unlock(&Giant);
4147 	return (error);
4148 }
4149 
4150 /*
4151  * Stat an (NFS) file handle.
4152  *
4153  * MP SAFE
4154  */
4155 #ifndef _SYS_SYSPROTO_H_
4156 struct fhstat_args {
4157 	struct fhandle *u_fhp;
4158 	struct stat *sb;
4159 };
4160 #endif
4161 int
4162 fhstat(td, uap)
4163 	struct thread *td;
4164 	register struct fhstat_args /* {
4165 		struct fhandle *u_fhp;
4166 		struct stat *sb;
4167 	} */ *uap;
4168 {
4169 	struct stat sb;
4170 	fhandle_t fh;
4171 	struct mount *mp;
4172 	struct vnode *vp;
4173 	int error;
4174 
4175 	error = suser(td);
4176 	if (error)
4177 		return (error);
4178 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4179 	if (error)
4180 		return (error);
4181 	mtx_lock(&Giant);
4182 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
4183 		mtx_unlock(&Giant);
4184 		return (ESTALE);
4185 	}
4186 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4187 		mtx_unlock(&Giant);
4188 		return (error);
4189 	}
4190 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4191 	vput(vp);
4192 	mtx_unlock(&Giant);
4193 	if (error)
4194 		return (error);
4195 	error = copyout(&sb, uap->sb, sizeof(sb));
4196 	return (error);
4197 }
4198 
4199 /*
4200  * Implement fstatfs() for (NFS) file handles.
4201  *
4202  * MP SAFE
4203  */
4204 #ifndef _SYS_SYSPROTO_H_
4205 struct fhstatfs_args {
4206 	struct fhandle *u_fhp;
4207 	struct statfs *buf;
4208 };
4209 #endif
4210 int
4211 fhstatfs(td, uap)
4212 	struct thread *td;
4213 	struct fhstatfs_args /* {
4214 		struct fhandle *u_fhp;
4215 		struct statfs *buf;
4216 	} */ *uap;
4217 {
4218 	struct statfs sf;
4219 	fhandle_t fh;
4220 	int error;
4221 
4222 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4223 	if (error)
4224 		return (error);
4225 	error = kern_fhstatfs(td, fh, &sf);
4226 	if (error)
4227 		return (error);
4228 	return (copyout(&sf, uap->buf, sizeof(sf)));
4229 }
4230 
4231 int
4232 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4233 {
4234 	struct statfs *sp;
4235 	struct mount *mp;
4236 	struct vnode *vp;
4237 	int error;
4238 
4239 	error = suser(td);
4240 	if (error)
4241 		return (error);
4242 	mtx_lock(&Giant);
4243 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
4244 		mtx_unlock(&Giant);
4245 		return (ESTALE);
4246 	}
4247 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4248 	if (error) {
4249 		mtx_unlock(&Giant);
4250 		return (error);
4251 	}
4252 	mp = vp->v_mount;
4253 	sp = &mp->mnt_stat;
4254 	vput(vp);
4255 #ifdef MAC
4256 	error = mac_check_mount_stat(td->td_ucred, mp);
4257 	if (error) {
4258 		mtx_unlock(&Giant);
4259 		return (error);
4260 	}
4261 #endif
4262 	/*
4263 	 * Set these in case the underlying filesystem fails to do so.
4264 	 */
4265 	sp->f_version = STATFS_VERSION;
4266 	sp->f_namemax = NAME_MAX;
4267 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4268 	error = VFS_STATFS(mp, sp, td);
4269 	mtx_unlock(&Giant);
4270 	if (error)
4271 		return (error);
4272 	*buf = *sp;
4273 	return (0);
4274 }
4275 
4276 /*
4277  * Syscall to push extended attribute configuration information into the
4278  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4279  * a command (int cmd), and attribute name and misc data.  For now, the
4280  * attribute name is left in userspace for consumption by the VFS_op.
4281  * It will probably be changed to be copied into sysspace by the
4282  * syscall in the future, once issues with various consumers of the
4283  * attribute code have raised their hands.
4284  *
4285  * Currently this is used only by UFS Extended Attributes.
4286  */
4287 int
4288 extattrctl(td, uap)
4289 	struct thread *td;
4290 	struct extattrctl_args /* {
4291 		const char *path;
4292 		int cmd;
4293 		const char *filename;
4294 		int attrnamespace;
4295 		const char *attrname;
4296 	} */ *uap;
4297 {
4298 	struct vnode *filename_vp;
4299 	struct nameidata nd;
4300 	struct mount *mp, *mp_writable;
4301 	char attrname[EXTATTR_MAXNAMELEN];
4302 	int error;
4303 
4304 	/*
4305 	 * uap->attrname is not always defined.  We check again later when we
4306 	 * invoke the VFS call so as to pass in NULL there if needed.
4307 	 */
4308 	if (uap->attrname != NULL) {
4309 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4310 		    NULL);
4311 		if (error)
4312 			return (error);
4313 	}
4314 
4315 	/*
4316 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4317 	 * which VFS_EXTATTRCTL() will later release.
4318 	 */
4319 	filename_vp = NULL;
4320 	if (uap->filename != NULL) {
4321 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
4322 		    uap->filename, td);
4323 		error = namei(&nd);
4324 		if (error)
4325 			return (error);
4326 		filename_vp = nd.ni_vp;
4327 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4328 	}
4329 
4330 	/* uap->path is always defined. */
4331 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4332 	error = namei(&nd);
4333 	if (error) {
4334 		if (filename_vp != NULL)
4335 			vput(filename_vp);
4336 		return (error);
4337 	}
4338 	mp = nd.ni_vp->v_mount;
4339 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4340 	NDFREE(&nd, 0);
4341 	if (error) {
4342 		if (filename_vp != NULL)
4343 			vput(filename_vp);
4344 		return (error);
4345 	}
4346 
4347 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
4348 	    uap->attrname != NULL ? attrname : NULL, td);
4349 
4350 	vn_finished_write(mp_writable);
4351 	/*
4352 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4353 	 * filename_vp, so vrele it if it is defined.
4354 	 */
4355 	if (filename_vp != NULL)
4356 		vrele(filename_vp);
4357 	return (error);
4358 }
4359 
4360 /*-
4361  * Set a named extended attribute on a file or directory
4362  *
4363  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4364  *            kernelspace string pointer "attrname", userspace buffer
4365  *            pointer "data", buffer length "nbytes", thread "td".
4366  * Returns: 0 on success, an error number otherwise
4367  * Locks: none
4368  * References: vp must be a valid reference for the duration of the call
4369  */
4370 static int
4371 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4372     void *data, size_t nbytes, struct thread *td)
4373 {
4374 	struct mount *mp;
4375 	struct uio auio;
4376 	struct iovec aiov;
4377 	ssize_t cnt;
4378 	int error;
4379 
4380 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4381 	if (error)
4382 		return (error);
4383 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4384 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4385 
4386 	aiov.iov_base = data;
4387 	aiov.iov_len = nbytes;
4388 	auio.uio_iov = &aiov;
4389 	auio.uio_iovcnt = 1;
4390 	auio.uio_offset = 0;
4391 	if (nbytes > INT_MAX) {
4392 		error = EINVAL;
4393 		goto done;
4394 	}
4395 	auio.uio_resid = nbytes;
4396 	auio.uio_rw = UIO_WRITE;
4397 	auio.uio_segflg = UIO_USERSPACE;
4398 	auio.uio_td = td;
4399 	cnt = nbytes;
4400 
4401 #ifdef MAC
4402 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4403 	    attrname, &auio);
4404 	if (error)
4405 		goto done;
4406 #endif
4407 
4408 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4409 	    td->td_ucred, td);
4410 	cnt -= auio.uio_resid;
4411 	td->td_retval[0] = cnt;
4412 
4413 done:
4414 	VOP_UNLOCK(vp, 0, td);
4415 	vn_finished_write(mp);
4416 	return (error);
4417 }
4418 
4419 int
4420 extattr_set_fd(td, uap)
4421 	struct thread *td;
4422 	struct extattr_set_fd_args /* {
4423 		int fd;
4424 		int attrnamespace;
4425 		const char *attrname;
4426 		void *data;
4427 		size_t nbytes;
4428 	} */ *uap;
4429 {
4430 	struct file *fp;
4431 	char attrname[EXTATTR_MAXNAMELEN];
4432 	int error;
4433 
4434 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4435 	if (error)
4436 		return (error);
4437 
4438 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4439 	if (error)
4440 		return (error);
4441 
4442 	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
4443 	    attrname, uap->data, uap->nbytes, td);
4444 	fdrop(fp, td);
4445 
4446 	return (error);
4447 }
4448 
4449 int
4450 extattr_set_file(td, uap)
4451 	struct thread *td;
4452 	struct extattr_set_file_args /* {
4453 		const char *path;
4454 		int attrnamespace;
4455 		const char *attrname;
4456 		void *data;
4457 		size_t nbytes;
4458 	} */ *uap;
4459 {
4460 	struct nameidata nd;
4461 	char attrname[EXTATTR_MAXNAMELEN];
4462 	int error;
4463 
4464 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4465 	if (error)
4466 		return (error);
4467 
4468 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4469 	error = namei(&nd);
4470 	if (error)
4471 		return (error);
4472 	NDFREE(&nd, NDF_ONLY_PNBUF);
4473 
4474 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4475 	    uap->data, uap->nbytes, td);
4476 
4477 	vrele(nd.ni_vp);
4478 	return (error);
4479 }
4480 
4481 int
4482 extattr_set_link(td, uap)
4483 	struct thread *td;
4484 	struct extattr_set_link_args /* {
4485 		const char *path;
4486 		int attrnamespace;
4487 		const char *attrname;
4488 		void *data;
4489 		size_t nbytes;
4490 	} */ *uap;
4491 {
4492 	struct nameidata nd;
4493 	char attrname[EXTATTR_MAXNAMELEN];
4494 	int error;
4495 
4496 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4497 	if (error)
4498 		return (error);
4499 
4500 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4501 	error = namei(&nd);
4502 	if (error)
4503 		return (error);
4504 	NDFREE(&nd, NDF_ONLY_PNBUF);
4505 
4506 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4507 	    uap->data, uap->nbytes, td);
4508 
4509 	vrele(nd.ni_vp);
4510 	return (error);
4511 }
4512 
4513 /*-
4514  * Get a named extended attribute on a file or directory
4515  *
4516  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4517  *            kernelspace string pointer "attrname", userspace buffer
4518  *            pointer "data", buffer length "nbytes", thread "td".
4519  * Returns: 0 on success, an error number otherwise
4520  * Locks: none
4521  * References: vp must be a valid reference for the duration of the call
4522  */
4523 static int
4524 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4525     void *data, size_t nbytes, struct thread *td)
4526 {
4527 	struct uio auio, *auiop;
4528 	struct iovec aiov;
4529 	ssize_t cnt;
4530 	size_t size, *sizep;
4531 	int error;
4532 
4533 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4534 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4535 
4536 	/*
4537 	 * Slightly unusual semantics: if the user provides a NULL data
4538 	 * pointer, they don't want to receive the data, just the
4539 	 * maximum read length.
4540 	 */
4541 	auiop = NULL;
4542 	sizep = NULL;
4543 	cnt = 0;
4544 	if (data != NULL) {
4545 		aiov.iov_base = data;
4546 		aiov.iov_len = nbytes;
4547 		auio.uio_iov = &aiov;
4548 		auio.uio_offset = 0;
4549 		if (nbytes > INT_MAX) {
4550 			error = EINVAL;
4551 			goto done;
4552 		}
4553 		auio.uio_resid = nbytes;
4554 		auio.uio_rw = UIO_READ;
4555 		auio.uio_segflg = UIO_USERSPACE;
4556 		auio.uio_td = td;
4557 		auiop = &auio;
4558 		cnt = nbytes;
4559 	} else
4560 		sizep = &size;
4561 
4562 #ifdef MAC
4563 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4564 	    attrname, &auio);
4565 	if (error)
4566 		goto done;
4567 #endif
4568 
4569 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4570 	    td->td_ucred, td);
4571 
4572 	if (auiop != NULL) {
4573 		cnt -= auio.uio_resid;
4574 		td->td_retval[0] = cnt;
4575 	} else
4576 		td->td_retval[0] = size;
4577 
4578 done:
4579 	VOP_UNLOCK(vp, 0, td);
4580 	return (error);
4581 }
4582 
4583 int
4584 extattr_get_fd(td, uap)
4585 	struct thread *td;
4586 	struct extattr_get_fd_args /* {
4587 		int fd;
4588 		int attrnamespace;
4589 		const char *attrname;
4590 		void *data;
4591 		size_t nbytes;
4592 	} */ *uap;
4593 {
4594 	struct file *fp;
4595 	char attrname[EXTATTR_MAXNAMELEN];
4596 	int error;
4597 
4598 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4599 	if (error)
4600 		return (error);
4601 
4602 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4603 	if (error)
4604 		return (error);
4605 
4606 	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4607 	    attrname, uap->data, uap->nbytes, td);
4608 
4609 	fdrop(fp, td);
4610 	return (error);
4611 }
4612 
4613 int
4614 extattr_get_file(td, uap)
4615 	struct thread *td;
4616 	struct extattr_get_file_args /* {
4617 		const char *path;
4618 		int attrnamespace;
4619 		const char *attrname;
4620 		void *data;
4621 		size_t nbytes;
4622 	} */ *uap;
4623 {
4624 	struct nameidata nd;
4625 	char attrname[EXTATTR_MAXNAMELEN];
4626 	int error;
4627 
4628 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4629 	if (error)
4630 		return (error);
4631 
4632 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4633 	error = namei(&nd);
4634 	if (error)
4635 		return (error);
4636 	NDFREE(&nd, NDF_ONLY_PNBUF);
4637 
4638 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4639 	    uap->data, uap->nbytes, td);
4640 
4641 	vrele(nd.ni_vp);
4642 	return (error);
4643 }
4644 
4645 int
4646 extattr_get_link(td, uap)
4647 	struct thread *td;
4648 	struct extattr_get_link_args /* {
4649 		const char *path;
4650 		int attrnamespace;
4651 		const char *attrname;
4652 		void *data;
4653 		size_t nbytes;
4654 	} */ *uap;
4655 {
4656 	struct nameidata nd;
4657 	char attrname[EXTATTR_MAXNAMELEN];
4658 	int error;
4659 
4660 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4661 	if (error)
4662 		return (error);
4663 
4664 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4665 	error = namei(&nd);
4666 	if (error)
4667 		return (error);
4668 	NDFREE(&nd, NDF_ONLY_PNBUF);
4669 
4670 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4671 	    uap->data, uap->nbytes, td);
4672 
4673 	vrele(nd.ni_vp);
4674 	return (error);
4675 }
4676 
4677 /*
4678  * extattr_delete_vp(): Delete a named extended attribute on a file or
4679  *                      directory
4680  *
4681  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4682  *            kernelspace string pointer "attrname", proc "p"
4683  * Returns: 0 on success, an error number otherwise
4684  * Locks: none
4685  * References: vp must be a valid reference for the duration of the call
4686  */
4687 static int
4688 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4689     struct thread *td)
4690 {
4691 	struct mount *mp;
4692 	int error;
4693 
4694 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4695 	if (error)
4696 		return (error);
4697 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4698 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4699 
4700 #ifdef MAC
4701 	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4702 	    attrname);
4703 	if (error)
4704 		goto done;
4705 #endif
4706 
4707 	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4708 	    td);
4709 	if (error == EOPNOTSUPP)
4710 		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4711 		    td->td_ucred, td);
4712 #ifdef MAC
4713 done:
4714 #endif
4715 	VOP_UNLOCK(vp, 0, td);
4716 	vn_finished_write(mp);
4717 	return (error);
4718 }
4719 
4720 int
4721 extattr_delete_fd(td, uap)
4722 	struct thread *td;
4723 	struct extattr_delete_fd_args /* {
4724 		int fd;
4725 		int attrnamespace;
4726 		const char *attrname;
4727 	} */ *uap;
4728 {
4729 	struct file *fp;
4730 	struct vnode *vp;
4731 	char attrname[EXTATTR_MAXNAMELEN];
4732 	int error;
4733 
4734 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4735 	if (error)
4736 		return (error);
4737 
4738 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4739 	if (error)
4740 		return (error);
4741 	vp = fp->f_vnode;
4742 
4743 	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
4744 	fdrop(fp, td);
4745 	return (error);
4746 }
4747 
4748 int
4749 extattr_delete_file(td, uap)
4750 	struct thread *td;
4751 	struct extattr_delete_file_args /* {
4752 		const char *path;
4753 		int attrnamespace;
4754 		const char *attrname;
4755 	} */ *uap;
4756 {
4757 	struct nameidata nd;
4758 	char attrname[EXTATTR_MAXNAMELEN];
4759 	int error;
4760 
4761 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4762 	if (error)
4763 		return(error);
4764 
4765 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4766 	error = namei(&nd);
4767 	if (error)
4768 		return(error);
4769 	NDFREE(&nd, NDF_ONLY_PNBUF);
4770 
4771 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4772 	vrele(nd.ni_vp);
4773 	return(error);
4774 }
4775 
4776 int
4777 extattr_delete_link(td, uap)
4778 	struct thread *td;
4779 	struct extattr_delete_link_args /* {
4780 		const char *path;
4781 		int attrnamespace;
4782 		const char *attrname;
4783 	} */ *uap;
4784 {
4785 	struct nameidata nd;
4786 	char attrname[EXTATTR_MAXNAMELEN];
4787 	int error;
4788 
4789 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4790 	if (error)
4791 		return(error);
4792 
4793 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4794 	error = namei(&nd);
4795 	if (error)
4796 		return(error);
4797 	NDFREE(&nd, NDF_ONLY_PNBUF);
4798 
4799 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4800 	vrele(nd.ni_vp);
4801 	return(error);
4802 }
4803 
4804 /*-
4805  * Retrieve a list of extended attributes on a file or directory.
4806  *
4807  * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4808  *            userspace buffer pointer "data", buffer length "nbytes",
4809  *            thread "td".
4810  * Returns: 0 on success, an error number otherwise
4811  * Locks: none
4812  * References: vp must be a valid reference for the duration of the call
4813  */
4814 static int
4815 extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4816     size_t nbytes, struct thread *td)
4817 {
4818 	struct uio auio, *auiop;
4819 	size_t size, *sizep;
4820 	struct iovec aiov;
4821 	ssize_t cnt;
4822 	int error;
4823 
4824 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4825 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4826 
4827 	auiop = NULL;
4828 	sizep = NULL;
4829 	cnt = 0;
4830 	if (data != NULL) {
4831 		aiov.iov_base = data;
4832 		aiov.iov_len = nbytes;
4833 		auio.uio_iov = &aiov;
4834 		auio.uio_offset = 0;
4835 		if (nbytes > INT_MAX) {
4836 			error = EINVAL;
4837 			goto done;
4838 		}
4839 		auio.uio_resid = nbytes;
4840 		auio.uio_rw = UIO_READ;
4841 		auio.uio_segflg = UIO_USERSPACE;
4842 		auio.uio_td = td;
4843 		auiop = &auio;
4844 		cnt = nbytes;
4845 	} else
4846 		sizep = &size;
4847 
4848 #ifdef MAC
4849 	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4850 	if (error)
4851 		goto done;
4852 #endif
4853 
4854 	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4855 	    td->td_ucred, td);
4856 
4857 	if (auiop != NULL) {
4858 		cnt -= auio.uio_resid;
4859 		td->td_retval[0] = cnt;
4860 	} else
4861 		td->td_retval[0] = size;
4862 
4863 done:
4864 	VOP_UNLOCK(vp, 0, td);
4865 	return (error);
4866 }
4867 
4868 
4869 int
4870 extattr_list_fd(td, uap)
4871 	struct thread *td;
4872 	struct extattr_list_fd_args /* {
4873 		int fd;
4874 		int attrnamespace;
4875 		void *data;
4876 		size_t nbytes;
4877 	} */ *uap;
4878 {
4879 	struct file *fp;
4880 	int error;
4881 
4882 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4883 	if (error)
4884 		return (error);
4885 
4886 	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
4887 	    uap->nbytes, td);
4888 
4889 	fdrop(fp, td);
4890 	return (error);
4891 }
4892 
4893 int
4894 extattr_list_file(td, uap)
4895 	struct thread*td;
4896 	struct extattr_list_file_args /* {
4897 		const char *path;
4898 		int attrnamespace;
4899 		void *data;
4900 		size_t nbytes;
4901 	} */ *uap;
4902 {
4903 	struct nameidata nd;
4904 	int error;
4905 
4906 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4907 	error = namei(&nd);
4908 	if (error)
4909 		return (error);
4910 	NDFREE(&nd, NDF_ONLY_PNBUF);
4911 
4912 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4913 	    uap->nbytes, td);
4914 
4915 	vrele(nd.ni_vp);
4916 	return (error);
4917 }
4918 
4919 int
4920 extattr_list_link(td, uap)
4921 	struct thread*td;
4922 	struct extattr_list_link_args /* {
4923 		const char *path;
4924 		int attrnamespace;
4925 		void *data;
4926 		size_t nbytes;
4927 	} */ *uap;
4928 {
4929 	struct nameidata nd;
4930 	int error;
4931 
4932 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4933 	error = namei(&nd);
4934 	if (error)
4935 		return (error);
4936 	NDFREE(&nd, NDF_ONLY_PNBUF);
4937 
4938 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4939 	    uap->nbytes, td);
4940 
4941 	vrele(nd.ni_vp);
4942 	return (error);
4943 }
4944