xref: /freebsd/sys/kern/vfs_extattr.c (revision 2bc6540439d0932b38067c9cc321fa0e2a61f264)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/sysent.h>
48 #include <sys/mac.h>
49 #include <sys/malloc.h>
50 #include <sys/mount.h>
51 #include <sys/mutex.h>
52 #include <sys/sysproto.h>
53 #include <sys/namei.h>
54 #include <sys/filedesc.h>
55 #include <sys/kernel.h>
56 #include <sys/fcntl.h>
57 #include <sys/file.h>
58 #include <sys/limits.h>
59 #include <sys/linker.h>
60 #include <sys/stat.h>
61 #include <sys/sx.h>
62 #include <sys/unistd.h>
63 #include <sys/vnode.h>
64 #include <sys/proc.h>
65 #include <sys/dirent.h>
66 #include <sys/extattr.h>
67 #include <sys/jail.h>
68 #include <sys/syscallsubr.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/stdarg.h>
72 
73 #include <vm/vm.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_page.h>
76 #include <vm/uma.h>
77 
78 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
79 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
80 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
81 static int setfmode(struct thread *td, struct vnode *, int);
82 static int setfflags(struct thread *td, struct vnode *, int);
83 static int setutimes(struct thread *td, struct vnode *,
84     const struct timespec *, int, int);
85 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
86     struct thread *td);
87 
88 static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
89     size_t nbytes, struct thread *td);
90 
91 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
92 
93 /*
94  * The module initialization routine for POSIX asynchronous I/O will
95  * set this to the version of AIO that it implements.  (Zero means
96  * that it is not implemented.)  This value is used here by pathconf()
97  * and in kern_descrip.c by fpathconf().
98  */
99 int async_io_version;
100 
101 /*
102  * Sync each mounted filesystem.
103  */
104 #ifndef _SYS_SYSPROTO_H_
105 struct sync_args {
106 	int     dummy;
107 };
108 #endif
109 
110 #ifdef DEBUG
111 static int syncprt = 0;
112 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
113 #endif
114 
115 /* ARGSUSED */
116 int
117 sync(td, uap)
118 	struct thread *td;
119 	struct sync_args *uap;
120 {
121 	struct mount *mp, *nmp;
122 	int asyncflag;
123 
124 	mtx_lock(&Giant);
125 	mtx_lock(&mountlist_mtx);
126 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
127 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
128 			nmp = TAILQ_NEXT(mp, mnt_list);
129 			continue;
130 		}
131 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
132 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
133 			asyncflag = mp->mnt_flag & MNT_ASYNC;
134 			mp->mnt_flag &= ~MNT_ASYNC;
135 			vfs_msync(mp, MNT_NOWAIT);
136 			VFS_SYNC(mp, MNT_NOWAIT, td);
137 			mp->mnt_flag |= asyncflag;
138 			vn_finished_write(mp);
139 		}
140 		mtx_lock(&mountlist_mtx);
141 		nmp = TAILQ_NEXT(mp, mnt_list);
142 		vfs_unbusy(mp, td);
143 	}
144 	mtx_unlock(&mountlist_mtx);
145 #if 0
146 /*
147  * XXX don't call vfs_bufstats() yet because that routine
148  * was not imported in the Lite2 merge.
149  */
150 #ifdef DIAGNOSTIC
151 	if (syncprt)
152 		vfs_bufstats();
153 #endif /* DIAGNOSTIC */
154 #endif
155 	mtx_unlock(&Giant);
156 	return (0);
157 }
158 
159 /* XXX PRISON: could be per prison flag */
160 static int prison_quotas;
161 #if 0
162 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
163 #endif
164 
165 /*
166  * Change filesystem quotas.
167  *
168  * MP SAFE
169  */
170 #ifndef _SYS_SYSPROTO_H_
171 struct quotactl_args {
172 	char *path;
173 	int cmd;
174 	int uid;
175 	caddr_t arg;
176 };
177 #endif
178 int
179 quotactl(td, uap)
180 	struct thread *td;
181 	register struct quotactl_args /* {
182 		char *path;
183 		int cmd;
184 		int uid;
185 		caddr_t arg;
186 	} */ *uap;
187 {
188 	struct mount *mp, *vmp;
189 	int error;
190 	struct nameidata nd;
191 
192 	if (jailed(td->td_ucred) && !prison_quotas)
193 		return (EPERM);
194 	mtx_lock(&Giant);
195 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
196 	if ((error = namei(&nd)) != 0) {
197 		mtx_unlock(&Giant);
198 		return (error);
199 	}
200 	NDFREE(&nd, NDF_ONLY_PNBUF);
201 	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
202 	mp = nd.ni_vp->v_mount;
203 	vrele(nd.ni_vp);
204 	if (error) {
205 		mtx_unlock(&Giant);
206 		return (error);
207 	}
208 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
209 	vn_finished_write(vmp);
210 	mtx_unlock(&Giant);
211 	return (error);
212 }
213 
214 /*
215  * Get filesystem statistics.
216  */
217 #ifndef _SYS_SYSPROTO_H_
218 struct statfs_args {
219 	char *path;
220 	struct statfs *buf;
221 };
222 #endif
223 int
224 statfs(td, uap)
225 	struct thread *td;
226 	register struct statfs_args /* {
227 		char *path;
228 		struct statfs *buf;
229 	} */ *uap;
230 {
231 	struct statfs sf;
232 	int error;
233 
234 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
235 	if (error == 0)
236 		error = copyout(&sf, uap->buf, sizeof(sf));
237 	return (error);
238 }
239 
240 int
241 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
242     struct statfs *buf)
243 {
244 	struct mount *mp;
245 	struct statfs *sp, sb;
246 	int error;
247 	struct nameidata nd;
248 
249 	mtx_lock(&Giant);
250 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
251 	error = namei(&nd);
252 	if (error) {
253 		mtx_unlock(&Giant);
254 		return (error);
255 	}
256 	mp = nd.ni_vp->v_mount;
257 	sp = &mp->mnt_stat;
258 	NDFREE(&nd, NDF_ONLY_PNBUF);
259 	vrele(nd.ni_vp);
260 #ifdef MAC
261 	error = mac_check_mount_stat(td->td_ucred, mp);
262 	if (error) {
263 		mtx_unlock(&Giant);
264 		return (error);
265 	}
266 #endif
267 	/*
268 	 * Set these in case the underlying filesystem fails to do so.
269 	 */
270 	sp->f_version = STATFS_VERSION;
271 	sp->f_namemax = NAME_MAX;
272 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
273 	error = VFS_STATFS(mp, sp, td);
274 	if (error) {
275 		mtx_unlock(&Giant);
276 		return (error);
277 	}
278 	if (suser(td)) {
279 		bcopy(sp, &sb, sizeof(sb));
280 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
281 		prison_enforce_statfs(td->td_ucred, mp, &sb);
282 		sp = &sb;
283 	}
284 	mtx_unlock(&Giant);
285 	*buf = *sp;
286 	return (0);
287 }
288 
289 /*
290  * Get filesystem statistics.
291  */
292 #ifndef _SYS_SYSPROTO_H_
293 struct fstatfs_args {
294 	int fd;
295 	struct statfs *buf;
296 };
297 #endif
298 int
299 fstatfs(td, uap)
300 	struct thread *td;
301 	register struct fstatfs_args /* {
302 		int fd;
303 		struct statfs *buf;
304 	} */ *uap;
305 {
306 	struct statfs sf;
307 	int error;
308 
309 	error = kern_fstatfs(td, uap->fd, &sf);
310 	if (error == 0)
311 		error = copyout(&sf, uap->buf, sizeof(sf));
312 	return (error);
313 }
314 
315 int
316 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
317 {
318 	struct file *fp;
319 	struct mount *mp;
320 	struct statfs *sp, sb;
321 	int error;
322 
323 	error = getvnode(td->td_proc->p_fd, fd, &fp);
324 	if (error)
325 		return (error);
326 	mp = fp->f_vnode->v_mount;
327 	fdrop(fp, td);
328 	if (mp == NULL)
329 		return (EBADF);
330 	mtx_lock(&Giant);
331 #ifdef MAC
332 	error = mac_check_mount_stat(td->td_ucred, mp);
333 	if (error) {
334 		mtx_unlock(&Giant);
335 		return (error);
336 	}
337 #endif
338 	sp = &mp->mnt_stat;
339 	/*
340 	 * Set these in case the underlying filesystem fails to do so.
341 	 */
342 	sp->f_version = STATFS_VERSION;
343 	sp->f_namemax = NAME_MAX;
344 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
345 	error = VFS_STATFS(mp, sp, td);
346 	if (error) {
347 		mtx_unlock(&Giant);
348 		return (error);
349 	}
350 	if (suser(td)) {
351 		bcopy(sp, &sb, sizeof(sb));
352 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
353 		prison_enforce_statfs(td->td_ucred, mp, &sb);
354 		sp = &sb;
355 	}
356 	mtx_unlock(&Giant);
357 	*buf = *sp;
358 	return (0);
359 }
360 
361 /*
362  * Get statistics on all filesystems.
363  */
364 #ifndef _SYS_SYSPROTO_H_
365 struct getfsstat_args {
366 	struct statfs *buf;
367 	long bufsize;
368 	int flags;
369 };
370 #endif
371 int
372 getfsstat(td, uap)
373 	struct thread *td;
374 	register struct getfsstat_args /* {
375 		struct statfs *buf;
376 		long bufsize;
377 		int flags;
378 	} */ *uap;
379 {
380 
381 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
382 	    uap->flags));
383 }
384 
385 /*
386  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
387  * 	The caller is responsible for freeing memory which will be allocated
388  *	in '*buf'.
389  */
390 int
391 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
392     enum uio_seg bufseg, int flags)
393 {
394 	struct mount *mp, *nmp;
395 	struct statfs *sfsp, *sp, sb;
396 	size_t count, maxcount;
397 	int error;
398 
399 	maxcount = bufsize / sizeof(struct statfs);
400 	if (bufsize == 0)
401 		sfsp = NULL;
402 	else if (bufseg == UIO_USERSPACE)
403 		sfsp = *buf;
404 	else /* if (bufseg == UIO_SYSSPACE) */ {
405 		count = 0;
406 		mtx_lock(&mountlist_mtx);
407 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
408 			count++;
409 		}
410 		mtx_unlock(&mountlist_mtx);
411 		if (maxcount > count)
412 			maxcount = count;
413 		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
414 		    M_WAITOK);
415 	}
416 	count = 0;
417 	mtx_lock(&Giant);
418 	mtx_lock(&mountlist_mtx);
419 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
420 		if (prison_canseemount(td->td_ucred, mp) != 0) {
421 			nmp = TAILQ_NEXT(mp, mnt_list);
422 			continue;
423 		}
424 #ifdef MAC
425 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
426 			nmp = TAILQ_NEXT(mp, mnt_list);
427 			continue;
428 		}
429 #endif
430 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
431 			nmp = TAILQ_NEXT(mp, mnt_list);
432 			continue;
433 		}
434 		if (sfsp && count < maxcount) {
435 			sp = &mp->mnt_stat;
436 			/*
437 			 * Set these in case the underlying filesystem
438 			 * fails to do so.
439 			 */
440 			sp->f_version = STATFS_VERSION;
441 			sp->f_namemax = NAME_MAX;
442 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
443 			/*
444 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
445 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
446 			 * overrides MNT_WAIT.
447 			 */
448 			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
449 			    (flags & MNT_WAIT)) &&
450 			    (error = VFS_STATFS(mp, sp, td))) {
451 				mtx_lock(&mountlist_mtx);
452 				nmp = TAILQ_NEXT(mp, mnt_list);
453 				vfs_unbusy(mp, td);
454 				continue;
455 			}
456 			if (suser(td)) {
457 				bcopy(sp, &sb, sizeof(sb));
458 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
459 				prison_enforce_statfs(td->td_ucred, mp, &sb);
460 				sp = &sb;
461 			}
462 			if (bufseg == UIO_SYSSPACE)
463 				bcopy(sp, sfsp, sizeof(*sp));
464 			else /* if (bufseg == UIO_USERSPACE) */ {
465 				error = copyout(sp, sfsp, sizeof(*sp));
466 				if (error) {
467 					vfs_unbusy(mp, td);
468 					mtx_unlock(&Giant);
469 					return (error);
470 				}
471 			}
472 			sfsp++;
473 		}
474 		count++;
475 		mtx_lock(&mountlist_mtx);
476 		nmp = TAILQ_NEXT(mp, mnt_list);
477 		vfs_unbusy(mp, td);
478 	}
479 	mtx_unlock(&mountlist_mtx);
480 	mtx_unlock(&Giant);
481 	if (sfsp && count > maxcount)
482 		td->td_retval[0] = maxcount;
483 	else
484 		td->td_retval[0] = count;
485 	return (0);
486 }
487 
488 #ifdef COMPAT_FREEBSD4
489 /*
490  * Get old format filesystem statistics.
491  */
492 static void cvtstatfs(struct statfs *, struct ostatfs *);
493 
494 #ifndef _SYS_SYSPROTO_H_
495 struct freebsd4_statfs_args {
496 	char *path;
497 	struct ostatfs *buf;
498 };
499 #endif
500 int
501 freebsd4_statfs(td, uap)
502 	struct thread *td;
503 	struct freebsd4_statfs_args /* {
504 		char *path;
505 		struct ostatfs *buf;
506 	} */ *uap;
507 {
508 	struct ostatfs osb;
509 	struct statfs sf;
510 	int error;
511 
512 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
513 	if (error)
514 		return (error);
515 	cvtstatfs(&sf, &osb);
516 	return (copyout(&osb, uap->buf, sizeof(osb)));
517 }
518 
519 /*
520  * Get filesystem statistics.
521  */
522 #ifndef _SYS_SYSPROTO_H_
523 struct freebsd4_fstatfs_args {
524 	int fd;
525 	struct ostatfs *buf;
526 };
527 #endif
528 int
529 freebsd4_fstatfs(td, uap)
530 	struct thread *td;
531 	struct freebsd4_fstatfs_args /* {
532 		int fd;
533 		struct ostatfs *buf;
534 	} */ *uap;
535 {
536 	struct ostatfs osb;
537 	struct statfs sf;
538 	int error;
539 
540 	error = kern_fstatfs(td, uap->fd, &sf);
541 	if (error)
542 		return (error);
543 	cvtstatfs(&sf, &osb);
544 	return (copyout(&osb, uap->buf, sizeof(osb)));
545 }
546 
547 /*
548  * Get statistics on all filesystems.
549  */
550 #ifndef _SYS_SYSPROTO_H_
551 struct freebsd4_getfsstat_args {
552 	struct ostatfs *buf;
553 	long bufsize;
554 	int flags;
555 };
556 #endif
557 int
558 freebsd4_getfsstat(td, uap)
559 	struct thread *td;
560 	register struct freebsd4_getfsstat_args /* {
561 		struct ostatfs *buf;
562 		long bufsize;
563 		int flags;
564 	} */ *uap;
565 {
566 	struct statfs *buf, *sp;
567 	struct ostatfs osb;
568 	size_t count, size;
569 	int error;
570 
571 	count = uap->bufsize / sizeof(struct ostatfs);
572 	size = count * sizeof(struct statfs);
573 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
574 	if (size > 0) {
575 		count = td->td_retval[0];
576 		sp = buf;
577 		while (count > 0 && error == 0) {
578 			cvtstatfs(sp, &osb);
579 			error = copyout(&osb, uap->buf, sizeof(osb));
580 			sp++;
581 			uap->buf++;
582 			count--;
583 		}
584 		free(buf, M_TEMP);
585 	}
586 	return (error);
587 }
588 
589 /*
590  * Implement fstatfs() for (NFS) file handles.
591  */
592 #ifndef _SYS_SYSPROTO_H_
593 struct freebsd4_fhstatfs_args {
594 	struct fhandle *u_fhp;
595 	struct ostatfs *buf;
596 };
597 #endif
598 int
599 freebsd4_fhstatfs(td, uap)
600 	struct thread *td;
601 	struct freebsd4_fhstatfs_args /* {
602 		struct fhandle *u_fhp;
603 		struct ostatfs *buf;
604 	} */ *uap;
605 {
606 	struct ostatfs osb;
607 	struct statfs sf;
608 	fhandle_t fh;
609 	int error;
610 
611 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
612 	if (error)
613 		return (error);
614 	error = kern_fhstatfs(td, fh, &sf);
615 	if (error)
616 		return (error);
617 	cvtstatfs(&sf, &osb);
618 	return (copyout(&osb, uap->buf, sizeof(osb)));
619 }
620 
621 /*
622  * Convert a new format statfs structure to an old format statfs structure.
623  */
624 static void
625 cvtstatfs(nsp, osp)
626 	struct statfs *nsp;
627 	struct ostatfs *osp;
628 {
629 
630 	bzero(osp, sizeof(*osp));
631 	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
632 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
633 	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
634 	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
635 	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
636 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
637 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
638 	osp->f_owner = nsp->f_owner;
639 	osp->f_type = nsp->f_type;
640 	osp->f_flags = nsp->f_flags;
641 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
642 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
643 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
644 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
645 	bcopy(nsp->f_fstypename, osp->f_fstypename,
646 	    MIN(MFSNAMELEN, OMNAMELEN));
647 	bcopy(nsp->f_mntonname, osp->f_mntonname,
648 	    MIN(MFSNAMELEN, OMNAMELEN));
649 	bcopy(nsp->f_mntfromname, osp->f_mntfromname,
650 	    MIN(MFSNAMELEN, OMNAMELEN));
651 	osp->f_fsid = nsp->f_fsid;
652 }
653 #endif /* COMPAT_FREEBSD4 */
654 
655 /*
656  * Change current working directory to a given file descriptor.
657  */
658 #ifndef _SYS_SYSPROTO_H_
659 struct fchdir_args {
660 	int	fd;
661 };
662 #endif
663 int
664 fchdir(td, uap)
665 	struct thread *td;
666 	struct fchdir_args /* {
667 		int fd;
668 	} */ *uap;
669 {
670 	register struct filedesc *fdp = td->td_proc->p_fd;
671 	struct vnode *vp, *tdp, *vpold;
672 	struct mount *mp;
673 	struct file *fp;
674 	int vfslocked;
675 	int error;
676 
677 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
678 		return (error);
679 	vp = fp->f_vnode;
680 	VREF(vp);
681 	fdrop(fp, td);
682 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
683 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
684 	if (vp->v_type != VDIR)
685 		error = ENOTDIR;
686 #ifdef MAC
687 	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
688 	}
689 #endif
690 	else
691 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
692 	while (!error && (mp = vp->v_mountedhere) != NULL) {
693 		int tvfslocked;
694 		if (vfs_busy(mp, 0, 0, td))
695 			continue;
696 		tvfslocked = VFS_LOCK_GIANT(mp);
697 		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
698 		vfs_unbusy(mp, td);
699 		if (error) {
700 			VFS_UNLOCK_GIANT(tvfslocked);
701 			break;
702 		}
703 		vput(vp);
704 		VFS_UNLOCK_GIANT(vfslocked);
705 		vp = tdp;
706 		vfslocked = tvfslocked;
707 	}
708 	if (error) {
709 		vput(vp);
710 		VFS_UNLOCK_GIANT(vfslocked);
711 		return (error);
712 	}
713 	VOP_UNLOCK(vp, 0, td);
714 	FILEDESC_LOCK_FAST(fdp);
715 	vpold = fdp->fd_cdir;
716 	fdp->fd_cdir = vp;
717 	FILEDESC_UNLOCK_FAST(fdp);
718 	vrele(vpold);
719 	VFS_UNLOCK_GIANT(vfslocked);
720 	return (0);
721 }
722 
723 /*
724  * Change current working directory (``.'').
725  */
726 #ifndef _SYS_SYSPROTO_H_
727 struct chdir_args {
728 	char	*path;
729 };
730 #endif
731 int
732 chdir(td, uap)
733 	struct thread *td;
734 	struct chdir_args /* {
735 		char *path;
736 	} */ *uap;
737 {
738 
739 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
740 }
741 
742 int
743 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
744 {
745 	register struct filedesc *fdp = td->td_proc->p_fd;
746 	int error;
747 	struct nameidata nd;
748 	struct vnode *vp;
749 	int vfslocked;
750 
751 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
752 	if ((error = namei(&nd)) != 0)
753 		return (error);
754 	vfslocked = NDHASGIANT(&nd);
755 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
756 		vput(nd.ni_vp);
757 		VFS_UNLOCK_GIANT(vfslocked);
758 		NDFREE(&nd, NDF_ONLY_PNBUF);
759 		return (error);
760 	}
761 	VOP_UNLOCK(nd.ni_vp, 0, td);
762 	NDFREE(&nd, NDF_ONLY_PNBUF);
763 	FILEDESC_LOCK_FAST(fdp);
764 	vp = fdp->fd_cdir;
765 	fdp->fd_cdir = nd.ni_vp;
766 	FILEDESC_UNLOCK_FAST(fdp);
767 	vrele(vp);
768 	VFS_UNLOCK_GIANT(vfslocked);
769 	return (0);
770 }
771 
772 /*
773  * Helper function for raised chroot(2) security function:  Refuse if
774  * any filedescriptors are open directories.
775  */
776 static int
777 chroot_refuse_vdir_fds(fdp)
778 	struct filedesc *fdp;
779 {
780 	struct vnode *vp;
781 	struct file *fp;
782 	int fd;
783 
784 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
785 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
786 		fp = fget_locked(fdp, fd);
787 		if (fp == NULL)
788 			continue;
789 		if (fp->f_type == DTYPE_VNODE) {
790 			vp = fp->f_vnode;
791 			if (vp->v_type == VDIR)
792 				return (EPERM);
793 		}
794 	}
795 	return (0);
796 }
797 
798 /*
799  * This sysctl determines if we will allow a process to chroot(2) if it
800  * has a directory open:
801  *	0: disallowed for all processes.
802  *	1: allowed for processes that were not already chroot(2)'ed.
803  *	2: allowed for all processes.
804  */
805 
806 static int chroot_allow_open_directories = 1;
807 
808 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
809      &chroot_allow_open_directories, 0, "");
810 
811 /*
812  * Change notion of root (``/'') directory.
813  */
814 #ifndef _SYS_SYSPROTO_H_
815 struct chroot_args {
816 	char	*path;
817 };
818 #endif
819 int
820 chroot(td, uap)
821 	struct thread *td;
822 	struct chroot_args /* {
823 		char *path;
824 	} */ *uap;
825 {
826 	int error;
827 	struct nameidata nd;
828 	int vfslocked;
829 
830 	error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
831 	if (error)
832 		return (error);
833 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE,
834 	    UIO_USERSPACE, uap->path, td);
835 	error = namei(&nd);
836 	if (error)
837 		goto error;
838 	vfslocked = NDHASGIANT(&nd);
839 	if ((error = change_dir(nd.ni_vp, td)) != 0)
840 		goto e_vunlock;
841 #ifdef MAC
842 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
843 		goto e_vunlock;
844 #endif
845 	VOP_UNLOCK(nd.ni_vp, 0, td);
846 	error = change_root(nd.ni_vp, td);
847 	vrele(nd.ni_vp);
848 	VFS_UNLOCK_GIANT(vfslocked);
849 	NDFREE(&nd, NDF_ONLY_PNBUF);
850 	return (error);
851 e_vunlock:
852 	vput(nd.ni_vp);
853 	VFS_UNLOCK_GIANT(vfslocked);
854 error:
855 	NDFREE(&nd, NDF_ONLY_PNBUF);
856 	return (error);
857 }
858 
859 /*
860  * Common routine for chroot and chdir.  Callers must provide a locked vnode
861  * instance.
862  */
863 int
864 change_dir(vp, td)
865 	struct vnode *vp;
866 	struct thread *td;
867 {
868 	int error;
869 
870 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
871 	if (vp->v_type != VDIR)
872 		return (ENOTDIR);
873 #ifdef MAC
874 	error = mac_check_vnode_chdir(td->td_ucred, vp);
875 	if (error)
876 		return (error);
877 #endif
878 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
879 	return (error);
880 }
881 
882 /*
883  * Common routine for kern_chroot() and jail_attach().  The caller is
884  * responsible for invoking suser() and mac_check_chroot() to authorize this
885  * operation.
886  */
887 int
888 change_root(vp, td)
889 	struct vnode *vp;
890 	struct thread *td;
891 {
892 	struct filedesc *fdp;
893 	struct vnode *oldvp;
894 	int error;
895 
896 	VFS_ASSERT_GIANT(vp->v_mount);
897 	fdp = td->td_proc->p_fd;
898 	FILEDESC_LOCK(fdp);
899 	if (chroot_allow_open_directories == 0 ||
900 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
901 		error = chroot_refuse_vdir_fds(fdp);
902 		if (error) {
903 			FILEDESC_UNLOCK(fdp);
904 			return (error);
905 		}
906 	}
907 	oldvp = fdp->fd_rdir;
908 	fdp->fd_rdir = vp;
909 	VREF(fdp->fd_rdir);
910 	if (!fdp->fd_jdir) {
911 		fdp->fd_jdir = vp;
912 		VREF(fdp->fd_jdir);
913 	}
914 	FILEDESC_UNLOCK(fdp);
915 	vrele(oldvp);
916 	return (0);
917 }
918 
919 /*
920  * Check permissions, allocate an open file structure,
921  * and call the device open routine if any.
922  *
923  * MP SAFE
924  */
925 #ifndef _SYS_SYSPROTO_H_
926 struct open_args {
927 	char	*path;
928 	int	flags;
929 	int	mode;
930 };
931 #endif
932 int
933 open(td, uap)
934 	struct thread *td;
935 	register struct open_args /* {
936 		char *path;
937 		int flags;
938 		int mode;
939 	} */ *uap;
940 {
941 	int error;
942 
943 	error = kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
944 	if (mtx_owned(&Giant))
945 		printf("open: %s: %d\n", uap->path, error);
946 	return (error);
947 }
948 
949 int
950 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
951     int mode)
952 {
953 	struct proc *p = td->td_proc;
954 	struct filedesc *fdp = p->p_fd;
955 	struct file *fp;
956 	struct vnode *vp;
957 	struct vattr vat;
958 	struct mount *mp;
959 	int cmode;
960 	struct file *nfp;
961 	int type, indx, error;
962 	struct flock lf;
963 	struct nameidata nd;
964 	int vfslocked;
965 
966 	if ((flags & O_ACCMODE) == O_ACCMODE)
967 		return (EINVAL);
968 	flags = FFLAGS(flags);
969 	error = falloc(td, &nfp, &indx);
970 	if (error)
971 		return (error);
972 	/* An extra reference on `nfp' has been held for us by falloc(). */
973 	fp = nfp;
974 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
975 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
976 	td->td_dupfd = -1;		/* XXX check for fdopen */
977 	error = vn_open(&nd, &flags, cmode, indx);
978 	if (error) {
979 		/*
980 		 * If the vn_open replaced the method vector, something
981 		 * wonderous happened deep below and we just pass it up
982 		 * pretending we know what we do.
983 		 */
984 		if (error == ENXIO && fp->f_ops != &badfileops) {
985 			fdrop(fp, td);
986 			td->td_retval[0] = indx;
987 			return (0);
988 		}
989 
990 		/*
991 		 * release our own reference
992 		 */
993 		fdrop(fp, td);
994 
995 		/*
996 		 * handle special fdopen() case.  bleh.  dupfdopen() is
997 		 * responsible for dropping the old contents of ofiles[indx]
998 		 * if it succeeds.
999 		 */
1000 		if ((error == ENODEV || error == ENXIO) &&
1001 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1002 		    (error =
1003 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1004 			td->td_retval[0] = indx;
1005 			return (0);
1006 		}
1007 		/*
1008 		 * Clean up the descriptor, but only if another thread hadn't
1009 		 * replaced or closed it.
1010 		 */
1011 		fdclose(fdp, fp, indx, td);
1012 
1013 		if (error == ERESTART)
1014 			error = EINTR;
1015 		return (error);
1016 	}
1017 	td->td_dupfd = 0;
1018 	vfslocked = NDHASGIANT(&nd);
1019 	NDFREE(&nd, NDF_ONLY_PNBUF);
1020 	vp = nd.ni_vp;
1021 
1022 	/*
1023 	 * There should be 2 references on the file, one from the descriptor
1024 	 * table, and one for us.
1025 	 *
1026 	 * Handle the case where someone closed the file (via its file
1027 	 * descriptor) while we were blocked.  The end result should look
1028 	 * like opening the file succeeded but it was immediately closed.
1029 	 * We call vn_close() manually because we haven't yet hooked up
1030 	 * the various 'struct file' fields.
1031 	 */
1032 	FILEDESC_LOCK(fdp);
1033 	FILE_LOCK(fp);
1034 	if (fp->f_count == 1) {
1035 		mp = vp->v_mount;
1036 		KASSERT(fdp->fd_ofiles[indx] != fp,
1037 		    ("Open file descriptor lost all refs"));
1038 		FILE_UNLOCK(fp);
1039 		FILEDESC_UNLOCK(fdp);
1040 		VOP_UNLOCK(vp, 0, td);
1041 		vn_close(vp, flags & FMASK, fp->f_cred, td);
1042 		VFS_UNLOCK_GIANT(vfslocked);
1043 		fdrop(fp, td);
1044 		td->td_retval[0] = indx;
1045 		return (0);
1046 	}
1047 	fp->f_vnode = vp;
1048 	if (fp->f_data == NULL)
1049 		fp->f_data = vp;
1050 	fp->f_flag = flags & FMASK;
1051 	if (fp->f_ops == &badfileops)
1052 		fp->f_ops = &vnops;
1053 	fp->f_seqcount = 1;
1054 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1055 	FILE_UNLOCK(fp);
1056 	FILEDESC_UNLOCK(fdp);
1057 
1058 	VOP_UNLOCK(vp, 0, td);
1059 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1060 		lf.l_whence = SEEK_SET;
1061 		lf.l_start = 0;
1062 		lf.l_len = 0;
1063 		if (flags & O_EXLOCK)
1064 			lf.l_type = F_WRLCK;
1065 		else
1066 			lf.l_type = F_RDLCK;
1067 		type = F_FLOCK;
1068 		if ((flags & FNONBLOCK) == 0)
1069 			type |= F_WAIT;
1070 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1071 			    type)) != 0)
1072 			goto bad;
1073 		fp->f_flag |= FHASLOCK;
1074 	}
1075 	if (flags & O_TRUNC) {
1076 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1077 			goto bad;
1078 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1079 		VATTR_NULL(&vat);
1080 		vat.va_size = 0;
1081 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1082 #ifdef MAC
1083 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1084 		if (error == 0)
1085 #endif
1086 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1087 		VOP_UNLOCK(vp, 0, td);
1088 		vn_finished_write(mp);
1089 		if (error)
1090 			goto bad;
1091 	}
1092 	VFS_UNLOCK_GIANT(vfslocked);
1093 	/*
1094 	 * Release our private reference, leaving the one associated with
1095 	 * the descriptor table intact.
1096 	 */
1097 	fdrop(fp, td);
1098 	td->td_retval[0] = indx;
1099 	return (0);
1100 bad:
1101 	VFS_UNLOCK_GIANT(vfslocked);
1102 	fdclose(fdp, fp, indx, td);
1103 	fdrop(fp, td);
1104 	return (error);
1105 }
1106 
#ifdef COMPAT_43
/*
 * Old creat system call: create a file by opening uap->path with
 * O_WRONLY | O_CREAT | O_TRUNC through kern_open().
 *
 * MP SAFE
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	int error;

	error = kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode);
	return (error);
}
#endif /* COMPAT_43 */
1132 
1133 /*
1134  * Create a special file.
1135  */
1136 #ifndef _SYS_SYSPROTO_H_
1137 struct mknod_args {
1138 	char	*path;
1139 	int	mode;
1140 	int	dev;
1141 };
1142 #endif
1143 int
1144 mknod(td, uap)
1145 	struct thread *td;
1146 	register struct mknod_args /* {
1147 		char *path;
1148 		int mode;
1149 		int dev;
1150 	} */ *uap;
1151 {
1152 
1153 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1154 }
1155 
1156 int
1157 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1158     int dev)
1159 {
1160 	struct vnode *vp;
1161 	struct mount *mp;
1162 	struct vattr vattr;
1163 	int error;
1164 	int whiteout = 0;
1165 	struct nameidata nd;
1166 	int vfslocked;
1167 
1168 	switch (mode & S_IFMT) {
1169 	case S_IFCHR:
1170 	case S_IFBLK:
1171 		error = suser(td);
1172 		break;
1173 	default:
1174 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
1175 		break;
1176 	}
1177 	if (error)
1178 		return (error);
1179 restart:
1180 	bwillwrite();
1181 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, pathseg, path, td);
1182 	if ((error = namei(&nd)) != 0)
1183 		return (error);
1184 	vfslocked = NDHASGIANT(&nd);
1185 	vp = nd.ni_vp;
1186 	if (vp != NULL) {
1187 		NDFREE(&nd, NDF_ONLY_PNBUF);
1188 		vrele(vp);
1189 		if (vp == nd.ni_dvp)
1190 			vrele(nd.ni_dvp);
1191 		else
1192 			vput(nd.ni_dvp);
1193 		VFS_UNLOCK_GIANT(vfslocked);
1194 		return (EEXIST);
1195 	} else {
1196 		VATTR_NULL(&vattr);
1197 		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1198 		vattr.va_mode = (mode & ALLPERMS) &
1199 		    ~td->td_proc->p_fd->fd_cmask;
1200 		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1201 		vattr.va_rdev = dev;
1202 		whiteout = 0;
1203 
1204 		switch (mode & S_IFMT) {
1205 		case S_IFMT:	/* used by badsect to flag bad sectors */
1206 			vattr.va_type = VBAD;
1207 			break;
1208 		case S_IFCHR:
1209 			vattr.va_type = VCHR;
1210 			break;
1211 		case S_IFBLK:
1212 			vattr.va_type = VBLK;
1213 			break;
1214 		case S_IFWHT:
1215 			whiteout = 1;
1216 			break;
1217 		default:
1218 			error = EINVAL;
1219 			break;
1220 		}
1221 	}
1222 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1223 		NDFREE(&nd, NDF_ONLY_PNBUF);
1224 		vput(nd.ni_dvp);
1225 		VFS_UNLOCK_GIANT(vfslocked);
1226 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1227 			return (error);
1228 		goto restart;
1229 	}
1230 #ifdef MAC
1231 	if (error == 0 && !whiteout)
1232 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1233 		    &nd.ni_cnd, &vattr);
1234 #endif
1235 	if (!error) {
1236 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1237 		if (whiteout)
1238 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1239 		else {
1240 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1241 						&nd.ni_cnd, &vattr);
1242 			if (error == 0)
1243 				vput(nd.ni_vp);
1244 		}
1245 	}
1246 	NDFREE(&nd, NDF_ONLY_PNBUF);
1247 	vput(nd.ni_dvp);
1248 	vn_finished_write(mp);
1249 	VFS_UNLOCK_GIANT(vfslocked);
1250 	return (error);
1251 }
1252 
1253 /*
1254  * Create a named pipe.
1255  */
1256 #ifndef _SYS_SYSPROTO_H_
1257 struct mkfifo_args {
1258 	char	*path;
1259 	int	mode;
1260 };
1261 #endif
1262 int
1263 mkfifo(td, uap)
1264 	struct thread *td;
1265 	register struct mkfifo_args /* {
1266 		char *path;
1267 		int mode;
1268 	} */ *uap;
1269 {
1270 
1271 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1272 }
1273 
1274 int
1275 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1276 {
1277 	struct mount *mp;
1278 	struct vattr vattr;
1279 	int error;
1280 	struct nameidata nd;
1281 	int vfslocked;
1282 
1283 restart:
1284 	bwillwrite();
1285 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, pathseg, path, td);
1286 	if ((error = namei(&nd)) != 0)
1287 		return (error);
1288 	vfslocked = NDHASGIANT(&nd);
1289 	if (nd.ni_vp != NULL) {
1290 		NDFREE(&nd, NDF_ONLY_PNBUF);
1291 		vrele(nd.ni_vp);
1292 		if (nd.ni_vp == nd.ni_dvp)
1293 			vrele(nd.ni_dvp);
1294 		else
1295 			vput(nd.ni_dvp);
1296 		VFS_UNLOCK_GIANT(vfslocked);
1297 		return (EEXIST);
1298 	}
1299 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1300 		NDFREE(&nd, NDF_ONLY_PNBUF);
1301 		vput(nd.ni_dvp);
1302 		VFS_UNLOCK_GIANT(vfslocked);
1303 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1304 			return (error);
1305 		goto restart;
1306 	}
1307 	VATTR_NULL(&vattr);
1308 	vattr.va_type = VFIFO;
1309 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1310 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1311 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1312 #ifdef MAC
1313 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1314 	    &vattr);
1315 	if (error)
1316 		goto out;
1317 #endif
1318 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1319 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1320 	if (error == 0)
1321 		vput(nd.ni_vp);
1322 #ifdef MAC
1323 out:
1324 #endif
1325 	vput(nd.ni_dvp);
1326 	vn_finished_write(mp);
1327 	VFS_UNLOCK_GIANT(vfslocked);
1328 	NDFREE(&nd, NDF_ONLY_PNBUF);
1329 	return (error);
1330 }
1331 
1332 /*
1333  * Make a hard file link.
1334  */
1335 #ifndef _SYS_SYSPROTO_H_
1336 struct link_args {
1337 	char	*path;
1338 	char	*link;
1339 };
1340 #endif
1341 int
1342 link(td, uap)
1343 	struct thread *td;
1344 	register struct link_args /* {
1345 		char *path;
1346 		char *link;
1347 	} */ *uap;
1348 {
1349 	int error;
1350 
1351 	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1352 	return (error);
1353 }
1354 
1355 SYSCTL_DECL(_security_bsd);
1356 
1357 static int hardlink_check_uid = 0;
1358 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1359     &hardlink_check_uid, 0,
1360     "Unprivileged processes cannot create hard links to files owned by other "
1361     "users");
1362 static int hardlink_check_gid = 0;
1363 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1364     &hardlink_check_gid, 0,
1365     "Unprivileged processes cannot create hard links to files owned by other "
1366     "groups");
1367 
1368 static int
1369 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1370 {
1371 	struct vattr va;
1372 	int error;
1373 
1374 	if (suser_cred(cred, SUSER_ALLOWJAIL) == 0)
1375 		return (0);
1376 
1377 	if (!hardlink_check_uid && !hardlink_check_gid)
1378 		return (0);
1379 
1380 	error = VOP_GETATTR(vp, &va, cred, td);
1381 	if (error != 0)
1382 		return (error);
1383 
1384 	if (hardlink_check_uid) {
1385 		if (cred->cr_uid != va.va_uid)
1386 			return (EPERM);
1387 	}
1388 
1389 	if (hardlink_check_gid) {
1390 		if (!groupmember(va.va_gid, cred))
1391 			return (EPERM);
1392 	}
1393 
1394 	return (0);
1395 }
1396 
1397 int
1398 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1399 {
1400 	struct vnode *vp;
1401 	struct mount *mp;
1402 	struct nameidata nd;
1403 	int vfslocked;
1404 	int lvfslocked;
1405 	int error;
1406 
1407 	bwillwrite();
1408 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, segflg, path, td);
1409 	if ((error = namei(&nd)) != 0)
1410 		return (error);
1411 	vfslocked = NDHASGIANT(&nd);
1412 	NDFREE(&nd, NDF_ONLY_PNBUF);
1413 	vp = nd.ni_vp;
1414 	if (vp->v_type == VDIR) {
1415 		vrele(vp);
1416 		VFS_UNLOCK_GIANT(vfslocked);
1417 		return (EPERM);		/* POSIX */
1418 	}
1419 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1420 		vrele(vp);
1421 		VFS_UNLOCK_GIANT(vfslocked);
1422 		return (error);
1423 	}
1424 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, segflg, link, td);
1425 	if ((error = namei(&nd)) == 0) {
1426 		lvfslocked = NDHASGIANT(&nd);
1427 		if (nd.ni_vp != NULL) {
1428 			vrele(nd.ni_vp);
1429 			if (nd.ni_dvp == nd.ni_vp)
1430 				vrele(nd.ni_dvp);
1431 			else
1432 				vput(nd.ni_dvp);
1433 			error = EEXIST;
1434 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1435 		    == 0) {
1436 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1437 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1438 			error = can_hardlink(vp, td, td->td_ucred);
1439 			if (error == 0)
1440 #ifdef MAC
1441 				error = mac_check_vnode_link(td->td_ucred,
1442 				    nd.ni_dvp, vp, &nd.ni_cnd);
1443 			if (error == 0)
1444 #endif
1445 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1446 			VOP_UNLOCK(vp, 0, td);
1447 			vput(nd.ni_dvp);
1448 		}
1449 		NDFREE(&nd, NDF_ONLY_PNBUF);
1450 		VFS_UNLOCK_GIANT(lvfslocked);
1451 	}
1452 	vrele(vp);
1453 	vn_finished_write(mp);
1454 	VFS_UNLOCK_GIANT(vfslocked);
1455 	return (error);
1456 }
1457 
1458 /*
1459  * Make a symbolic link.
1460  */
1461 #ifndef _SYS_SYSPROTO_H_
1462 struct symlink_args {
1463 	char	*path;
1464 	char	*link;
1465 };
1466 #endif
1467 int
1468 symlink(td, uap)
1469 	struct thread *td;
1470 	register struct symlink_args /* {
1471 		char *path;
1472 		char *link;
1473 	} */ *uap;
1474 {
1475 
1476 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1477 }
1478 
1479 int
1480 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1481 {
1482 	struct mount *mp;
1483 	struct vattr vattr;
1484 	char *syspath;
1485 	int error;
1486 	struct nameidata nd;
1487 	int vfslocked;
1488 
1489 	if (segflg == UIO_SYSSPACE) {
1490 		syspath = path;
1491 	} else {
1492 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1493 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1494 			goto out;
1495 	}
1496 restart:
1497 	bwillwrite();
1498 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE,
1499 	    segflg, link, td);
1500 	if ((error = namei(&nd)) != 0)
1501 		goto out;
1502 	vfslocked = NDHASGIANT(&nd);
1503 	if (nd.ni_vp) {
1504 		NDFREE(&nd, NDF_ONLY_PNBUF);
1505 		vrele(nd.ni_vp);
1506 		if (nd.ni_vp == nd.ni_dvp)
1507 			vrele(nd.ni_dvp);
1508 		else
1509 			vput(nd.ni_dvp);
1510 		VFS_UNLOCK_GIANT(vfslocked);
1511 		error = EEXIST;
1512 		goto out;
1513 	}
1514 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1515 		NDFREE(&nd, NDF_ONLY_PNBUF);
1516 		vput(nd.ni_dvp);
1517 		VFS_UNLOCK_GIANT(vfslocked);
1518 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1519 			goto out;
1520 		goto restart;
1521 	}
1522 	VATTR_NULL(&vattr);
1523 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1524 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1525 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1526 #ifdef MAC
1527 	vattr.va_type = VLNK;
1528 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1529 	    &vattr);
1530 	if (error)
1531 		goto out2;
1532 #endif
1533 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1534 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1535 	if (error == 0)
1536 		vput(nd.ni_vp);
1537 #ifdef MAC
1538 out2:
1539 #endif
1540 	NDFREE(&nd, NDF_ONLY_PNBUF);
1541 	vput(nd.ni_dvp);
1542 	vn_finished_write(mp);
1543 	VFS_UNLOCK_GIANT(vfslocked);
1544 out:
1545 	if (segflg != UIO_SYSSPACE)
1546 		uma_zfree(namei_zone, syspath);
1547 	return (error);
1548 }
1549 
1550 /*
1551  * Delete a whiteout from the filesystem.
1552  */
1553 int
1554 undelete(td, uap)
1555 	struct thread *td;
1556 	register struct undelete_args /* {
1557 		char *path;
1558 	} */ *uap;
1559 {
1560 	int error;
1561 	struct mount *mp;
1562 	struct nameidata nd;
1563 	int vfslocked;
1564 
1565 restart:
1566 	bwillwrite();
1567 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE, UIO_USERSPACE,
1568 	    uap->path, td);
1569 	error = namei(&nd);
1570 	if (error)
1571 		return (error);
1572 	vfslocked = NDHASGIANT(&nd);
1573 
1574 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1575 		NDFREE(&nd, NDF_ONLY_PNBUF);
1576 		if (nd.ni_vp)
1577 			vrele(nd.ni_vp);
1578 		if (nd.ni_vp == nd.ni_dvp)
1579 			vrele(nd.ni_dvp);
1580 		else
1581 			vput(nd.ni_dvp);
1582 		VFS_UNLOCK_GIANT(vfslocked);
1583 		return (EEXIST);
1584 	}
1585 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1586 		NDFREE(&nd, NDF_ONLY_PNBUF);
1587 		vput(nd.ni_dvp);
1588 		VFS_UNLOCK_GIANT(vfslocked);
1589 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1590 			return (error);
1591 		goto restart;
1592 	}
1593 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1594 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1595 	NDFREE(&nd, NDF_ONLY_PNBUF);
1596 	vput(nd.ni_dvp);
1597 	vn_finished_write(mp);
1598 	VFS_UNLOCK_GIANT(vfslocked);
1599 	return (error);
1600 }
1601 
1602 /*
1603  * Delete a name from the filesystem.
1604  */
1605 #ifndef _SYS_SYSPROTO_H_
1606 struct unlink_args {
1607 	char	*path;
1608 };
1609 #endif
1610 int
1611 unlink(td, uap)
1612 	struct thread *td;
1613 	struct unlink_args /* {
1614 		char *path;
1615 	} */ *uap;
1616 {
1617 	int error;
1618 
1619 	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1620 	return (error);
1621 }
1622 
1623 int
1624 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1625 {
1626 	struct mount *mp;
1627 	struct vnode *vp;
1628 	int error;
1629 	struct nameidata nd;
1630 	int vfslocked;
1631 
1632 restart:
1633 	bwillwrite();
1634 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE, pathseg, path, td);
1635 	if ((error = namei(&nd)) != 0)
1636 		return (error);
1637 	vfslocked = NDHASGIANT(&nd);
1638 	vp = nd.ni_vp;
1639 	if (vp->v_type == VDIR)
1640 		error = EPERM;		/* POSIX */
1641 	else {
1642 		/*
1643 		 * The root of a mounted filesystem cannot be deleted.
1644 		 *
1645 		 * XXX: can this only be a VDIR case?
1646 		 */
1647 		if (vp->v_vflag & VV_ROOT)
1648 			error = EBUSY;
1649 	}
1650 	if (error == 0) {
1651 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1652 			NDFREE(&nd, NDF_ONLY_PNBUF);
1653 			if (vp == nd.ni_dvp)
1654 				vrele(vp);
1655 			else
1656 				vput(vp);
1657 			vput(nd.ni_dvp);
1658 			VFS_UNLOCK_GIANT(vfslocked);
1659 			if ((error = vn_start_write(NULL, &mp,
1660 			    V_XSLEEP | PCATCH)) != 0)
1661 				return (error);
1662 			goto restart;
1663 		}
1664 #ifdef MAC
1665 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1666 		    &nd.ni_cnd);
1667 		if (error)
1668 			goto out;
1669 #endif
1670 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1671 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1672 #ifdef MAC
1673 out:
1674 #endif
1675 		vn_finished_write(mp);
1676 	}
1677 	NDFREE(&nd, NDF_ONLY_PNBUF);
1678 	if (vp == nd.ni_dvp)
1679 		vrele(vp);
1680 	else
1681 		vput(vp);
1682 	vput(nd.ni_dvp);
1683 	VFS_UNLOCK_GIANT(vfslocked);
1684 	return (error);
1685 }
1686 
1687 /*
1688  * Reposition read/write file offset.
1689  */
1690 #ifndef _SYS_SYSPROTO_H_
1691 struct lseek_args {
1692 	int	fd;
1693 	int	pad;
1694 	off_t	offset;
1695 	int	whence;
1696 };
1697 #endif
1698 int
1699 lseek(td, uap)
1700 	struct thread *td;
1701 	register struct lseek_args /* {
1702 		int fd;
1703 		int pad;
1704 		off_t offset;
1705 		int whence;
1706 	} */ *uap;
1707 {
1708 	struct ucred *cred = td->td_ucred;
1709 	struct file *fp;
1710 	struct vnode *vp;
1711 	struct vattr vattr;
1712 	off_t offset;
1713 	int error, noneg;
1714 	int vfslocked;
1715 
1716 	if ((error = fget(td, uap->fd, &fp)) != 0)
1717 		return (error);
1718 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1719 		fdrop(fp, td);
1720 		return (ESPIPE);
1721 	}
1722 	vp = fp->f_vnode;
1723 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1724 	noneg = (vp->v_type != VCHR);
1725 	offset = uap->offset;
1726 	switch (uap->whence) {
1727 	case L_INCR:
1728 		if (noneg &&
1729 		    (fp->f_offset < 0 ||
1730 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1731 			error = EOVERFLOW;
1732 			break;
1733 		}
1734 		offset += fp->f_offset;
1735 		break;
1736 	case L_XTND:
1737 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1738 		error = VOP_GETATTR(vp, &vattr, cred, td);
1739 		VOP_UNLOCK(vp, 0, td);
1740 		if (error)
1741 			break;
1742 		if (noneg &&
1743 		    (vattr.va_size > OFF_MAX ||
1744 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1745 			error = EOVERFLOW;
1746 			break;
1747 		}
1748 		offset += vattr.va_size;
1749 		break;
1750 	case L_SET:
1751 		break;
1752 	default:
1753 		error = EINVAL;
1754 	}
1755 	if (error == 0 && noneg && offset < 0)
1756 		error = EINVAL;
1757 	if (error != 0)
1758 		goto drop;
1759 	fp->f_offset = offset;
1760 	*(off_t *)(td->td_retval) = fp->f_offset;
1761 drop:
1762 	fdrop(fp, td);
1763 	VFS_UNLOCK_GIANT(vfslocked);
1764 	return (error);
1765 }
1766 
1767 #if defined(COMPAT_43)
1768 /*
1769  * Reposition read/write file offset.
1770  */
1771 #ifndef _SYS_SYSPROTO_H_
1772 struct olseek_args {
1773 	int	fd;
1774 	long	offset;
1775 	int	whence;
1776 };
1777 #endif
1778 int
1779 olseek(td, uap)
1780 	struct thread *td;
1781 	register struct olseek_args /* {
1782 		int fd;
1783 		long offset;
1784 		int whence;
1785 	} */ *uap;
1786 {
1787 	struct lseek_args /* {
1788 		int fd;
1789 		int pad;
1790 		off_t offset;
1791 		int whence;
1792 	} */ nuap;
1793 	int error;
1794 
1795 	nuap.fd = uap->fd;
1796 	nuap.offset = uap->offset;
1797 	nuap.whence = uap->whence;
1798 	error = lseek(td, &nuap);
1799 	return (error);
1800 }
1801 #endif /* COMPAT_43 */
1802 
1803 /*
1804  * Check access permissions using passed credentials.
1805  */
1806 static int
1807 vn_access(vp, user_flags, cred, td)
1808 	struct vnode	*vp;
1809 	int		user_flags;
1810 	struct ucred	*cred;
1811 	struct thread	*td;
1812 {
1813 	int error, flags;
1814 
1815 	/* Flags == 0 means only check for existence. */
1816 	error = 0;
1817 	if (user_flags) {
1818 		flags = 0;
1819 		if (user_flags & R_OK)
1820 			flags |= VREAD;
1821 		if (user_flags & W_OK)
1822 			flags |= VWRITE;
1823 		if (user_flags & X_OK)
1824 			flags |= VEXEC;
1825 #ifdef MAC
1826 		error = mac_check_vnode_access(cred, vp, flags);
1827 		if (error)
1828 			return (error);
1829 #endif
1830 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1831 			error = VOP_ACCESS(vp, flags, cred, td);
1832 	}
1833 	return (error);
1834 }
1835 
1836 /*
1837  * Check access permissions using "real" credentials.
1838  */
1839 #ifndef _SYS_SYSPROTO_H_
1840 struct access_args {
1841 	char	*path;
1842 	int	flags;
1843 };
1844 #endif
1845 int
1846 access(td, uap)
1847 	struct thread *td;
1848 	register struct access_args /* {
1849 		char *path;
1850 		int flags;
1851 	} */ *uap;
1852 {
1853 
1854 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1855 }
1856 
1857 int
1858 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1859 {
1860 	struct ucred *cred, *tmpcred;
1861 	register struct vnode *vp;
1862 	struct nameidata nd;
1863 	int vfslocked;
1864 	int error;
1865 
1866 	/*
1867 	 * Create and modify a temporary credential instead of one that
1868 	 * is potentially shared.  This could also mess up socket
1869 	 * buffer accounting which can run in an interrupt context.
1870 	 */
1871 	cred = td->td_ucred;
1872 	tmpcred = crdup(cred);
1873 	tmpcred->cr_uid = cred->cr_ruid;
1874 	tmpcred->cr_groups[0] = cred->cr_rgid;
1875 	td->td_ucred = tmpcred;
1876 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
1877 	if ((error = namei(&nd)) != 0)
1878 		goto out1;
1879 	vfslocked = NDHASGIANT(&nd);
1880 	vp = nd.ni_vp;
1881 
1882 	error = vn_access(vp, flags, tmpcred, td);
1883 	NDFREE(&nd, NDF_ONLY_PNBUF);
1884 	vput(vp);
1885 	VFS_UNLOCK_GIANT(vfslocked);
1886 out1:
1887 	td->td_ucred = cred;
1888 	crfree(tmpcred);
1889 	return (error);
1890 }
1891 
1892 /*
1893  * Check access permissions using "effective" credentials.
1894  */
1895 #ifndef _SYS_SYSPROTO_H_
1896 struct eaccess_args {
1897 	char	*path;
1898 	int	flags;
1899 };
1900 #endif
1901 int
1902 eaccess(td, uap)
1903 	struct thread *td;
1904 	register struct eaccess_args /* {
1905 		char *path;
1906 		int flags;
1907 	} */ *uap;
1908 {
1909 	struct nameidata nd;
1910 	struct vnode *vp;
1911 	int vfslocked;
1912 	int error;
1913 
1914 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE,
1915 	    uap->path, td);
1916 	if ((error = namei(&nd)) != 0)
1917 		return (error);
1918 	vp = nd.ni_vp;
1919 	vfslocked = NDHASGIANT(&nd);
1920 	error = vn_access(vp, uap->flags, td->td_ucred, td);
1921 	NDFREE(&nd, NDF_ONLY_PNBUF);
1922 	vput(vp);
1923 	VFS_UNLOCK_GIANT(vfslocked);
1924 	return (error);
1925 }
1926 
1927 #if defined(COMPAT_43)
1928 /*
1929  * Get file status; this version follows links.
1930  */
1931 #ifndef _SYS_SYSPROTO_H_
1932 struct ostat_args {
1933 	char	*path;
1934 	struct ostat *ub;
1935 };
1936 #endif
1937 int
1938 ostat(td, uap)
1939 	struct thread *td;
1940 	register struct ostat_args /* {
1941 		char *path;
1942 		struct ostat *ub;
1943 	} */ *uap;
1944 {
1945 	struct stat sb;
1946 	struct ostat osb;
1947 	int error;
1948 
1949 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1950 	if (error)
1951 		return (error);
1952 	cvtstat(&sb, &osb);
1953 	error = copyout(&osb, uap->ub, sizeof (osb));
1954 	return (error);
1955 }
1956 
1957 /*
1958  * Get file status; this version does not follow links.
1959  */
1960 #ifndef _SYS_SYSPROTO_H_
1961 struct olstat_args {
1962 	char	*path;
1963 	struct ostat *ub;
1964 };
1965 #endif
1966 int
1967 olstat(td, uap)
1968 	struct thread *td;
1969 	register struct olstat_args /* {
1970 		char *path;
1971 		struct ostat *ub;
1972 	} */ *uap;
1973 {
1974 	struct stat sb;
1975 	struct ostat osb;
1976 	int error;
1977 
1978 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
1979 	if (error)
1980 		return (error);
1981 	cvtstat(&sb, &osb);
1982 	error = copyout(&osb, uap->ub, sizeof (osb));
1983 	return (error);
1984 }
1985 
1986 /*
1987  * Convert from an old to a new stat structure.
1988  */
1989 void
1990 cvtstat(st, ost)
1991 	struct stat *st;
1992 	struct ostat *ost;
1993 {
1994 
1995 	ost->st_dev = st->st_dev;
1996 	ost->st_ino = st->st_ino;
1997 	ost->st_mode = st->st_mode;
1998 	ost->st_nlink = st->st_nlink;
1999 	ost->st_uid = st->st_uid;
2000 	ost->st_gid = st->st_gid;
2001 	ost->st_rdev = st->st_rdev;
2002 	if (st->st_size < (quad_t)1 << 32)
2003 		ost->st_size = st->st_size;
2004 	else
2005 		ost->st_size = -2;
2006 	ost->st_atime = st->st_atime;
2007 	ost->st_mtime = st->st_mtime;
2008 	ost->st_ctime = st->st_ctime;
2009 	ost->st_blksize = st->st_blksize;
2010 	ost->st_blocks = st->st_blocks;
2011 	ost->st_flags = st->st_flags;
2012 	ost->st_gen = st->st_gen;
2013 }
2014 #endif /* COMPAT_43 */
2015 
2016 /*
2017  * Get file status; this version follows links.
2018  */
2019 #ifndef _SYS_SYSPROTO_H_
2020 struct stat_args {
2021 	char	*path;
2022 	struct stat *ub;
2023 };
2024 #endif
2025 int
2026 stat(td, uap)
2027 	struct thread *td;
2028 	register struct stat_args /* {
2029 		char *path;
2030 		struct stat *ub;
2031 	} */ *uap;
2032 {
2033 	struct stat sb;
2034 	int error;
2035 
2036 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2037 	if (error == 0)
2038 		error = copyout(&sb, uap->ub, sizeof (sb));
2039 	return (error);
2040 }
2041 
2042 int
2043 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2044 {
2045 	struct nameidata nd;
2046 	struct stat sb;
2047 	int error, vfslocked;
2048 
2049 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE,
2050 	    pathseg, path, td);
2051 	if ((error = namei(&nd)) != 0)
2052 		return (error);
2053 	vfslocked = NDHASGIANT(&nd);
2054 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2055 	NDFREE(&nd, NDF_ONLY_PNBUF);
2056 	vput(nd.ni_vp);
2057 	VFS_UNLOCK_GIANT(vfslocked);
2058 	if (error)
2059 		return (error);
2060 	*sbp = sb;
2061 	return (0);
2062 }
2063 
2064 /*
2065  * Get file status; this version does not follow links.
2066  */
2067 #ifndef _SYS_SYSPROTO_H_
2068 struct lstat_args {
2069 	char	*path;
2070 	struct stat *ub;
2071 };
2072 #endif
2073 int
2074 lstat(td, uap)
2075 	struct thread *td;
2076 	register struct lstat_args /* {
2077 		char *path;
2078 		struct stat *ub;
2079 	} */ *uap;
2080 {
2081 	struct stat sb;
2082 	int error;
2083 
2084 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2085 	if (error == 0)
2086 		error = copyout(&sb, uap->ub, sizeof (sb));
2087 	return (error);
2088 }
2089 
2090 int
2091 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2092 {
2093 	struct vnode *vp;
2094 	struct stat sb;
2095 	struct nameidata nd;
2096 	int error, vfslocked;
2097 
2098 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE,
2099 	    pathseg, path, td);
2100 	if ((error = namei(&nd)) != 0)
2101 		return (error);
2102 	vfslocked = NDHASGIANT(&nd);
2103 	vp = nd.ni_vp;
2104 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2105 	NDFREE(&nd, NDF_ONLY_PNBUF);
2106 	vput(vp);
2107 	VFS_UNLOCK_GIANT(vfslocked);
2108 	if (error)
2109 		return (error);
2110 	*sbp = sb;
2111 	return (0);
2112 }
2113 
2114 /*
2115  * Implementation of the NetBSD [l]stat() functions.
2116  */
2117 void
2118 cvtnstat(sb, nsb)
2119 	struct stat *sb;
2120 	struct nstat *nsb;
2121 {
2122 	bzero(nsb, sizeof *nsb);
2123 	nsb->st_dev = sb->st_dev;
2124 	nsb->st_ino = sb->st_ino;
2125 	nsb->st_mode = sb->st_mode;
2126 	nsb->st_nlink = sb->st_nlink;
2127 	nsb->st_uid = sb->st_uid;
2128 	nsb->st_gid = sb->st_gid;
2129 	nsb->st_rdev = sb->st_rdev;
2130 	nsb->st_atimespec = sb->st_atimespec;
2131 	nsb->st_mtimespec = sb->st_mtimespec;
2132 	nsb->st_ctimespec = sb->st_ctimespec;
2133 	nsb->st_size = sb->st_size;
2134 	nsb->st_blocks = sb->st_blocks;
2135 	nsb->st_blksize = sb->st_blksize;
2136 	nsb->st_flags = sb->st_flags;
2137 	nsb->st_gen = sb->st_gen;
2138 	nsb->st_birthtimespec = sb->st_birthtimespec;
2139 }
2140 
2141 #ifndef _SYS_SYSPROTO_H_
2142 struct nstat_args {
2143 	char	*path;
2144 	struct nstat *ub;
2145 };
2146 #endif
2147 int
2148 nstat(td, uap)
2149 	struct thread *td;
2150 	register struct nstat_args /* {
2151 		char *path;
2152 		struct nstat *ub;
2153 	} */ *uap;
2154 {
2155 	struct stat sb;
2156 	struct nstat nsb;
2157 	int error;
2158 
2159 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2160 	if (error)
2161 		return (error);
2162 	cvtnstat(&sb, &nsb);
2163 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2164 	return (error);
2165 }
2166 
2167 /*
2168  * NetBSD lstat.  Get file status; this version does not follow links.
2169  */
2170 #ifndef _SYS_SYSPROTO_H_
2171 struct lstat_args {
2172 	char	*path;
2173 	struct stat *ub;
2174 };
2175 #endif
2176 int
2177 nlstat(td, uap)
2178 	struct thread *td;
2179 	register struct nlstat_args /* {
2180 		char *path;
2181 		struct nstat *ub;
2182 	} */ *uap;
2183 {
2184 	struct stat sb;
2185 	struct nstat nsb;
2186 	int error;
2187 
2188 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2189 	if (error)
2190 		return (error);
2191 	cvtnstat(&sb, &nsb);
2192 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2193 	return (error);
2194 }
2195 
2196 /*
2197  * Get configurable pathname variables.
2198  */
2199 #ifndef _SYS_SYSPROTO_H_
2200 struct pathconf_args {
2201 	char	*path;
2202 	int	name;
2203 };
2204 #endif
2205 int
2206 pathconf(td, uap)
2207 	struct thread *td;
2208 	register struct pathconf_args /* {
2209 		char *path;
2210 		int name;
2211 	} */ *uap;
2212 {
2213 
2214 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2215 }
2216 
2217 int
2218 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2219 {
2220 	struct nameidata nd;
2221 	int error, vfslocked;
2222 
2223 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
2224 	if ((error = namei(&nd)) != 0)
2225 		return (error);
2226 	vfslocked = NDHASGIANT(&nd);
2227 	NDFREE(&nd, NDF_ONLY_PNBUF);
2228 
2229 	/* If asynchronous I/O is available, it works for all files. */
2230 	if (name == _PC_ASYNC_IO)
2231 		td->td_retval[0] = async_io_version;
2232 	else
2233 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2234 	vput(nd.ni_vp);
2235 	VFS_UNLOCK_GIANT(vfslocked);
2236 	return (error);
2237 }
2238 
2239 /*
2240  * Return target name of a symbolic link.
2241  */
2242 #ifndef _SYS_SYSPROTO_H_
2243 struct readlink_args {
2244 	char	*path;
2245 	char	*buf;
2246 	int	count;
2247 };
2248 #endif
2249 int
2250 readlink(td, uap)
2251 	struct thread *td;
2252 	register struct readlink_args /* {
2253 		char *path;
2254 		char *buf;
2255 		int count;
2256 	} */ *uap;
2257 {
2258 
2259 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2260 	    UIO_USERSPACE, uap->count));
2261 }
2262 
2263 int
2264 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2265     enum uio_seg bufseg, int count)
2266 {
2267 	register struct vnode *vp;
2268 	struct iovec aiov;
2269 	struct uio auio;
2270 	int error;
2271 	struct nameidata nd;
2272 	int vfslocked;
2273 
2274 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
2275 	if ((error = namei(&nd)) != 0)
2276 		return (error);
2277 	NDFREE(&nd, NDF_ONLY_PNBUF);
2278 	vfslocked = NDHASGIANT(&nd);
2279 	vp = nd.ni_vp;
2280 #ifdef MAC
2281 	error = mac_check_vnode_readlink(td->td_ucred, vp);
2282 	if (error) {
2283 		vput(vp);
2284 		VFS_UNLOCK_GIANT(vfslocked);
2285 		return (error);
2286 	}
2287 #endif
2288 	if (vp->v_type != VLNK)
2289 		error = EINVAL;
2290 	else {
2291 		aiov.iov_base = buf;
2292 		aiov.iov_len = count;
2293 		auio.uio_iov = &aiov;
2294 		auio.uio_iovcnt = 1;
2295 		auio.uio_offset = 0;
2296 		auio.uio_rw = UIO_READ;
2297 		auio.uio_segflg = bufseg;
2298 		auio.uio_td = td;
2299 		auio.uio_resid = count;
2300 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2301 	}
2302 	vput(vp);
2303 	VFS_UNLOCK_GIANT(vfslocked);
2304 	td->td_retval[0] = count - auio.uio_resid;
2305 	return (error);
2306 }
2307 
2308 /*
2309  * Common implementation code for chflags() and fchflags().
2310  */
2311 static int
2312 setfflags(td, vp, flags)
2313 	struct thread *td;
2314 	struct vnode *vp;
2315 	int flags;
2316 {
2317 	int error;
2318 	struct mount *mp;
2319 	struct vattr vattr;
2320 
2321 	/*
2322 	 * Prevent non-root users from setting flags on devices.  When
2323 	 * a device is reused, users can retain ownership of the device
2324 	 * if they are allowed to set flags and programs assume that
2325 	 * chown can't fail when done as root.
2326 	 */
2327 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2328 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
2329 		if (error)
2330 			return (error);
2331 	}
2332 
2333 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2334 		return (error);
2335 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2336 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2337 	VATTR_NULL(&vattr);
2338 	vattr.va_flags = flags;
2339 #ifdef MAC
2340 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2341 	if (error == 0)
2342 #endif
2343 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2344 	VOP_UNLOCK(vp, 0, td);
2345 	vn_finished_write(mp);
2346 	return (error);
2347 }
2348 
2349 /*
2350  * Change flags of a file given a path name.
2351  */
2352 #ifndef _SYS_SYSPROTO_H_
2353 struct chflags_args {
2354 	char	*path;
2355 	int	flags;
2356 };
2357 #endif
2358 int
2359 chflags(td, uap)
2360 	struct thread *td;
2361 	register struct chflags_args /* {
2362 		char *path;
2363 		int flags;
2364 	} */ *uap;
2365 {
2366 	int error;
2367 	struct nameidata nd;
2368 	int vfslocked;
2369 
2370 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td);
2371 	if ((error = namei(&nd)) != 0)
2372 		return (error);
2373 	NDFREE(&nd, NDF_ONLY_PNBUF);
2374 	vfslocked = NDHASGIANT(&nd);
2375 	error = setfflags(td, nd.ni_vp, uap->flags);
2376 	vrele(nd.ni_vp);
2377 	VFS_UNLOCK_GIANT(vfslocked);
2378 	return (error);
2379 }
2380 
2381 /*
2382  * Same as chflags() but doesn't follow symlinks.
2383  */
2384 int
2385 lchflags(td, uap)
2386 	struct thread *td;
2387 	register struct lchflags_args /* {
2388 		char *path;
2389 		int flags;
2390 	} */ *uap;
2391 {
2392 	int error;
2393 	struct nameidata nd;
2394 	int vfslocked;
2395 
2396 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td);
2397 	if ((error = namei(&nd)) != 0)
2398 		return (error);
2399 	vfslocked = NDHASGIANT(&nd);
2400 	NDFREE(&nd, NDF_ONLY_PNBUF);
2401 	error = setfflags(td, nd.ni_vp, uap->flags);
2402 	vrele(nd.ni_vp);
2403 	VFS_UNLOCK_GIANT(vfslocked);
2404 	return (error);
2405 }
2406 
2407 /*
2408  * Change flags of a file given a file descriptor.
2409  */
2410 #ifndef _SYS_SYSPROTO_H_
2411 struct fchflags_args {
2412 	int	fd;
2413 	int	flags;
2414 };
2415 #endif
2416 int
2417 fchflags(td, uap)
2418 	struct thread *td;
2419 	register struct fchflags_args /* {
2420 		int fd;
2421 		int flags;
2422 	} */ *uap;
2423 {
2424 	struct file *fp;
2425 	int vfslocked;
2426 	int error;
2427 
2428 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2429 		return (error);
2430 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2431 	error = setfflags(td, fp->f_vnode, uap->flags);
2432 	VFS_UNLOCK_GIANT(vfslocked);
2433 	fdrop(fp, td);
2434 	return (error);
2435 }
2436 
2437 /*
2438  * Common implementation code for chmod(), lchmod() and fchmod().
2439  */
2440 static int
2441 setfmode(td, vp, mode)
2442 	struct thread *td;
2443 	struct vnode *vp;
2444 	int mode;
2445 {
2446 	int error;
2447 	struct mount *mp;
2448 	struct vattr vattr;
2449 
2450 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2451 		return (error);
2452 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2453 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2454 	VATTR_NULL(&vattr);
2455 	vattr.va_mode = mode & ALLPERMS;
2456 #ifdef MAC
2457 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2458 	if (error == 0)
2459 #endif
2460 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2461 	VOP_UNLOCK(vp, 0, td);
2462 	vn_finished_write(mp);
2463 	return (error);
2464 }
2465 
2466 /*
2467  * Change mode of a file given path name.
2468  */
2469 #ifndef _SYS_SYSPROTO_H_
2470 struct chmod_args {
2471 	char	*path;
2472 	int	mode;
2473 };
2474 #endif
2475 int
2476 chmod(td, uap)
2477 	struct thread *td;
2478 	register struct chmod_args /* {
2479 		char *path;
2480 		int mode;
2481 	} */ *uap;
2482 {
2483 
2484 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2485 }
2486 
2487 int
2488 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2489 {
2490 	int error;
2491 	struct nameidata nd;
2492 	int vfslocked;
2493 
2494 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2495 	if ((error = namei(&nd)) != 0)
2496 		return (error);
2497 	vfslocked = NDHASGIANT(&nd);
2498 	NDFREE(&nd, NDF_ONLY_PNBUF);
2499 	error = setfmode(td, nd.ni_vp, mode);
2500 	vrele(nd.ni_vp);
2501 	VFS_UNLOCK_GIANT(vfslocked);
2502 	return (error);
2503 }
2504 
2505 /*
2506  * Change mode of a file given path name (don't follow links.)
2507  */
2508 #ifndef _SYS_SYSPROTO_H_
2509 struct lchmod_args {
2510 	char	*path;
2511 	int	mode;
2512 };
2513 #endif
2514 int
2515 lchmod(td, uap)
2516 	struct thread *td;
2517 	register struct lchmod_args /* {
2518 		char *path;
2519 		int mode;
2520 	} */ *uap;
2521 {
2522 	int error;
2523 	struct nameidata nd;
2524 	int vfslocked;
2525 
2526 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td);
2527 	if ((error = namei(&nd)) != 0)
2528 		return (error);
2529 	vfslocked = NDHASGIANT(&nd);
2530 	NDFREE(&nd, NDF_ONLY_PNBUF);
2531 	error = setfmode(td, nd.ni_vp, uap->mode);
2532 	vrele(nd.ni_vp);
2533 	VFS_UNLOCK_GIANT(vfslocked);
2534 	return (error);
2535 }
2536 
2537 /*
2538  * Change mode of a file given a file descriptor.
2539  */
2540 #ifndef _SYS_SYSPROTO_H_
2541 struct fchmod_args {
2542 	int	fd;
2543 	int	mode;
2544 };
2545 #endif
2546 int
2547 fchmod(td, uap)
2548 	struct thread *td;
2549 	register struct fchmod_args /* {
2550 		int fd;
2551 		int mode;
2552 	} */ *uap;
2553 {
2554 	struct file *fp;
2555 	int vfslocked;
2556 	int error;
2557 
2558 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2559 		return (error);
2560 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2561 	error = setfmode(td, fp->f_vnode, uap->mode);
2562 	VFS_UNLOCK_GIANT(vfslocked);
2563 	fdrop(fp, td);
2564 	return (error);
2565 }
2566 
2567 /*
2568  * Common implementation for chown(), lchown(), and fchown()
2569  */
2570 static int
2571 setfown(td, vp, uid, gid)
2572 	struct thread *td;
2573 	struct vnode *vp;
2574 	uid_t uid;
2575 	gid_t gid;
2576 {
2577 	int error;
2578 	struct mount *mp;
2579 	struct vattr vattr;
2580 
2581 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2582 		return (error);
2583 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2584 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2585 	VATTR_NULL(&vattr);
2586 	vattr.va_uid = uid;
2587 	vattr.va_gid = gid;
2588 #ifdef MAC
2589 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2590 	    vattr.va_gid);
2591 	if (error == 0)
2592 #endif
2593 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2594 	VOP_UNLOCK(vp, 0, td);
2595 	vn_finished_write(mp);
2596 	return (error);
2597 }
2598 
2599 /*
2600  * Set ownership given a path name.
2601  */
2602 #ifndef _SYS_SYSPROTO_H_
2603 struct chown_args {
2604 	char	*path;
2605 	int	uid;
2606 	int	gid;
2607 };
2608 #endif
2609 int
2610 chown(td, uap)
2611 	struct thread *td;
2612 	register struct chown_args /* {
2613 		char *path;
2614 		int uid;
2615 		int gid;
2616 	} */ *uap;
2617 {
2618 
2619 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2620 }
2621 
2622 int
2623 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2624     int gid)
2625 {
2626 	int error;
2627 	struct nameidata nd;
2628 	int vfslocked;
2629 
2630 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2631 	if ((error = namei(&nd)) != 0)
2632 		return (error);
2633 	vfslocked = NDHASGIANT(&nd);
2634 	NDFREE(&nd, NDF_ONLY_PNBUF);
2635 	error = setfown(td, nd.ni_vp, uid, gid);
2636 	vrele(nd.ni_vp);
2637 	VFS_UNLOCK_GIANT(vfslocked);
2638 	return (error);
2639 }
2640 
2641 /*
2642  * Set ownership given a path name, do not cross symlinks.
2643  */
2644 #ifndef _SYS_SYSPROTO_H_
2645 struct lchown_args {
2646 	char	*path;
2647 	int	uid;
2648 	int	gid;
2649 };
2650 #endif
2651 int
2652 lchown(td, uap)
2653 	struct thread *td;
2654 	register struct lchown_args /* {
2655 		char *path;
2656 		int uid;
2657 		int gid;
2658 	} */ *uap;
2659 {
2660 
2661 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2662 }
2663 
2664 int
2665 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2666     int gid)
2667 {
2668 	int error;
2669 	struct nameidata nd;
2670 	int vfslocked;
2671 
2672 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, pathseg, path, td);
2673 	if ((error = namei(&nd)) != 0)
2674 		return (error);
2675 	vfslocked = NDHASGIANT(&nd);
2676 	NDFREE(&nd, NDF_ONLY_PNBUF);
2677 	error = setfown(td, nd.ni_vp, uid, gid);
2678 	vrele(nd.ni_vp);
2679 	VFS_UNLOCK_GIANT(vfslocked);
2680 	return (error);
2681 }
2682 
2683 /*
2684  * Set ownership given a file descriptor.
2685  */
2686 #ifndef _SYS_SYSPROTO_H_
2687 struct fchown_args {
2688 	int	fd;
2689 	int	uid;
2690 	int	gid;
2691 };
2692 #endif
2693 int
2694 fchown(td, uap)
2695 	struct thread *td;
2696 	register struct fchown_args /* {
2697 		int fd;
2698 		int uid;
2699 		int gid;
2700 	} */ *uap;
2701 {
2702 	struct file *fp;
2703 	int vfslocked;
2704 	int error;
2705 
2706 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2707 		return (error);
2708 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2709 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2710 	VFS_UNLOCK_GIANT(vfslocked);
2711 	fdrop(fp, td);
2712 	return (error);
2713 }
2714 
2715 /*
2716  * Common implementation code for utimes(), lutimes(), and futimes().
2717  */
2718 static int
2719 getutimes(usrtvp, tvpseg, tsp)
2720 	const struct timeval *usrtvp;
2721 	enum uio_seg tvpseg;
2722 	struct timespec *tsp;
2723 {
2724 	struct timeval tv[2];
2725 	const struct timeval *tvp;
2726 	int error;
2727 
2728 	if (usrtvp == NULL) {
2729 		microtime(&tv[0]);
2730 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2731 		tsp[1] = tsp[0];
2732 	} else {
2733 		if (tvpseg == UIO_SYSSPACE) {
2734 			tvp = usrtvp;
2735 		} else {
2736 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2737 				return (error);
2738 			tvp = tv;
2739 		}
2740 
2741 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2742 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2743 			return (EINVAL);
2744 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2745 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2746 	}
2747 	return (0);
2748 }
2749 
2750 /*
2751  * Common implementation code for utimes(), lutimes(), and futimes().
2752  */
2753 static int
2754 setutimes(td, vp, ts, numtimes, nullflag)
2755 	struct thread *td;
2756 	struct vnode *vp;
2757 	const struct timespec *ts;
2758 	int numtimes;
2759 	int nullflag;
2760 {
2761 	int error, setbirthtime;
2762 	struct mount *mp;
2763 	struct vattr vattr;
2764 
2765 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2766 		return (error);
2767 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2768 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2769 	setbirthtime = 0;
2770 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2771 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2772 		setbirthtime = 1;
2773 	VATTR_NULL(&vattr);
2774 	vattr.va_atime = ts[0];
2775 	vattr.va_mtime = ts[1];
2776 	if (setbirthtime)
2777 		vattr.va_birthtime = ts[1];
2778 	if (numtimes > 2)
2779 		vattr.va_birthtime = ts[2];
2780 	if (nullflag)
2781 		vattr.va_vaflags |= VA_UTIMES_NULL;
2782 #ifdef MAC
2783 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2784 	    vattr.va_mtime);
2785 #endif
2786 	if (error == 0)
2787 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2788 	VOP_UNLOCK(vp, 0, td);
2789 	vn_finished_write(mp);
2790 	return (error);
2791 }
2792 
2793 /*
2794  * Set the access and modification times of a file.
2795  */
2796 #ifndef _SYS_SYSPROTO_H_
2797 struct utimes_args {
2798 	char	*path;
2799 	struct	timeval *tptr;
2800 };
2801 #endif
2802 int
2803 utimes(td, uap)
2804 	struct thread *td;
2805 	register struct utimes_args /* {
2806 		char *path;
2807 		struct timeval *tptr;
2808 	} */ *uap;
2809 {
2810 
2811 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2812 	    UIO_USERSPACE));
2813 }
2814 
2815 int
2816 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2817     struct timeval *tptr, enum uio_seg tptrseg)
2818 {
2819 	struct timespec ts[2];
2820 	int error;
2821 	struct nameidata nd;
2822 	int vfslocked;
2823 
2824 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2825 		return (error);
2826 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2827 	if ((error = namei(&nd)) != 0)
2828 		return (error);
2829 	vfslocked = NDHASGIANT(&nd);
2830 	NDFREE(&nd, NDF_ONLY_PNBUF);
2831 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2832 	vrele(nd.ni_vp);
2833 	VFS_UNLOCK_GIANT(vfslocked);
2834 	return (error);
2835 }
2836 
2837 /*
2838  * Set the access and modification times of a file.
2839  */
2840 #ifndef _SYS_SYSPROTO_H_
2841 struct lutimes_args {
2842 	char	*path;
2843 	struct	timeval *tptr;
2844 };
2845 #endif
2846 int
2847 lutimes(td, uap)
2848 	struct thread *td;
2849 	register struct lutimes_args /* {
2850 		char *path;
2851 		struct timeval *tptr;
2852 	} */ *uap;
2853 {
2854 
2855 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2856 	    UIO_USERSPACE));
2857 }
2858 
2859 int
2860 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2861     struct timeval *tptr, enum uio_seg tptrseg)
2862 {
2863 	struct timespec ts[2];
2864 	int error;
2865 	struct nameidata nd;
2866 	int vfslocked;
2867 
2868 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2869 		return (error);
2870 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, pathseg, path, td);
2871 	if ((error = namei(&nd)) != 0)
2872 		return (error);
2873 	vfslocked = NDHASGIANT(&nd);
2874 	NDFREE(&nd, NDF_ONLY_PNBUF);
2875 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2876 	vrele(nd.ni_vp);
2877 	VFS_UNLOCK_GIANT(vfslocked);
2878 	return (error);
2879 }
2880 
2881 /*
2882  * Set the access and modification times of a file.
2883  */
2884 #ifndef _SYS_SYSPROTO_H_
2885 struct futimes_args {
2886 	int	fd;
2887 	struct	timeval *tptr;
2888 };
2889 #endif
2890 int
2891 futimes(td, uap)
2892 	struct thread *td;
2893 	register struct futimes_args /* {
2894 		int  fd;
2895 		struct timeval *tptr;
2896 	} */ *uap;
2897 {
2898 
2899 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2900 }
2901 
2902 int
2903 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2904     enum uio_seg tptrseg)
2905 {
2906 	struct timespec ts[2];
2907 	struct file *fp;
2908 	int vfslocked;
2909 	int error;
2910 
2911 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2912 		return (error);
2913 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2914 		return (error);
2915 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2916 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2917 	VFS_UNLOCK_GIANT(vfslocked);
2918 	fdrop(fp, td);
2919 	return (error);
2920 }
2921 
2922 /*
2923  * Truncate a file given its path name.
2924  */
2925 #ifndef _SYS_SYSPROTO_H_
2926 struct truncate_args {
2927 	char	*path;
2928 	int	pad;
2929 	off_t	length;
2930 };
2931 #endif
2932 int
2933 truncate(td, uap)
2934 	struct thread *td;
2935 	register struct truncate_args /* {
2936 		char *path;
2937 		int pad;
2938 		off_t length;
2939 	} */ *uap;
2940 {
2941 
2942 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2943 }
2944 
2945 int
2946 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2947 {
2948 	struct mount *mp;
2949 	struct vnode *vp;
2950 	struct vattr vattr;
2951 	int error;
2952 	struct nameidata nd;
2953 	int vfslocked;
2954 
2955 	if (length < 0)
2956 		return(EINVAL);
2957 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2958 	if ((error = namei(&nd)) != 0)
2959 		return (error);
2960 	vfslocked = NDHASGIANT(&nd);
2961 	vp = nd.ni_vp;
2962 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2963 		vrele(vp);
2964 		VFS_UNLOCK_GIANT(vfslocked);
2965 		return (error);
2966 	}
2967 	NDFREE(&nd, NDF_ONLY_PNBUF);
2968 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2969 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2970 	if (vp->v_type == VDIR)
2971 		error = EISDIR;
2972 #ifdef MAC
2973 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2974 	}
2975 #endif
2976 	else if ((error = vn_writechk(vp)) == 0 &&
2977 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2978 		VATTR_NULL(&vattr);
2979 		vattr.va_size = length;
2980 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2981 	}
2982 	vput(vp);
2983 	vn_finished_write(mp);
2984 	VFS_UNLOCK_GIANT(vfslocked);
2985 	return (error);
2986 }
2987 
2988 /*
2989  * Truncate a file given a file descriptor.
2990  */
2991 #ifndef _SYS_SYSPROTO_H_
2992 struct ftruncate_args {
2993 	int	fd;
2994 	int	pad;
2995 	off_t	length;
2996 };
2997 #endif
2998 int
2999 ftruncate(td, uap)
3000 	struct thread *td;
3001 	register struct ftruncate_args /* {
3002 		int fd;
3003 		int pad;
3004 		off_t length;
3005 	} */ *uap;
3006 {
3007 	struct mount *mp;
3008 	struct vattr vattr;
3009 	struct vnode *vp;
3010 	struct file *fp;
3011 	int vfslocked;
3012 	int error;
3013 
3014 	if (uap->length < 0)
3015 		return(EINVAL);
3016 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3017 		return (error);
3018 	if ((fp->f_flag & FWRITE) == 0) {
3019 		fdrop(fp, td);
3020 		return (EINVAL);
3021 	}
3022 	vp = fp->f_vnode;
3023 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3024 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3025 		goto drop;
3026 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3027 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3028 	if (vp->v_type == VDIR)
3029 		error = EISDIR;
3030 #ifdef MAC
3031 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3032 	    vp))) {
3033 	}
3034 #endif
3035 	else if ((error = vn_writechk(vp)) == 0) {
3036 		VATTR_NULL(&vattr);
3037 		vattr.va_size = uap->length;
3038 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3039 	}
3040 	VOP_UNLOCK(vp, 0, td);
3041 	vn_finished_write(mp);
3042 drop:
3043 	VFS_UNLOCK_GIANT(vfslocked);
3044 	fdrop(fp, td);
3045 	return (error);
3046 }
3047 
3048 #if defined(COMPAT_43)
3049 /*
3050  * Truncate a file given its path name.
3051  */
3052 #ifndef _SYS_SYSPROTO_H_
3053 struct otruncate_args {
3054 	char	*path;
3055 	long	length;
3056 };
3057 #endif
3058 int
3059 otruncate(td, uap)
3060 	struct thread *td;
3061 	register struct otruncate_args /* {
3062 		char *path;
3063 		long length;
3064 	} */ *uap;
3065 {
3066 	struct truncate_args /* {
3067 		char *path;
3068 		int pad;
3069 		off_t length;
3070 	} */ nuap;
3071 
3072 	nuap.path = uap->path;
3073 	nuap.length = uap->length;
3074 	return (truncate(td, &nuap));
3075 }
3076 
3077 /*
3078  * Truncate a file given a file descriptor.
3079  */
3080 #ifndef _SYS_SYSPROTO_H_
3081 struct oftruncate_args {
3082 	int	fd;
3083 	long	length;
3084 };
3085 #endif
3086 int
3087 oftruncate(td, uap)
3088 	struct thread *td;
3089 	register struct oftruncate_args /* {
3090 		int fd;
3091 		long length;
3092 	} */ *uap;
3093 {
3094 	struct ftruncate_args /* {
3095 		int fd;
3096 		int pad;
3097 		off_t length;
3098 	} */ nuap;
3099 
3100 	nuap.fd = uap->fd;
3101 	nuap.length = uap->length;
3102 	return (ftruncate(td, &nuap));
3103 }
3104 #endif /* COMPAT_43 */
3105 
3106 /*
3107  * Sync an open file.
3108  */
3109 #ifndef _SYS_SYSPROTO_H_
3110 struct fsync_args {
3111 	int	fd;
3112 };
3113 #endif
3114 int
3115 fsync(td, uap)
3116 	struct thread *td;
3117 	struct fsync_args /* {
3118 		int fd;
3119 	} */ *uap;
3120 {
3121 	struct vnode *vp;
3122 	struct mount *mp;
3123 	struct file *fp;
3124 	int vfslocked;
3125 	int error;
3126 
3127 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3128 		return (error);
3129 	vp = fp->f_vnode;
3130 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3131 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3132 		goto drop;
3133 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3134 	if (vp->v_object != NULL) {
3135 		VM_OBJECT_LOCK(vp->v_object);
3136 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3137 		VM_OBJECT_UNLOCK(vp->v_object);
3138 	}
3139 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3140 
3141 	VOP_UNLOCK(vp, 0, td);
3142 	vn_finished_write(mp);
3143 drop:
3144 	VFS_UNLOCK_GIANT(vfslocked);
3145 	fdrop(fp, td);
3146 	return (error);
3147 }
3148 
3149 /*
3150  * Rename files.  Source and destination must either both be directories,
3151  * or both not be directories.  If target is a directory, it must be empty.
3152  */
3153 #ifndef _SYS_SYSPROTO_H_
3154 struct rename_args {
3155 	char	*from;
3156 	char	*to;
3157 };
3158 #endif
3159 int
3160 rename(td, uap)
3161 	struct thread *td;
3162 	register struct rename_args /* {
3163 		char *from;
3164 		char *to;
3165 	} */ *uap;
3166 {
3167 
3168 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3169 }
3170 
3171 int
3172 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3173 {
3174 	struct mount *mp = NULL;
3175 	struct vnode *tvp, *fvp, *tdvp;
3176 	struct nameidata fromnd, tond;
3177 	int tvfslocked;
3178 	int fvfslocked;
3179 	int error;
3180 
3181 	bwillwrite();
3182 #ifdef MAC
3183 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE,
3184 	    pathseg, from, td);
3185 #else
3186 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE,
3187 	    pathseg, from, td);
3188 #endif
3189 	if ((error = namei(&fromnd)) != 0)
3190 		return (error);
3191 	fvfslocked = NDHASGIANT(&fromnd);
3192 	tvfslocked = 0;
3193 #ifdef MAC
3194 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3195 	    fromnd.ni_vp, &fromnd.ni_cnd);
3196 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3197 	VOP_UNLOCK(fromnd.ni_vp, 0, td);
3198 #endif
3199 	fvp = fromnd.ni_vp;
3200 	if (error == 0)
3201 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3202 	if (error != 0) {
3203 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3204 		vrele(fromnd.ni_dvp);
3205 		vrele(fvp);
3206 		goto out1;
3207 	}
3208 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3209 	    MPSAFE, pathseg, to, td);
3210 	if (fromnd.ni_vp->v_type == VDIR)
3211 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3212 	if ((error = namei(&tond)) != 0) {
3213 		/* Translate error code for rename("dir1", "dir2/."). */
3214 		if (error == EISDIR && fvp->v_type == VDIR)
3215 			error = EINVAL;
3216 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3217 		vrele(fromnd.ni_dvp);
3218 		vrele(fvp);
3219 		vn_finished_write(mp);
3220 		goto out1;
3221 	}
3222 	tvfslocked = NDHASGIANT(&tond);
3223 	tdvp = tond.ni_dvp;
3224 	tvp = tond.ni_vp;
3225 	if (tvp != NULL) {
3226 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3227 			error = ENOTDIR;
3228 			goto out;
3229 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3230 			error = EISDIR;
3231 			goto out;
3232 		}
3233 	}
3234 	if (fvp == tdvp)
3235 		error = EINVAL;
3236 	/*
3237 	 * If the source is the same as the destination (that is, if they
3238 	 * are links to the same vnode), then there is nothing to do.
3239 	 */
3240 	if (fvp == tvp)
3241 		error = -1;
3242 #ifdef MAC
3243 	else
3244 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3245 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3246 #endif
3247 out:
3248 	if (!error) {
3249 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3250 		if (fromnd.ni_dvp != tdvp) {
3251 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3252 		}
3253 		if (tvp) {
3254 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3255 		}
3256 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3257 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3258 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3259 		NDFREE(&tond, NDF_ONLY_PNBUF);
3260 	} else {
3261 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3262 		NDFREE(&tond, NDF_ONLY_PNBUF);
3263 		if (tvp)
3264 			vput(tvp);
3265 		if (tdvp == tvp)
3266 			vrele(tdvp);
3267 		else
3268 			vput(tdvp);
3269 		vrele(fromnd.ni_dvp);
3270 		vrele(fvp);
3271 	}
3272 	vrele(tond.ni_startdir);
3273 	vn_finished_write(mp);
3274 out1:
3275 	if (fromnd.ni_startdir)
3276 		vrele(fromnd.ni_startdir);
3277 	VFS_UNLOCK_GIANT(fvfslocked);
3278 	VFS_UNLOCK_GIANT(tvfslocked);
3279 	if (error == -1)
3280 		return (0);
3281 	return (error);
3282 }
3283 
3284 /*
3285  * Make a directory file.
3286  */
3287 #ifndef _SYS_SYSPROTO_H_
3288 struct mkdir_args {
3289 	char	*path;
3290 	int	mode;
3291 };
3292 #endif
3293 int
3294 mkdir(td, uap)
3295 	struct thread *td;
3296 	register struct mkdir_args /* {
3297 		char *path;
3298 		int mode;
3299 	} */ *uap;
3300 {
3301 
3302 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3303 }
3304 
3305 int
3306 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3307 {
3308 	struct mount *mp;
3309 	struct vnode *vp;
3310 	struct vattr vattr;
3311 	int error;
3312 	struct nameidata nd;
3313 	int vfslocked;
3314 
3315 restart:
3316 	bwillwrite();
3317 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, segflg, path, td);
3318 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3319 	if ((error = namei(&nd)) != 0)
3320 		return (error);
3321 	vfslocked = NDHASGIANT(&nd);
3322 	vp = nd.ni_vp;
3323 	if (vp != NULL) {
3324 		NDFREE(&nd, NDF_ONLY_PNBUF);
3325 		vrele(vp);
3326 		/*
3327 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3328 		 * the strange behaviour of leaving the vnode unlocked
3329 		 * if the target is the same vnode as the parent.
3330 		 */
3331 		if (vp == nd.ni_dvp)
3332 			vrele(nd.ni_dvp);
3333 		else
3334 			vput(nd.ni_dvp);
3335 		VFS_UNLOCK_GIANT(vfslocked);
3336 		return (EEXIST);
3337 	}
3338 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3339 		NDFREE(&nd, NDF_ONLY_PNBUF);
3340 		vput(nd.ni_dvp);
3341 		VFS_UNLOCK_GIANT(vfslocked);
3342 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3343 			return (error);
3344 		goto restart;
3345 	}
3346 	VATTR_NULL(&vattr);
3347 	vattr.va_type = VDIR;
3348 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3349 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3350 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3351 #ifdef MAC
3352 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3353 	    &vattr);
3354 	if (error)
3355 		goto out;
3356 #endif
3357 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3358 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3359 #ifdef MAC
3360 out:
3361 #endif
3362 	NDFREE(&nd, NDF_ONLY_PNBUF);
3363 	vput(nd.ni_dvp);
3364 	if (!error)
3365 		vput(nd.ni_vp);
3366 	vn_finished_write(mp);
3367 	VFS_UNLOCK_GIANT(vfslocked);
3368 	return (error);
3369 }
3370 
3371 /*
3372  * Remove a directory file.
3373  */
3374 #ifndef _SYS_SYSPROTO_H_
3375 struct rmdir_args {
3376 	char	*path;
3377 };
3378 #endif
3379 int
3380 rmdir(td, uap)
3381 	struct thread *td;
3382 	struct rmdir_args /* {
3383 		char *path;
3384 	} */ *uap;
3385 {
3386 
3387 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3388 }
3389 
3390 int
3391 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3392 {
3393 	struct mount *mp;
3394 	struct vnode *vp;
3395 	int error;
3396 	struct nameidata nd;
3397 	int vfslocked;
3398 
3399 restart:
3400 	bwillwrite();
3401 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE, pathseg, path, td);
3402 	if ((error = namei(&nd)) != 0)
3403 		return (error);
3404 	vfslocked = NDHASGIANT(&nd);
3405 	vp = nd.ni_vp;
3406 	if (vp->v_type != VDIR) {
3407 		error = ENOTDIR;
3408 		goto out;
3409 	}
3410 	/*
3411 	 * No rmdir "." please.
3412 	 */
3413 	if (nd.ni_dvp == vp) {
3414 		error = EINVAL;
3415 		goto out;
3416 	}
3417 	/*
3418 	 * The root of a mounted filesystem cannot be deleted.
3419 	 */
3420 	if (vp->v_vflag & VV_ROOT) {
3421 		error = EBUSY;
3422 		goto out;
3423 	}
3424 #ifdef MAC
3425 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3426 	    &nd.ni_cnd);
3427 	if (error)
3428 		goto out;
3429 #endif
3430 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3431 		NDFREE(&nd, NDF_ONLY_PNBUF);
3432 		if (nd.ni_dvp == vp)
3433 			vrele(nd.ni_dvp);
3434 		else
3435 			vput(nd.ni_dvp);
3436 		vput(vp);
3437 		VFS_UNLOCK_GIANT(vfslocked);
3438 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3439 			return (error);
3440 		goto restart;
3441 	}
3442 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3443 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3444 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3445 	vn_finished_write(mp);
3446 out:
3447 	NDFREE(&nd, NDF_ONLY_PNBUF);
3448 	if (nd.ni_dvp == vp)
3449 		vrele(nd.ni_dvp);
3450 	else
3451 		vput(nd.ni_dvp);
3452 	vput(vp);
3453 	VFS_UNLOCK_GIANT(vfslocked);
3454 	return (error);
3455 }
3456 
3457 #ifdef COMPAT_43
3458 /*
3459  * Read a block of directory entries in a filesystem independent format.
3460  */
3461 #ifndef _SYS_SYSPROTO_H_
3462 struct ogetdirentries_args {
3463 	int	fd;
3464 	char	*buf;
3465 	u_int	count;
3466 	long	*basep;
3467 };
3468 #endif
3469 int
3470 ogetdirentries(td, uap)
3471 	struct thread *td;
3472 	register struct ogetdirentries_args /* {
3473 		int fd;
3474 		char *buf;
3475 		u_int count;
3476 		long *basep;
3477 	} */ *uap;
3478 {
3479 	struct vnode *vp;
3480 	struct file *fp;
3481 	struct uio auio, kuio;
3482 	struct iovec aiov, kiov;
3483 	struct dirent *dp, *edp;
3484 	caddr_t dirbuf;
3485 	int error, eofflag, readcnt;
3486 	long loff;
3487 
3488 	/* XXX arbitrary sanity limit on `count'. */
3489 	if (uap->count > 64 * 1024)
3490 		return (EINVAL);
3491 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3492 		return (error);
3493 	if ((fp->f_flag & FREAD) == 0) {
3494 		fdrop(fp, td);
3495 		return (EBADF);
3496 	}
3497 	vp = fp->f_vnode;
3498 unionread:
3499 	if (vp->v_type != VDIR) {
3500 		fdrop(fp, td);
3501 		return (EINVAL);
3502 	}
3503 	aiov.iov_base = uap->buf;
3504 	aiov.iov_len = uap->count;
3505 	auio.uio_iov = &aiov;
3506 	auio.uio_iovcnt = 1;
3507 	auio.uio_rw = UIO_READ;
3508 	auio.uio_segflg = UIO_USERSPACE;
3509 	auio.uio_td = td;
3510 	auio.uio_resid = uap->count;
3511 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3512 	loff = auio.uio_offset = fp->f_offset;
3513 #ifdef MAC
3514 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3515 	if (error) {
3516 		VOP_UNLOCK(vp, 0, td);
3517 		fdrop(fp, td);
3518 		return (error);
3519 	}
3520 #endif
3521 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3522 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3523 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3524 			    NULL, NULL);
3525 			fp->f_offset = auio.uio_offset;
3526 		} else
3527 #	endif
3528 	{
3529 		kuio = auio;
3530 		kuio.uio_iov = &kiov;
3531 		kuio.uio_segflg = UIO_SYSSPACE;
3532 		kiov.iov_len = uap->count;
3533 		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3534 		kiov.iov_base = dirbuf;
3535 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3536 			    NULL, NULL);
3537 		fp->f_offset = kuio.uio_offset;
3538 		if (error == 0) {
3539 			readcnt = uap->count - kuio.uio_resid;
3540 			edp = (struct dirent *)&dirbuf[readcnt];
3541 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3542 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3543 					/*
3544 					 * The expected low byte of
3545 					 * dp->d_namlen is our dp->d_type.
3546 					 * The high MBZ byte of dp->d_namlen
3547 					 * is our dp->d_namlen.
3548 					 */
3549 					dp->d_type = dp->d_namlen;
3550 					dp->d_namlen = 0;
3551 #				else
3552 					/*
3553 					 * The dp->d_type is the high byte
3554 					 * of the expected dp->d_namlen,
3555 					 * so must be zero'ed.
3556 					 */
3557 					dp->d_type = 0;
3558 #				endif
3559 				if (dp->d_reclen > 0) {
3560 					dp = (struct dirent *)
3561 					    ((char *)dp + dp->d_reclen);
3562 				} else {
3563 					error = EIO;
3564 					break;
3565 				}
3566 			}
3567 			if (dp >= edp)
3568 				error = uiomove(dirbuf, readcnt, &auio);
3569 		}
3570 		FREE(dirbuf, M_TEMP);
3571 	}
3572 	VOP_UNLOCK(vp, 0, td);
3573 	if (error) {
3574 		fdrop(fp, td);
3575 		return (error);
3576 	}
3577 	if (uap->count == auio.uio_resid) {
3578 		if (union_dircheckp) {
3579 			error = union_dircheckp(td, &vp, fp);
3580 			if (error == -1)
3581 				goto unionread;
3582 			if (error) {
3583 				fdrop(fp, td);
3584 				return (error);
3585 			}
3586 		}
3587 		/*
3588 		 * XXX We could delay dropping the lock above but
3589 		 * union_dircheckp complicates things.
3590 		 */
3591 		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
3592 		if ((vp->v_vflag & VV_ROOT) &&
3593 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3594 			struct vnode *tvp = vp;
3595 			vp = vp->v_mount->mnt_vnodecovered;
3596 			VREF(vp);
3597 			fp->f_vnode = vp;
3598 			fp->f_data = vp;
3599 			fp->f_offset = 0;
3600 			vput(tvp);
3601 			goto unionread;
3602 		}
3603 		VOP_UNLOCK(vp, 0, td);
3604 	}
3605 	error = copyout(&loff, uap->basep, sizeof(long));
3606 	fdrop(fp, td);
3607 	td->td_retval[0] = uap->count - auio.uio_resid;
3608 	return (error);
3609 }
3610 #endif /* COMPAT_43 */
3611 
3612 /*
3613  * Read a block of directory entries in a filesystem independent format.
3614  */
3615 #ifndef _SYS_SYSPROTO_H_
3616 struct getdirentries_args {
3617 	int	fd;
3618 	char	*buf;
3619 	u_int	count;
3620 	long	*basep;
3621 };
3622 #endif
3623 int
3624 getdirentries(td, uap)
3625 	struct thread *td;
3626 	register struct getdirentries_args /* {
3627 		int fd;
3628 		char *buf;
3629 		u_int count;
3630 		long *basep;
3631 	} */ *uap;
3632 {
3633 	struct vnode *vp;
3634 	struct file *fp;
3635 	struct uio auio;
3636 	struct iovec aiov;
3637 	int vfslocked;
3638 	long loff;
3639 	int error, eofflag;
3640 
3641 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3642 		return (error);
3643 	if ((fp->f_flag & FREAD) == 0) {
3644 		fdrop(fp, td);
3645 		return (EBADF);
3646 	}
3647 	vp = fp->f_vnode;
3648 unionread:
3649 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3650 	if (vp->v_type != VDIR) {
3651 		error = EINVAL;
3652 		goto fail;
3653 	}
3654 	aiov.iov_base = uap->buf;
3655 	aiov.iov_len = uap->count;
3656 	auio.uio_iov = &aiov;
3657 	auio.uio_iovcnt = 1;
3658 	auio.uio_rw = UIO_READ;
3659 	auio.uio_segflg = UIO_USERSPACE;
3660 	auio.uio_td = td;
3661 	auio.uio_resid = uap->count;
3662 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3663 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3664 	loff = auio.uio_offset = fp->f_offset;
3665 #ifdef MAC
3666 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3667 	if (error == 0)
3668 #endif
3669 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3670 		    NULL);
3671 	fp->f_offset = auio.uio_offset;
3672 	VOP_UNLOCK(vp, 0, td);
3673 	if (error)
3674 		goto fail;
3675 	if (uap->count == auio.uio_resid) {
3676 		if (union_dircheckp) {
3677 			error = union_dircheckp(td, &vp, fp);
3678 			if (error == -1) {
3679 				VFS_UNLOCK_GIANT(vfslocked);
3680 				goto unionread;
3681 			}
3682 			if (error)
3683 				goto fail;
3684 		}
3685 		/*
3686 		 * XXX We could delay dropping the lock above but
3687 		 * union_dircheckp complicates things.
3688 		 */
3689 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3690 		if ((vp->v_vflag & VV_ROOT) &&
3691 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3692 			struct vnode *tvp = vp;
3693 			vp = vp->v_mount->mnt_vnodecovered;
3694 			VREF(vp);
3695 			fp->f_vnode = vp;
3696 			fp->f_data = vp;
3697 			fp->f_offset = 0;
3698 			vput(tvp);
3699 			VFS_UNLOCK_GIANT(vfslocked);
3700 			goto unionread;
3701 		}
3702 		VOP_UNLOCK(vp, 0, td);
3703 	}
3704 	if (uap->basep != NULL) {
3705 		error = copyout(&loff, uap->basep, sizeof(long));
3706 	}
3707 	td->td_retval[0] = uap->count - auio.uio_resid;
3708 fail:
3709 	VFS_UNLOCK_GIANT(vfslocked);
3710 	fdrop(fp, td);
3711 	return (error);
3712 }
3713 #ifndef _SYS_SYSPROTO_H_
3714 struct getdents_args {
3715 	int fd;
3716 	char *buf;
3717 	size_t count;
3718 };
3719 #endif
3720 int
3721 getdents(td, uap)
3722 	struct thread *td;
3723 	register struct getdents_args /* {
3724 		int fd;
3725 		char *buf;
3726 		u_int count;
3727 	} */ *uap;
3728 {
3729 	struct getdirentries_args ap;
3730 	ap.fd = uap->fd;
3731 	ap.buf = uap->buf;
3732 	ap.count = uap->count;
3733 	ap.basep = NULL;
3734 	return (getdirentries(td, &ap));
3735 }
3736 
3737 /*
3738  * Set the mode mask for creation of filesystem nodes.
3739  *
3740  * MP SAFE
3741  */
3742 #ifndef _SYS_SYSPROTO_H_
3743 struct umask_args {
3744 	int	newmask;
3745 };
3746 #endif
3747 int
3748 umask(td, uap)
3749 	struct thread *td;
3750 	struct umask_args /* {
3751 		int newmask;
3752 	} */ *uap;
3753 {
3754 	register struct filedesc *fdp;
3755 
3756 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3757 	fdp = td->td_proc->p_fd;
3758 	td->td_retval[0] = fdp->fd_cmask;
3759 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3760 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3761 	return (0);
3762 }
3763 
3764 /*
3765  * Void all references to file by ripping underlying filesystem
3766  * away from vnode.
3767  */
3768 #ifndef _SYS_SYSPROTO_H_
3769 struct revoke_args {
3770 	char	*path;
3771 };
3772 #endif
3773 int
3774 revoke(td, uap)
3775 	struct thread *td;
3776 	register struct revoke_args /* {
3777 		char *path;
3778 	} */ *uap;
3779 {
3780 	struct vnode *vp;
3781 	struct vattr vattr;
3782 	int error;
3783 	struct nameidata nd;
3784 	int vfslocked;
3785 
3786 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE,
3787 	    uap->path, td);
3788 	if ((error = namei(&nd)) != 0)
3789 		return (error);
3790 	vfslocked = NDHASGIANT(&nd);
3791 	vp = nd.ni_vp;
3792 	NDFREE(&nd, NDF_ONLY_PNBUF);
3793 	if (vp->v_type != VCHR) {
3794 		error = EINVAL;
3795 		goto out;
3796 	}
3797 #ifdef MAC
3798 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3799 	if (error)
3800 		goto out;
3801 #endif
3802 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3803 	if (error)
3804 		goto out;
3805 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3806 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
3807 		if (error)
3808 			goto out;
3809 	}
3810 	if (vcount(vp) > 1)
3811 		VOP_REVOKE(vp, REVOKEALL);
3812 out:
3813 	vput(vp);
3814 	VFS_UNLOCK_GIANT(vfslocked);
3815 	return (error);
3816 }
3817 
3818 /*
3819  * Convert a user file descriptor to a kernel file entry.
3820  * A reference on the file entry is held upon returning.
3821  */
3822 int
3823 getvnode(fdp, fd, fpp)
3824 	struct filedesc *fdp;
3825 	int fd;
3826 	struct file **fpp;
3827 {
3828 	int error;
3829 	struct file *fp;
3830 
3831 	fp = NULL;
3832 	if (fdp == NULL)
3833 		error = EBADF;
3834 	else {
3835 		FILEDESC_LOCK(fdp);
3836 		if ((u_int)fd >= fdp->fd_nfiles ||
3837 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3838 			error = EBADF;
3839 		else if (fp->f_vnode == NULL) {
3840 			fp = NULL;
3841 			error = EINVAL;
3842 		} else {
3843 			fhold(fp);
3844 			error = 0;
3845 		}
3846 		FILEDESC_UNLOCK(fdp);
3847 	}
3848 	*fpp = fp;
3849 	return (error);
3850 }
3851 
3852 /*
3853  * Get (NFS) file handle
3854  */
3855 #ifndef _SYS_SYSPROTO_H_
3856 struct lgetfh_args {
3857 	char	*fname;
3858 	fhandle_t *fhp;
3859 };
3860 #endif
3861 int
3862 lgetfh(td, uap)
3863 	struct thread *td;
3864 	register struct lgetfh_args *uap;
3865 {
3866 	struct nameidata nd;
3867 	fhandle_t fh;
3868 	register struct vnode *vp;
3869 	int vfslocked;
3870 	int error;
3871 
3872 	error = suser(td);
3873 	if (error)
3874 		return (error);
3875 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE,
3876 	    UIO_USERSPACE, uap->fname, td);
3877 	error = namei(&nd);
3878 	if (error)
3879 		return (error);
3880 	vfslocked = NDHASGIANT(&nd);
3881 	NDFREE(&nd, NDF_ONLY_PNBUF);
3882 	vp = nd.ni_vp;
3883 	bzero(&fh, sizeof(fh));
3884 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3885 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3886 	vput(vp);
3887 	VFS_UNLOCK_GIANT(vfslocked);
3888 	if (error)
3889 		return (error);
3890 	error = copyout(&fh, uap->fhp, sizeof (fh));
3891 	return (error);
3892 }
3893 
3894 #ifndef _SYS_SYSPROTO_H_
3895 struct getfh_args {
3896 	char	*fname;
3897 	fhandle_t *fhp;
3898 };
3899 #endif
3900 int
3901 getfh(td, uap)
3902 	struct thread *td;
3903 	register struct getfh_args *uap;
3904 {
3905 	struct nameidata nd;
3906 	fhandle_t fh;
3907 	register struct vnode *vp;
3908 	int vfslocked;
3909 	int error;
3910 
3911 	error = suser(td);
3912 	if (error)
3913 		return (error);
3914 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE,
3915 	    UIO_USERSPACE, uap->fname, td);
3916 	error = namei(&nd);
3917 	if (error)
3918 		return (error);
3919 	vfslocked = NDHASGIANT(&nd);
3920 	NDFREE(&nd, NDF_ONLY_PNBUF);
3921 	vp = nd.ni_vp;
3922 	bzero(&fh, sizeof(fh));
3923 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3924 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3925 	vput(vp);
3926 	VFS_UNLOCK_GIANT(vfslocked);
3927 	if (error)
3928 		return (error);
3929 	error = copyout(&fh, uap->fhp, sizeof (fh));
3930 	return (error);
3931 }
3932 
/*
 * syscall for the rpc.lockd to use to translate an NFS file handle into
 * an open descriptor.
 *
 * The handle is copied in from uap->u_fhp, resolved to a mount with
 * vfs_getvfs() and to a locked vnode with VFS_FHTOVP().  The remainder
 * mirrors the open path ("from vn_open" below): type checks, access
 * checks, optional truncation, VOP_OPEN(), then allocation of a file
 * descriptor which is returned in td->td_retval[0].
 *
 * uap->flags must request read and/or write access and must not contain
 * O_CREAT, otherwise EINVAL is returned.  An unknown fsid gives ESTALE.
 *
 * Giant is held from the mount lookup until return.
 *
 * warning: do not remove the suser() call or this becomes one giant
 * security hole.
 *
 * MP SAFE
 */
#ifndef _SYS_SYSPROTO_H_
struct fhopen_args {
	const struct fhandle *u_fhp;
	int flags;
};
#endif
int
fhopen(td, uap)
	struct thread *td;
	struct fhopen_args /* {
		const struct fhandle *u_fhp;
		int flags;
	} */ *uap;
{
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	int fmode, mode, error, type;
	struct file *nfp;
	int indx;

	error = suser(td);
	if (error)
		return (error);
	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	if (error)
		return(error);
	/* find the mount point */
	mtx_lock(&Giant);
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL) {
		error = ESTALE;
		goto out;
	}
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
	if (error)
		goto out;
	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	/* Build the VREAD/VWRITE/VAPPEND access mask from the open flags. */
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (fmode & O_APPEND)
		mode |= VAPPEND;
#ifdef MAC
	error = mac_check_vnode_open(td->td_ucred, vp, mode);
	if (error)
		goto bad;
#endif
	if (mode) {
		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		/*
		 * The vnode lock is dropped around vn_start_write() and
		 * retaken afterwards.  If vn_start_write() fails the vnode
		 * is unlocked, so it is released with vrele() and the
		 * 'bad' label (which does vput()) is bypassed.
		 */
		VOP_UNLOCK(vp, 0, td);				/* XXX */
		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
			vrele(vp);
			goto out;
		}
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
#ifdef MAC
		/*
		 * We don't yet have fp->f_cred, so use td->td_ucred, which
		 * should be right.
		 */
		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
		if (error == 0) {
#endif
			/* Truncate by setting only the size attribute to 0. */
			VATTR_NULL(vap);
			vap->va_size = 0;
			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
#ifdef MAC
		}
#endif
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
	if (error)
		goto bad;

	if (fmode & FWRITE)
		vp->v_writecount++;

	/*
	 * end of vn_open code
	 */

	if ((error = falloc(td, &nfp, &indx)) != 0) {
		/* Undo the writer count taken just above. */
		if (fmode & FWRITE)
			vp->v_writecount--;
		goto bad;
	}
	/* An extra reference on `nfp' has been held for us by falloc(). */
	fp = nfp;

	/* Turn the new file entry into a vnode-backed descriptor. */
	nfp->f_vnode = vp;
	nfp->f_data = vp;
	nfp->f_flag = fmode & FMASK;
	nfp->f_ops = &vnops;
	nfp->f_type = DTYPE_VNODE;
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file flock-style lock: start 0, length 0. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode is unlocked while the advisory lock is requested. */
		VOP_UNLOCK(vp, 0, td);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0) {
			/*
			 * The lock request failed.  Normally close the
			 * descriptor but handle the case where someone might
			 * have dup()d or close()d it when we weren't looking.
			 */
			fdclose(fdp, fp, indx, td);

			/*
			 * release our private reference
			 */
			fdrop(fp, td);
			/*
			 * vp is unlocked here, so skip the vput() at 'bad'.
			 * NOTE(review): presumably the vnode reference is
			 * released when the file entry is torn down --
			 * confirm against the file close path.
			 */
			goto out;
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		fp->f_flag |= FHASLOCK;
	}

	/* Success: unlock the vnode and drop the extra falloc() reference. */
	VOP_UNLOCK(vp, 0, td);
	fdrop(fp, td);
	mtx_unlock(&Giant);
	td->td_retval[0] = indx;
	return (0);

bad:
	/* vp is locked and referenced on every path that jumps here. */
	vput(vp);
out:
	mtx_unlock(&Giant);
	return (error);
}
4126 
4127 /*
4128  * Stat an (NFS) file handle.
4129  *
4130  * MP SAFE
4131  */
4132 #ifndef _SYS_SYSPROTO_H_
4133 struct fhstat_args {
4134 	struct fhandle *u_fhp;
4135 	struct stat *sb;
4136 };
4137 #endif
4138 int
4139 fhstat(td, uap)
4140 	struct thread *td;
4141 	register struct fhstat_args /* {
4142 		struct fhandle *u_fhp;
4143 		struct stat *sb;
4144 	} */ *uap;
4145 {
4146 	struct stat sb;
4147 	fhandle_t fh;
4148 	struct mount *mp;
4149 	struct vnode *vp;
4150 	int error;
4151 
4152 	error = suser(td);
4153 	if (error)
4154 		return (error);
4155 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4156 	if (error)
4157 		return (error);
4158 	mtx_lock(&Giant);
4159 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
4160 		mtx_unlock(&Giant);
4161 		return (ESTALE);
4162 	}
4163 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4164 		mtx_unlock(&Giant);
4165 		return (error);
4166 	}
4167 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4168 	vput(vp);
4169 	mtx_unlock(&Giant);
4170 	if (error)
4171 		return (error);
4172 	error = copyout(&sb, uap->sb, sizeof(sb));
4173 	return (error);
4174 }
4175 
4176 /*
4177  * Implement fstatfs() for (NFS) file handles.
4178  *
4179  * MP SAFE
4180  */
4181 #ifndef _SYS_SYSPROTO_H_
4182 struct fhstatfs_args {
4183 	struct fhandle *u_fhp;
4184 	struct statfs *buf;
4185 };
4186 #endif
4187 int
4188 fhstatfs(td, uap)
4189 	struct thread *td;
4190 	struct fhstatfs_args /* {
4191 		struct fhandle *u_fhp;
4192 		struct statfs *buf;
4193 	} */ *uap;
4194 {
4195 	struct statfs sf;
4196 	fhandle_t fh;
4197 	int error;
4198 
4199 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4200 	if (error)
4201 		return (error);
4202 	error = kern_fhstatfs(td, fh, &sf);
4203 	if (error)
4204 		return (error);
4205 	return (copyout(&sf, uap->buf, sizeof(sf)));
4206 }
4207 
4208 int
4209 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4210 {
4211 	struct statfs *sp;
4212 	struct mount *mp;
4213 	struct vnode *vp;
4214 	int error;
4215 
4216 	error = suser(td);
4217 	if (error)
4218 		return (error);
4219 	mtx_lock(&Giant);
4220 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
4221 		mtx_unlock(&Giant);
4222 		return (ESTALE);
4223 	}
4224 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4225 	if (error) {
4226 		mtx_unlock(&Giant);
4227 		return (error);
4228 	}
4229 	mp = vp->v_mount;
4230 	sp = &mp->mnt_stat;
4231 	vput(vp);
4232 	error = prison_canseemount(td->td_ucred, mp);
4233 	if (error)
4234 		return (error);
4235 #ifdef MAC
4236 	error = mac_check_mount_stat(td->td_ucred, mp);
4237 	if (error) {
4238 		mtx_unlock(&Giant);
4239 		return (error);
4240 	}
4241 #endif
4242 	/*
4243 	 * Set these in case the underlying filesystem fails to do so.
4244 	 */
4245 	sp->f_version = STATFS_VERSION;
4246 	sp->f_namemax = NAME_MAX;
4247 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4248 	error = VFS_STATFS(mp, sp, td);
4249 	mtx_unlock(&Giant);
4250 	if (error)
4251 		return (error);
4252 	*buf = *sp;
4253 	return (0);
4254 }
4255 
4256 /*
4257  * Syscall to push extended attribute configuration information into the
4258  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4259  * a command (int cmd), and attribute name and misc data.  For now, the
4260  * attribute name is left in userspace for consumption by the VFS_op.
4261  * It will probably be changed to be copied into sysspace by the
4262  * syscall in the future, once issues with various consumers of the
4263  * attribute code have raised their hands.
4264  *
4265  * Currently this is used only by UFS Extended Attributes.
4266  */
4267 int
4268 extattrctl(td, uap)
4269 	struct thread *td;
4270 	struct extattrctl_args /* {
4271 		const char *path;
4272 		int cmd;
4273 		const char *filename;
4274 		int attrnamespace;
4275 		const char *attrname;
4276 	} */ *uap;
4277 {
4278 	struct vnode *filename_vp;
4279 	struct nameidata nd;
4280 	struct mount *mp, *mp_writable;
4281 	char attrname[EXTATTR_MAXNAMELEN];
4282 	int vfslocked, fnvfslocked, error;
4283 
4284 	/*
4285 	 * uap->attrname is not always defined.  We check again later when we
4286 	 * invoke the VFS call so as to pass in NULL there if needed.
4287 	 */
4288 	if (uap->attrname != NULL) {
4289 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4290 		    NULL);
4291 		if (error)
4292 			return (error);
4293 	}
4294 
4295 	vfslocked = fnvfslocked = 0;
4296 	/*
4297 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4298 	 * which VFS_EXTATTRCTL() will later release.
4299 	 */
4300 	filename_vp = NULL;
4301 	if (uap->filename != NULL) {
4302 		NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF,
4303 		    UIO_USERSPACE, uap->filename, td);
4304 		error = namei(&nd);
4305 		if (error)
4306 			return (error);
4307 		fnvfslocked = NDHASGIANT(&nd);
4308 		filename_vp = nd.ni_vp;
4309 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4310 	}
4311 
4312 	/* uap->path is always defined. */
4313 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td);
4314 	error = namei(&nd);
4315 	if (error) {
4316 		if (filename_vp != NULL)
4317 			vput(filename_vp);
4318 		goto out;
4319 	}
4320 	vfslocked = NDHASGIANT(&nd);
4321 	mp = nd.ni_vp->v_mount;
4322 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4323 	NDFREE(&nd, 0);
4324 	if (error) {
4325 		if (filename_vp != NULL)
4326 			vput(filename_vp);
4327 		goto out;
4328 	}
4329 
4330 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
4331 	    uap->attrname != NULL ? attrname : NULL, td);
4332 
4333 	vn_finished_write(mp_writable);
4334 	/*
4335 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4336 	 * filename_vp, so vrele it if it is defined.
4337 	 */
4338 	if (filename_vp != NULL)
4339 		vrele(filename_vp);
4340 out:
4341 	VFS_UNLOCK_GIANT(fnvfslocked);
4342 	VFS_UNLOCK_GIANT(vfslocked);
4343 	return (error);
4344 }
4345 
4346 /*-
4347  * Set a named extended attribute on a file or directory
4348  *
4349  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4350  *            kernelspace string pointer "attrname", userspace buffer
4351  *            pointer "data", buffer length "nbytes", thread "td".
4352  * Returns: 0 on success, an error number otherwise
4353  * Locks: none
4354  * References: vp must be a valid reference for the duration of the call
4355  */
4356 static int
4357 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4358     void *data, size_t nbytes, struct thread *td)
4359 {
4360 	struct mount *mp;
4361 	struct uio auio;
4362 	struct iovec aiov;
4363 	ssize_t cnt;
4364 	int error;
4365 
4366 	VFS_ASSERT_GIANT(vp->v_mount);
4367 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4368 	if (error)
4369 		return (error);
4370 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4371 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4372 
4373 	aiov.iov_base = data;
4374 	aiov.iov_len = nbytes;
4375 	auio.uio_iov = &aiov;
4376 	auio.uio_iovcnt = 1;
4377 	auio.uio_offset = 0;
4378 	if (nbytes > INT_MAX) {
4379 		error = EINVAL;
4380 		goto done;
4381 	}
4382 	auio.uio_resid = nbytes;
4383 	auio.uio_rw = UIO_WRITE;
4384 	auio.uio_segflg = UIO_USERSPACE;
4385 	auio.uio_td = td;
4386 	cnt = nbytes;
4387 
4388 #ifdef MAC
4389 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4390 	    attrname, &auio);
4391 	if (error)
4392 		goto done;
4393 #endif
4394 
4395 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4396 	    td->td_ucred, td);
4397 	cnt -= auio.uio_resid;
4398 	td->td_retval[0] = cnt;
4399 
4400 done:
4401 	VOP_UNLOCK(vp, 0, td);
4402 	vn_finished_write(mp);
4403 	return (error);
4404 }
4405 
4406 int
4407 extattr_set_fd(td, uap)
4408 	struct thread *td;
4409 	struct extattr_set_fd_args /* {
4410 		int fd;
4411 		int attrnamespace;
4412 		const char *attrname;
4413 		void *data;
4414 		size_t nbytes;
4415 	} */ *uap;
4416 {
4417 	struct file *fp;
4418 	char attrname[EXTATTR_MAXNAMELEN];
4419 	int vfslocked, error;
4420 
4421 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4422 	if (error)
4423 		return (error);
4424 
4425 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4426 	if (error)
4427 		return (error);
4428 
4429 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4430 	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
4431 	    attrname, uap->data, uap->nbytes, td);
4432 	fdrop(fp, td);
4433 	VFS_UNLOCK_GIANT(vfslocked);
4434 
4435 	return (error);
4436 }
4437 
4438 int
4439 extattr_set_file(td, uap)
4440 	struct thread *td;
4441 	struct extattr_set_file_args /* {
4442 		const char *path;
4443 		int attrnamespace;
4444 		const char *attrname;
4445 		void *data;
4446 		size_t nbytes;
4447 	} */ *uap;
4448 {
4449 	struct nameidata nd;
4450 	char attrname[EXTATTR_MAXNAMELEN];
4451 	int vfslocked, error;
4452 
4453 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4454 	if (error)
4455 		return (error);
4456 
4457 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td);
4458 	error = namei(&nd);
4459 	if (error)
4460 		return (error);
4461 	NDFREE(&nd, NDF_ONLY_PNBUF);
4462 
4463 	vfslocked = NDHASGIANT(&nd);
4464 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4465 	    uap->data, uap->nbytes, td);
4466 
4467 	vrele(nd.ni_vp);
4468 	VFS_UNLOCK_GIANT(vfslocked);
4469 	return (error);
4470 }
4471 
4472 int
4473 extattr_set_link(td, uap)
4474 	struct thread *td;
4475 	struct extattr_set_link_args /* {
4476 		const char *path;
4477 		int attrnamespace;
4478 		const char *attrname;
4479 		void *data;
4480 		size_t nbytes;
4481 	} */ *uap;
4482 {
4483 	struct nameidata nd;
4484 	char attrname[EXTATTR_MAXNAMELEN];
4485 	int vfslocked, error;
4486 
4487 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4488 	if (error)
4489 		return (error);
4490 
4491 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW, UIO_USERSPACE, uap->path, td);
4492 	error = namei(&nd);
4493 	if (error)
4494 		return (error);
4495 	NDFREE(&nd, NDF_ONLY_PNBUF);
4496 
4497 	vfslocked = NDHASGIANT(&nd);
4498 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4499 	    uap->data, uap->nbytes, td);
4500 
4501 	vrele(nd.ni_vp);
4502 	VFS_UNLOCK_GIANT(vfslocked);
4503 	return (error);
4504 }
4505 
4506 /*-
4507  * Get a named extended attribute on a file or directory
4508  *
4509  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4510  *            kernelspace string pointer "attrname", userspace buffer
4511  *            pointer "data", buffer length "nbytes", thread "td".
4512  * Returns: 0 on success, an error number otherwise
4513  * Locks: none
4514  * References: vp must be a valid reference for the duration of the call
4515  */
4516 static int
4517 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4518     void *data, size_t nbytes, struct thread *td)
4519 {
4520 	struct uio auio, *auiop;
4521 	struct iovec aiov;
4522 	ssize_t cnt;
4523 	size_t size, *sizep;
4524 	int error;
4525 
4526 	VFS_ASSERT_GIANT(vp->v_mount);
4527 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4528 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4529 
4530 	/*
4531 	 * Slightly unusual semantics: if the user provides a NULL data
4532 	 * pointer, they don't want to receive the data, just the
4533 	 * maximum read length.
4534 	 */
4535 	auiop = NULL;
4536 	sizep = NULL;
4537 	cnt = 0;
4538 	if (data != NULL) {
4539 		aiov.iov_base = data;
4540 		aiov.iov_len = nbytes;
4541 		auio.uio_iov = &aiov;
4542 		auio.uio_iovcnt = 1;
4543 		auio.uio_offset = 0;
4544 		if (nbytes > INT_MAX) {
4545 			error = EINVAL;
4546 			goto done;
4547 		}
4548 		auio.uio_resid = nbytes;
4549 		auio.uio_rw = UIO_READ;
4550 		auio.uio_segflg = UIO_USERSPACE;
4551 		auio.uio_td = td;
4552 		auiop = &auio;
4553 		cnt = nbytes;
4554 	} else
4555 		sizep = &size;
4556 
4557 #ifdef MAC
4558 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4559 	    attrname, &auio);
4560 	if (error)
4561 		goto done;
4562 #endif
4563 
4564 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4565 	    td->td_ucred, td);
4566 
4567 	if (auiop != NULL) {
4568 		cnt -= auio.uio_resid;
4569 		td->td_retval[0] = cnt;
4570 	} else
4571 		td->td_retval[0] = size;
4572 
4573 done:
4574 	VOP_UNLOCK(vp, 0, td);
4575 	return (error);
4576 }
4577 
4578 int
4579 extattr_get_fd(td, uap)
4580 	struct thread *td;
4581 	struct extattr_get_fd_args /* {
4582 		int fd;
4583 		int attrnamespace;
4584 		const char *attrname;
4585 		void *data;
4586 		size_t nbytes;
4587 	} */ *uap;
4588 {
4589 	struct file *fp;
4590 	char attrname[EXTATTR_MAXNAMELEN];
4591 	int vfslocked, error;
4592 
4593 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4594 	if (error)
4595 		return (error);
4596 
4597 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4598 	if (error)
4599 		return (error);
4600 
4601 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4602 	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4603 	    attrname, uap->data, uap->nbytes, td);
4604 
4605 	fdrop(fp, td);
4606 	VFS_UNLOCK_GIANT(vfslocked);
4607 	return (error);
4608 }
4609 
4610 int
4611 extattr_get_file(td, uap)
4612 	struct thread *td;
4613 	struct extattr_get_file_args /* {
4614 		const char *path;
4615 		int attrnamespace;
4616 		const char *attrname;
4617 		void *data;
4618 		size_t nbytes;
4619 	} */ *uap;
4620 {
4621 	struct nameidata nd;
4622 	char attrname[EXTATTR_MAXNAMELEN];
4623 	int vfslocked, error;
4624 
4625 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4626 	if (error)
4627 		return (error);
4628 
4629 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td);
4630 	error = namei(&nd);
4631 	if (error)
4632 		return (error);
4633 	NDFREE(&nd, NDF_ONLY_PNBUF);
4634 
4635 	vfslocked = NDHASGIANT(&nd);
4636 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4637 	    uap->data, uap->nbytes, td);
4638 
4639 	vrele(nd.ni_vp);
4640 	VFS_UNLOCK_GIANT(vfslocked);
4641 	return (error);
4642 }
4643 
4644 int
4645 extattr_get_link(td, uap)
4646 	struct thread *td;
4647 	struct extattr_get_link_args /* {
4648 		const char *path;
4649 		int attrnamespace;
4650 		const char *attrname;
4651 		void *data;
4652 		size_t nbytes;
4653 	} */ *uap;
4654 {
4655 	struct nameidata nd;
4656 	char attrname[EXTATTR_MAXNAMELEN];
4657 	int vfslocked, error;
4658 
4659 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4660 	if (error)
4661 		return (error);
4662 
4663 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW, UIO_USERSPACE, uap->path, td);
4664 	error = namei(&nd);
4665 	if (error)
4666 		return (error);
4667 	NDFREE(&nd, NDF_ONLY_PNBUF);
4668 
4669 	vfslocked = NDHASGIANT(&nd);
4670 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4671 	    uap->data, uap->nbytes, td);
4672 
4673 	vrele(nd.ni_vp);
4674 	VFS_UNLOCK_GIANT(vfslocked);
4675 	return (error);
4676 }
4677 
4678 /*
4679  * extattr_delete_vp(): Delete a named extended attribute on a file or
4680  *                      directory
4681  *
4682  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4683  *            kernelspace string pointer "attrname", proc "p"
4684  * Returns: 0 on success, an error number otherwise
4685  * Locks: none
4686  * References: vp must be a valid reference for the duration of the call
4687  */
4688 static int
4689 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4690     struct thread *td)
4691 {
4692 	struct mount *mp;
4693 	int error;
4694 
4695 	VFS_ASSERT_GIANT(vp->v_mount);
4696 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4697 	if (error)
4698 		return (error);
4699 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4700 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4701 
4702 #ifdef MAC
4703 	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4704 	    attrname);
4705 	if (error)
4706 		goto done;
4707 #endif
4708 
4709 	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4710 	    td);
4711 	if (error == EOPNOTSUPP)
4712 		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4713 		    td->td_ucred, td);
4714 #ifdef MAC
4715 done:
4716 #endif
4717 	VOP_UNLOCK(vp, 0, td);
4718 	vn_finished_write(mp);
4719 	return (error);
4720 }
4721 
4722 int
4723 extattr_delete_fd(td, uap)
4724 	struct thread *td;
4725 	struct extattr_delete_fd_args /* {
4726 		int fd;
4727 		int attrnamespace;
4728 		const char *attrname;
4729 	} */ *uap;
4730 {
4731 	struct file *fp;
4732 	char attrname[EXTATTR_MAXNAMELEN];
4733 	int vfslocked, error;
4734 
4735 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4736 	if (error)
4737 		return (error);
4738 
4739 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4740 	if (error)
4741 		return (error);
4742 
4743 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4744 	error = extattr_delete_vp(fp->f_vnode, uap->attrnamespace,
4745 	    attrname, td);
4746 	fdrop(fp, td);
4747 	VFS_UNLOCK_GIANT(vfslocked);
4748 	return (error);
4749 }
4750 
4751 int
4752 extattr_delete_file(td, uap)
4753 	struct thread *td;
4754 	struct extattr_delete_file_args /* {
4755 		const char *path;
4756 		int attrnamespace;
4757 		const char *attrname;
4758 	} */ *uap;
4759 {
4760 	struct nameidata nd;
4761 	char attrname[EXTATTR_MAXNAMELEN];
4762 	int vfslocked, error;
4763 
4764 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4765 	if (error)
4766 		return(error);
4767 
4768 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td);
4769 	error = namei(&nd);
4770 	if (error)
4771 		return(error);
4772 	NDFREE(&nd, NDF_ONLY_PNBUF);
4773 
4774 	vfslocked = NDHASGIANT(&nd);
4775 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4776 	vrele(nd.ni_vp);
4777 	VFS_UNLOCK_GIANT(vfslocked);
4778 	return(error);
4779 }
4780 
4781 int
4782 extattr_delete_link(td, uap)
4783 	struct thread *td;
4784 	struct extattr_delete_link_args /* {
4785 		const char *path;
4786 		int attrnamespace;
4787 		const char *attrname;
4788 	} */ *uap;
4789 {
4790 	struct nameidata nd;
4791 	char attrname[EXTATTR_MAXNAMELEN];
4792 	int vfslocked, error;
4793 
4794 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4795 	if (error)
4796 		return(error);
4797 
4798 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW, UIO_USERSPACE, uap->path, td);
4799 	error = namei(&nd);
4800 	if (error)
4801 		return(error);
4802 	NDFREE(&nd, NDF_ONLY_PNBUF);
4803 
4804 	vfslocked = NDHASGIANT(&nd);
4805 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4806 	vrele(nd.ni_vp);
4807 	VFS_UNLOCK_GIANT(vfslocked);
4808 	return(error);
4809 }
4810 
4811 /*-
4812  * Retrieve a list of extended attributes on a file or directory.
4813  *
4814  * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4815  *            userspace buffer pointer "data", buffer length "nbytes",
4816  *            thread "td".
4817  * Returns: 0 on success, an error number otherwise
4818  * Locks: none
4819  * References: vp must be a valid reference for the duration of the call
4820  */
4821 static int
4822 extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4823     size_t nbytes, struct thread *td)
4824 {
4825 	struct uio auio, *auiop;
4826 	size_t size, *sizep;
4827 	struct iovec aiov;
4828 	ssize_t cnt;
4829 	int error;
4830 
4831 	VFS_ASSERT_GIANT(vp->v_mount);
4832 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4833 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4834 
4835 	auiop = NULL;
4836 	sizep = NULL;
4837 	cnt = 0;
4838 	if (data != NULL) {
4839 		aiov.iov_base = data;
4840 		aiov.iov_len = nbytes;
4841 		auio.uio_iov = &aiov;
4842 		auio.uio_iovcnt = 1;
4843 		auio.uio_offset = 0;
4844 		if (nbytes > INT_MAX) {
4845 			error = EINVAL;
4846 			goto done;
4847 		}
4848 		auio.uio_resid = nbytes;
4849 		auio.uio_rw = UIO_READ;
4850 		auio.uio_segflg = UIO_USERSPACE;
4851 		auio.uio_td = td;
4852 		auiop = &auio;
4853 		cnt = nbytes;
4854 	} else
4855 		sizep = &size;
4856 
4857 #ifdef MAC
4858 	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4859 	if (error)
4860 		goto done;
4861 #endif
4862 
4863 	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4864 	    td->td_ucred, td);
4865 
4866 	if (auiop != NULL) {
4867 		cnt -= auio.uio_resid;
4868 		td->td_retval[0] = cnt;
4869 	} else
4870 		td->td_retval[0] = size;
4871 
4872 done:
4873 	VOP_UNLOCK(vp, 0, td);
4874 	return (error);
4875 }
4876 
4877 
4878 int
4879 extattr_list_fd(td, uap)
4880 	struct thread *td;
4881 	struct extattr_list_fd_args /* {
4882 		int fd;
4883 		int attrnamespace;
4884 		void *data;
4885 		size_t nbytes;
4886 	} */ *uap;
4887 {
4888 	struct file *fp;
4889 	int vfslocked, error;
4890 
4891 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4892 	if (error)
4893 		return (error);
4894 
4895 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4896 	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
4897 	    uap->nbytes, td);
4898 
4899 	fdrop(fp, td);
4900 	VFS_UNLOCK_GIANT(vfslocked);
4901 	return (error);
4902 }
4903 
4904 int
4905 extattr_list_file(td, uap)
4906 	struct thread*td;
4907 	struct extattr_list_file_args /* {
4908 		const char *path;
4909 		int attrnamespace;
4910 		void *data;
4911 		size_t nbytes;
4912 	} */ *uap;
4913 {
4914 	struct nameidata nd;
4915 	int vfslocked, error;
4916 
4917 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td);
4918 	error = namei(&nd);
4919 	if (error)
4920 		return (error);
4921 	NDFREE(&nd, NDF_ONLY_PNBUF);
4922 
4923 	vfslocked = NDHASGIANT(&nd);
4924 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4925 	    uap->nbytes, td);
4926 
4927 	vrele(nd.ni_vp);
4928 	VFS_UNLOCK_GIANT(vfslocked);
4929 	return (error);
4930 }
4931 
4932 int
4933 extattr_list_link(td, uap)
4934 	struct thread*td;
4935 	struct extattr_list_link_args /* {
4936 		const char *path;
4937 		int attrnamespace;
4938 		void *data;
4939 		size_t nbytes;
4940 	} */ *uap;
4941 {
4942 	struct nameidata nd;
4943 	int vfslocked, error;
4944 
4945 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW, UIO_USERSPACE, uap->path, td);
4946 	error = namei(&nd);
4947 	if (error)
4948 		return (error);
4949 	NDFREE(&nd, NDF_ONLY_PNBUF);
4950 
4951 	vfslocked = NDHASGIANT(&nd);
4952 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4953 	    uap->nbytes, td);
4954 
4955 	vrele(nd.ni_vp);
4956 	VFS_UNLOCK_GIANT(vfslocked);
4957 	return (error);
4958 }
4959