xref: /freebsd/sys/kern/vfs_syscalls.c (revision c0b9f4fe659b6839541970eb5675e57f4d814969)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/sysent.h>
48 #include <sys/mac.h>
49 #include <sys/malloc.h>
50 #include <sys/mount.h>
51 #include <sys/mutex.h>
52 #include <sys/sysproto.h>
53 #include <sys/namei.h>
54 #include <sys/filedesc.h>
55 #include <sys/kernel.h>
56 #include <sys/fcntl.h>
57 #include <sys/file.h>
58 #include <sys/limits.h>
59 #include <sys/linker.h>
60 #include <sys/stat.h>
61 #include <sys/sx.h>
62 #include <sys/unistd.h>
63 #include <sys/vnode.h>
64 #include <sys/proc.h>
65 #include <sys/dirent.h>
66 #include <sys/extattr.h>
67 #include <sys/jail.h>
68 #include <sys/syscallsubr.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/stdarg.h>
72 
73 #include <vm/vm.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_page.h>
76 #include <vm/uma.h>
77 
78 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
79 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
80 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
81 static int setfmode(struct thread *td, struct vnode *, int);
82 static int setfflags(struct thread *td, struct vnode *, int);
83 static int setutimes(struct thread *td, struct vnode *,
84     const struct timespec *, int, int);
85 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
86     struct thread *td);
87 
88 static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
89     size_t nbytes, struct thread *td);
90 
91 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
92 
93 /*
94  * The module initialization routine for POSIX asynchronous I/O will
95  * set this to the version of AIO that it implements.  (Zero means
96  * that it is not implemented.)  This value is used here by pathconf()
97  * and in kern_descrip.c by fpathconf().
98  */
99 int async_io_version;
100 
101 /*
102  * Sync each mounted filesystem.
103  */
104 #ifndef _SYS_SYSPROTO_H_
105 struct sync_args {
106 	int     dummy;
107 };
108 #endif
109 
110 #ifdef DEBUG
111 static int syncprt = 0;
112 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
113 #endif
114 
115 /* ARGSUSED */
116 int
117 sync(td, uap)
118 	struct thread *td;
119 	struct sync_args *uap;
120 {
121 	struct mount *mp, *nmp;
122 	int asyncflag;
123 
124 	mtx_lock(&Giant);
125 	mtx_lock(&mountlist_mtx);
126 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
127 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
128 			nmp = TAILQ_NEXT(mp, mnt_list);
129 			continue;
130 		}
131 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
132 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
133 			asyncflag = mp->mnt_flag & MNT_ASYNC;
134 			mp->mnt_flag &= ~MNT_ASYNC;
135 			vfs_msync(mp, MNT_NOWAIT);
136 			VFS_SYNC(mp, MNT_NOWAIT, td);
137 			mp->mnt_flag |= asyncflag;
138 			vn_finished_write(mp);
139 		}
140 		mtx_lock(&mountlist_mtx);
141 		nmp = TAILQ_NEXT(mp, mnt_list);
142 		vfs_unbusy(mp, td);
143 	}
144 	mtx_unlock(&mountlist_mtx);
145 #if 0
146 /*
147  * XXX don't call vfs_bufstats() yet because that routine
148  * was not imported in the Lite2 merge.
149  */
150 #ifdef DIAGNOSTIC
151 	if (syncprt)
152 		vfs_bufstats();
153 #endif /* DIAGNOSTIC */
154 #endif
155 	mtx_unlock(&Giant);
156 	return (0);
157 }
158 
159 /* XXX PRISON: could be per prison flag */
160 static int prison_quotas;
161 #if 0
162 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
163 #endif
164 
165 /*
166  * Change filesystem quotas.
167  *
168  * MP SAFE
169  */
170 #ifndef _SYS_SYSPROTO_H_
171 struct quotactl_args {
172 	char *path;
173 	int cmd;
174 	int uid;
175 	caddr_t arg;
176 };
177 #endif
178 int
179 quotactl(td, uap)
180 	struct thread *td;
181 	register struct quotactl_args /* {
182 		char *path;
183 		int cmd;
184 		int uid;
185 		caddr_t arg;
186 	} */ *uap;
187 {
188 	struct mount *mp, *vmp;
189 	int error;
190 	struct nameidata nd;
191 
192 	if (jailed(td->td_ucred) && !prison_quotas)
193 		return (EPERM);
194 	mtx_lock(&Giant);
195 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
196 	if ((error = namei(&nd)) != 0) {
197 		mtx_unlock(&Giant);
198 		return (error);
199 	}
200 	NDFREE(&nd, NDF_ONLY_PNBUF);
201 	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
202 	mp = nd.ni_vp->v_mount;
203 	vrele(nd.ni_vp);
204 	if (error) {
205 		mtx_unlock(&Giant);
206 		return (error);
207 	}
208 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
209 	vn_finished_write(vmp);
210 	mtx_unlock(&Giant);
211 	return (error);
212 }
213 
214 /*
215  * Get filesystem statistics.
216  */
217 #ifndef _SYS_SYSPROTO_H_
218 struct statfs_args {
219 	char *path;
220 	struct statfs *buf;
221 };
222 #endif
223 int
224 statfs(td, uap)
225 	struct thread *td;
226 	register struct statfs_args /* {
227 		char *path;
228 		struct statfs *buf;
229 	} */ *uap;
230 {
231 	struct statfs sf;
232 	int error;
233 
234 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
235 	if (error == 0)
236 		error = copyout(&sf, uap->buf, sizeof(sf));
237 	return (error);
238 }
239 
240 int
241 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
242     struct statfs *buf)
243 {
244 	struct mount *mp;
245 	struct statfs *sp, sb;
246 	int error;
247 	struct nameidata nd;
248 
249 	mtx_lock(&Giant);
250 	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
251 	error = namei(&nd);
252 	if (error) {
253 		mtx_unlock(&Giant);
254 		return (error);
255 	}
256 	mp = nd.ni_vp->v_mount;
257 	sp = &mp->mnt_stat;
258 	NDFREE(&nd, NDF_ONLY_PNBUF);
259 	vrele(nd.ni_vp);
260 #ifdef MAC
261 	error = mac_check_mount_stat(td->td_ucred, mp);
262 	if (error) {
263 		mtx_unlock(&Giant);
264 		return (error);
265 	}
266 #endif
267 	/*
268 	 * Set these in case the underlying filesystem fails to do so.
269 	 */
270 	sp->f_version = STATFS_VERSION;
271 	sp->f_namemax = NAME_MAX;
272 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
273 	error = VFS_STATFS(mp, sp, td);
274 	if (error) {
275 		mtx_unlock(&Giant);
276 		return (error);
277 	}
278 	if (suser(td)) {
279 		bcopy(sp, &sb, sizeof(sb));
280 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
281 		prison_enforce_statfs(td->td_ucred, mp, &sb);
282 		sp = &sb;
283 	}
284 	mtx_unlock(&Giant);
285 	*buf = *sp;
286 	return (0);
287 }
288 
289 /*
290  * Get filesystem statistics.
291  */
292 #ifndef _SYS_SYSPROTO_H_
293 struct fstatfs_args {
294 	int fd;
295 	struct statfs *buf;
296 };
297 #endif
298 int
299 fstatfs(td, uap)
300 	struct thread *td;
301 	register struct fstatfs_args /* {
302 		int fd;
303 		struct statfs *buf;
304 	} */ *uap;
305 {
306 	struct statfs sf;
307 	int error;
308 
309 	error = kern_fstatfs(td, uap->fd, &sf);
310 	if (error == 0)
311 		error = copyout(&sf, uap->buf, sizeof(sf));
312 	return (error);
313 }
314 
315 int
316 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
317 {
318 	struct file *fp;
319 	struct mount *mp;
320 	struct statfs *sp, sb;
321 	int error;
322 
323 	error = getvnode(td->td_proc->p_fd, fd, &fp);
324 	if (error)
325 		return (error);
326 	mp = fp->f_vnode->v_mount;
327 	fdrop(fp, td);
328 	if (mp == NULL)
329 		return (EBADF);
330 	mtx_lock(&Giant);
331 #ifdef MAC
332 	error = mac_check_mount_stat(td->td_ucred, mp);
333 	if (error) {
334 		mtx_unlock(&Giant);
335 		return (error);
336 	}
337 #endif
338 	sp = &mp->mnt_stat;
339 	/*
340 	 * Set these in case the underlying filesystem fails to do so.
341 	 */
342 	sp->f_version = STATFS_VERSION;
343 	sp->f_namemax = NAME_MAX;
344 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
345 	error = VFS_STATFS(mp, sp, td);
346 	if (error) {
347 		mtx_unlock(&Giant);
348 		return (error);
349 	}
350 	if (suser(td)) {
351 		bcopy(sp, &sb, sizeof(sb));
352 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
353 		prison_enforce_statfs(td->td_ucred, mp, &sb);
354 		sp = &sb;
355 	}
356 	mtx_unlock(&Giant);
357 	*buf = *sp;
358 	return (0);
359 }
360 
361 /*
362  * Get statistics on all filesystems.
363  */
364 #ifndef _SYS_SYSPROTO_H_
365 struct getfsstat_args {
366 	struct statfs *buf;
367 	long bufsize;
368 	int flags;
369 };
370 #endif
371 int
372 getfsstat(td, uap)
373 	struct thread *td;
374 	register struct getfsstat_args /* {
375 		struct statfs *buf;
376 		long bufsize;
377 		int flags;
378 	} */ *uap;
379 {
380 
381 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
382 	    uap->flags));
383 }
384 
/*
 * Collect statistics for the mounted filesystems visible to `td'.
 *
 * bufsize == 0:            nothing is copied; only the count is reported.
 * bufseg == UIO_USERSPACE: records are copied out to the user buffer *buf.
 * bufseg == UIO_SYSSPACE:  a kernel buffer is allocated (M_TEMP) and
 *                          stored in '*buf'; the caller is responsible for
 *                          freeing it.
 *
 * At most bufsize / sizeof(struct statfs) records are stored.  The number
 * reported in td->td_retval[0] is the number of mounts examined, clamped
 * to that maximum when a buffer was supplied.  Returns 0, or the copyout()
 * error for a user buffer.
 */
int
kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
    enum uio_seg bufseg, int flags)
{
	struct mount *mp, *nmp;
	struct statfs *sfsp, *sp, sb;
	size_t count, maxcount;
	int error;

	maxcount = bufsize / sizeof(struct statfs);
	if (bufsize == 0)
		sfsp = NULL;
	else if (bufseg == UIO_USERSPACE)
		sfsp = *buf;
	else /* if (bufseg == UIO_SYSSPACE) */ {
		/*
		 * Size the kernel buffer by the current number of mounts so
		 * that no more than necessary is allocated.
		 */
		count = 0;
		mtx_lock(&mountlist_mtx);
		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
			count++;
		}
		mtx_unlock(&mountlist_mtx);
		if (maxcount > count)
			maxcount = count;
		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
		    M_WAITOK);
	}
	count = 0;
	mtx_lock(&Giant);
	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/* Skip mounts the credential may not see. */
		if (prison_canseemount(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#ifdef MAC
		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#endif
		/* Skip mounts that cannot be busied without waiting. */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		if (sfsp && count < maxcount) {
			sp = &mp->mnt_stat;
			/*
			 * Set these in case the underlying filesystem
			 * fails to do so.
			 */
			sp->f_version = STATFS_VERSION;
			sp->f_namemax = NAME_MAX;
			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
			/*
			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
			 * overrides MNT_WAIT.
			 */
			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
			    (flags & MNT_WAIT)) &&
			    (error = VFS_STATFS(mp, sp, td))) {
				/*
				 * Refresh failed: leave this mount out of
				 * the result and out of the count.
				 */
				mtx_lock(&mountlist_mtx);
				nmp = TAILQ_NEXT(mp, mnt_list);
				vfs_unbusy(mp, td);
				continue;
			}
			if (suser(td)) {
				/*
				 * suser() failed: report a copy with the
				 * fsid zeroed and prison_enforce_statfs()
				 * applied instead of the cached record.
				 */
				bcopy(sp, &sb, sizeof(sb));
				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
				prison_enforce_statfs(td->td_ucred, mp, &sb);
				sp = &sb;
			}
			if (bufseg == UIO_SYSSPACE)
				bcopy(sp, sfsp, sizeof(*sp));
			else /* if (bufseg == UIO_USERSPACE) */ {
				error = copyout(sp, sfsp, sizeof(*sp));
				if (error) {
					vfs_unbusy(mp, td);
					mtx_unlock(&Giant);
					return (error);
				}
			}
			sfsp++;
		}
		count++;
		/* Re-take the list lock before advancing and unbusying. */
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, td);
	}
	mtx_unlock(&mountlist_mtx);
	mtx_unlock(&Giant);
	/* With a buffer, never report more records than it could hold. */
	if (sfsp && count > maxcount)
		td->td_retval[0] = maxcount;
	else
		td->td_retval[0] = count;
	return (0);
}
487 
488 #ifdef COMPAT_FREEBSD4
489 /*
490  * Get old format filesystem statistics.
491  */
492 static void cvtstatfs(struct statfs *, struct ostatfs *);
493 
494 #ifndef _SYS_SYSPROTO_H_
495 struct freebsd4_statfs_args {
496 	char *path;
497 	struct ostatfs *buf;
498 };
499 #endif
500 int
501 freebsd4_statfs(td, uap)
502 	struct thread *td;
503 	struct freebsd4_statfs_args /* {
504 		char *path;
505 		struct ostatfs *buf;
506 	} */ *uap;
507 {
508 	struct ostatfs osb;
509 	struct statfs sf;
510 	int error;
511 
512 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
513 	if (error)
514 		return (error);
515 	cvtstatfs(&sf, &osb);
516 	return (copyout(&osb, uap->buf, sizeof(osb)));
517 }
518 
519 /*
520  * Get filesystem statistics.
521  */
522 #ifndef _SYS_SYSPROTO_H_
523 struct freebsd4_fstatfs_args {
524 	int fd;
525 	struct ostatfs *buf;
526 };
527 #endif
528 int
529 freebsd4_fstatfs(td, uap)
530 	struct thread *td;
531 	struct freebsd4_fstatfs_args /* {
532 		int fd;
533 		struct ostatfs *buf;
534 	} */ *uap;
535 {
536 	struct ostatfs osb;
537 	struct statfs sf;
538 	int error;
539 
540 	error = kern_fstatfs(td, uap->fd, &sf);
541 	if (error)
542 		return (error);
543 	cvtstatfs(&sf, &osb);
544 	return (copyout(&osb, uap->buf, sizeof(osb)));
545 }
546 
547 /*
548  * Get statistics on all filesystems.
549  */
550 #ifndef _SYS_SYSPROTO_H_
551 struct freebsd4_getfsstat_args {
552 	struct ostatfs *buf;
553 	long bufsize;
554 	int flags;
555 };
556 #endif
557 int
558 freebsd4_getfsstat(td, uap)
559 	struct thread *td;
560 	register struct freebsd4_getfsstat_args /* {
561 		struct ostatfs *buf;
562 		long bufsize;
563 		int flags;
564 	} */ *uap;
565 {
566 	struct statfs *buf, *sp;
567 	struct ostatfs osb;
568 	size_t count, size;
569 	int error;
570 
571 	count = uap->bufsize / sizeof(struct ostatfs);
572 	size = count * sizeof(struct statfs);
573 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
574 	if (size > 0) {
575 		count = td->td_retval[0];
576 		sp = buf;
577 		while (count > 0 && error == 0) {
578 			cvtstatfs(sp, &osb);
579 			error = copyout(&osb, uap->buf, sizeof(osb));
580 			sp++;
581 			uap->buf++;
582 			count--;
583 		}
584 		free(buf, M_TEMP);
585 	}
586 	return (error);
587 }
588 
589 /*
590  * Implement fstatfs() for (NFS) file handles.
591  */
592 #ifndef _SYS_SYSPROTO_H_
593 struct freebsd4_fhstatfs_args {
594 	struct fhandle *u_fhp;
595 	struct ostatfs *buf;
596 };
597 #endif
598 int
599 freebsd4_fhstatfs(td, uap)
600 	struct thread *td;
601 	struct freebsd4_fhstatfs_args /* {
602 		struct fhandle *u_fhp;
603 		struct ostatfs *buf;
604 	} */ *uap;
605 {
606 	struct ostatfs osb;
607 	struct statfs sf;
608 	fhandle_t fh;
609 	int error;
610 
611 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
612 	if (error)
613 		return (error);
614 	error = kern_fhstatfs(td, fh, &sf);
615 	if (error)
616 		return (error);
617 	cvtstatfs(&sf, &osb);
618 	return (copyout(&osb, uap->buf, sizeof(osb)));
619 }
620 
621 /*
622  * Convert a new format statfs structure to an old format statfs structure.
623  */
624 static void
625 cvtstatfs(nsp, osp)
626 	struct statfs *nsp;
627 	struct ostatfs *osp;
628 {
629 
630 	bzero(osp, sizeof(*osp));
631 	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
632 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
633 	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
634 	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
635 	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
636 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
637 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
638 	osp->f_owner = nsp->f_owner;
639 	osp->f_type = nsp->f_type;
640 	osp->f_flags = nsp->f_flags;
641 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
642 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
643 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
644 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
645 	bcopy(nsp->f_fstypename, osp->f_fstypename,
646 	    MIN(MFSNAMELEN, OMNAMELEN));
647 	bcopy(nsp->f_mntonname, osp->f_mntonname,
648 	    MIN(MFSNAMELEN, OMNAMELEN));
649 	bcopy(nsp->f_mntfromname, osp->f_mntfromname,
650 	    MIN(MFSNAMELEN, OMNAMELEN));
651 	osp->f_fsid = nsp->f_fsid;
652 }
653 #endif /* COMPAT_FREEBSD4 */
654 
655 /*
656  * Change current working directory to a given file descriptor.
657  */
658 #ifndef _SYS_SYSPROTO_H_
659 struct fchdir_args {
660 	int	fd;
661 };
662 #endif
663 int
664 fchdir(td, uap)
665 	struct thread *td;
666 	struct fchdir_args /* {
667 		int fd;
668 	} */ *uap;
669 {
670 	register struct filedesc *fdp = td->td_proc->p_fd;
671 	struct vnode *vp, *tdp, *vpold;
672 	struct mount *mp;
673 	struct file *fp;
674 	int vfslocked;
675 	int error;
676 
677 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
678 		return (error);
679 	vp = fp->f_vnode;
680 	VREF(vp);
681 	fdrop(fp, td);
682 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
683 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
684 	if (vp->v_type != VDIR)
685 		error = ENOTDIR;
686 #ifdef MAC
687 	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
688 	}
689 #endif
690 	else
691 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
692 	while (!error && (mp = vp->v_mountedhere) != NULL) {
693 		int tvfslocked;
694 		if (vfs_busy(mp, 0, 0, td))
695 			continue;
696 		tvfslocked = VFS_LOCK_GIANT(mp);
697 		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
698 		vfs_unbusy(mp, td);
699 		if (error) {
700 			VFS_UNLOCK_GIANT(tvfslocked);
701 			break;
702 		}
703 		vput(vp);
704 		VFS_UNLOCK_GIANT(vfslocked);
705 		vp = tdp;
706 		vfslocked = tvfslocked;
707 	}
708 	if (error) {
709 		vput(vp);
710 		VFS_UNLOCK_GIANT(vfslocked);
711 		return (error);
712 	}
713 	VOP_UNLOCK(vp, 0, td);
714 	FILEDESC_LOCK_FAST(fdp);
715 	vpold = fdp->fd_cdir;
716 	fdp->fd_cdir = vp;
717 	FILEDESC_UNLOCK_FAST(fdp);
718 	vrele(vpold);
719 	VFS_UNLOCK_GIANT(vfslocked);
720 	return (0);
721 }
722 
723 /*
724  * Change current working directory (``.'').
725  */
726 #ifndef _SYS_SYSPROTO_H_
727 struct chdir_args {
728 	char	*path;
729 };
730 #endif
731 int
732 chdir(td, uap)
733 	struct thread *td;
734 	struct chdir_args /* {
735 		char *path;
736 	} */ *uap;
737 {
738 
739 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
740 }
741 
742 int
743 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
744 {
745 	register struct filedesc *fdp = td->td_proc->p_fd;
746 	int error;
747 	struct nameidata nd;
748 	struct vnode *vp;
749 	int vfslocked;
750 
751 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
752 	if ((error = namei(&nd)) != 0)
753 		return (error);
754 	vfslocked = NDHASGIANT(&nd);
755 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
756 		vput(nd.ni_vp);
757 		VFS_UNLOCK_GIANT(vfslocked);
758 		NDFREE(&nd, NDF_ONLY_PNBUF);
759 		return (error);
760 	}
761 	VOP_UNLOCK(nd.ni_vp, 0, td);
762 	NDFREE(&nd, NDF_ONLY_PNBUF);
763 	FILEDESC_LOCK_FAST(fdp);
764 	vp = fdp->fd_cdir;
765 	fdp->fd_cdir = nd.ni_vp;
766 	FILEDESC_UNLOCK_FAST(fdp);
767 	vrele(vp);
768 	VFS_UNLOCK_GIANT(vfslocked);
769 	return (0);
770 }
771 
772 /*
773  * Helper function for raised chroot(2) security function:  Refuse if
774  * any filedescriptors are open directories.
775  */
776 static int
777 chroot_refuse_vdir_fds(fdp)
778 	struct filedesc *fdp;
779 {
780 	struct vnode *vp;
781 	struct file *fp;
782 	int fd;
783 
784 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
785 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
786 		fp = fget_locked(fdp, fd);
787 		if (fp == NULL)
788 			continue;
789 		if (fp->f_type == DTYPE_VNODE) {
790 			vp = fp->f_vnode;
791 			if (vp->v_type == VDIR)
792 				return (EPERM);
793 		}
794 	}
795 	return (0);
796 }
797 
/*
 * This sysctl (kern.chroot_allow_open_directories) determines if we will
 * allow a process to chroot(2) if it has a directory open:
 *	0: disallowed for all processes.
 *	1: allowed for processes that were not already chroot(2)'ed.
 *	2: allowed for all processes.
 *
 * The value is consulted by change_root(), which treats a process whose
 * fd_rdir differs from rootvnode as already chroot(2)'ed.  The default
 * is 1.
 */

static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0, "");
810 
811 /*
812  * Change notion of root (``/'') directory.
813  */
814 #ifndef _SYS_SYSPROTO_H_
815 struct chroot_args {
816 	char	*path;
817 };
818 #endif
819 int
820 chroot(td, uap)
821 	struct thread *td;
822 	struct chroot_args /* {
823 		char *path;
824 	} */ *uap;
825 {
826 	int error;
827 	struct nameidata nd;
828 	int vfslocked;
829 
830 	error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
831 	if (error)
832 		return (error);
833 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE,
834 	    UIO_USERSPACE, uap->path, td);
835 	error = namei(&nd);
836 	if (error)
837 		goto error;
838 	vfslocked = NDHASGIANT(&nd);
839 	if ((error = change_dir(nd.ni_vp, td)) != 0)
840 		goto e_vunlock;
841 #ifdef MAC
842 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
843 		goto e_vunlock;
844 #endif
845 	VOP_UNLOCK(nd.ni_vp, 0, td);
846 	error = change_root(nd.ni_vp, td);
847 	vrele(nd.ni_vp);
848 	VFS_UNLOCK_GIANT(vfslocked);
849 	NDFREE(&nd, NDF_ONLY_PNBUF);
850 	return (error);
851 e_vunlock:
852 	vput(nd.ni_vp);
853 	VFS_UNLOCK_GIANT(vfslocked);
854 error:
855 	NDFREE(&nd, NDF_ONLY_PNBUF);
856 	return (error);
857 }
858 
859 /*
860  * Common routine for chroot and chdir.  Callers must provide a locked vnode
861  * instance.
862  */
863 int
864 change_dir(vp, td)
865 	struct vnode *vp;
866 	struct thread *td;
867 {
868 	int error;
869 
870 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
871 	if (vp->v_type != VDIR)
872 		return (ENOTDIR);
873 #ifdef MAC
874 	error = mac_check_vnode_chdir(td->td_ucred, vp);
875 	if (error)
876 		return (error);
877 #endif
878 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
879 	return (error);
880 }
881 
882 /*
883  * Common routine for kern_chroot() and jail_attach().  The caller is
884  * responsible for invoking suser() and mac_check_chroot() to authorize this
885  * operation.
886  */
887 int
888 change_root(vp, td)
889 	struct vnode *vp;
890 	struct thread *td;
891 {
892 	struct filedesc *fdp;
893 	struct vnode *oldvp;
894 	int error;
895 
896 	VFS_ASSERT_GIANT(vp->v_mount);
897 	fdp = td->td_proc->p_fd;
898 	FILEDESC_LOCK(fdp);
899 	if (chroot_allow_open_directories == 0 ||
900 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
901 		error = chroot_refuse_vdir_fds(fdp);
902 		if (error) {
903 			FILEDESC_UNLOCK(fdp);
904 			return (error);
905 		}
906 	}
907 	oldvp = fdp->fd_rdir;
908 	fdp->fd_rdir = vp;
909 	VREF(fdp->fd_rdir);
910 	if (!fdp->fd_jdir) {
911 		fdp->fd_jdir = vp;
912 		VREF(fdp->fd_jdir);
913 	}
914 	FILEDESC_UNLOCK(fdp);
915 	vrele(oldvp);
916 	return (0);
917 }
918 
919 /*
920  * Check permissions, allocate an open file structure,
921  * and call the device open routine if any.
922  *
923  * MP SAFE
924  */
925 #ifndef _SYS_SYSPROTO_H_
926 struct open_args {
927 	char	*path;
928 	int	flags;
929 	int	mode;
930 };
931 #endif
932 int
933 open(td, uap)
934 	struct thread *td;
935 	register struct open_args /* {
936 		char *path;
937 		int flags;
938 		int mode;
939 	} */ *uap;
940 {
941 	int error;
942 
943 	error = kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
944 	if (mtx_owned(&Giant))
945 		printf("open: %s: %d\n", uap->path, error);
946 	return (error);
947 }
948 
/*
 * Open `path' (in address space `pathseg') with the given open(2) flags
 * and creation mode, and install it in a new descriptor slot of td's
 * process.  On success the descriptor index is stored in
 * td->td_retval[0] and 0 is returned.
 *
 * Returns EINVAL when both access-mode bits are set, the falloc() error
 * when no descriptor can be allocated, and otherwise the error from
 * vn_open(), the optional advisory lock, or the optional truncation
 * (ERESTART from vn_open() is reported as EINTR).
 */
int
kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
    int mode)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	struct vattr vat;
	struct mount *mp;
	int cmode;
	struct file *nfp;
	int type, indx, error;
	struct flock lf;
	struct nameidata nd;
	int vfslocked;

	if ((flags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	/* Convert the open(2) flags to their in-kernel form. */
	flags = FFLAGS(flags);
	error = falloc(td, &nfp, &indx);
	if (error)
		return (error);
	/* An extra reference on `nfp' has been held for us by falloc(). */
	fp = nfp;
	/* Apply the umask and never pass the sticky bit to vn_open(). */
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open(&nd, &flags, cmode, indx);
	if (error) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wonderous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops) {
			fdrop(fp, td);
			td->td_retval[0] = indx;
			return (0);
		}

		/*
		 * release our own reference
		 */
		fdrop(fp, td);

		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 */
		if ((error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
		    (error =
			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
			td->td_retval[0] = indx;
			return (0);
		}
		/*
		 * Clean up the descriptor, but only if another thread hadn't
		 * replaced or closed it.
		 */
		fdclose(fdp, fp, indx, td);

		if (error == ERESTART)
			error = EINTR;
		return (error);
	}
	td->td_dupfd = 0;
	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/*
	 * There should be 2 references on the file, one from the descriptor
	 * table, and one for us.
	 *
	 * Handle the case where someone closed the file (via its file
	 * descriptor) while we were blocked.  The end result should look
	 * like opening the file succeeded but it was immediately closed.
	 * We call vn_close() manually because we haven't yet hooked up
	 * the various 'struct file' fields.
	 */
	FILEDESC_LOCK(fdp);
	FILE_LOCK(fp);
	if (fp->f_count == 1) {
		mp = vp->v_mount;
		KASSERT(fdp->fd_ofiles[indx] != fp,
		    ("Open file descriptor lost all refs"));
		FILE_UNLOCK(fp);
		FILEDESC_UNLOCK(fdp);
		VOP_UNLOCK(vp, 0, td);
		vn_close(vp, flags & FMASK, fp->f_cred, td);
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		td->td_retval[0] = indx;
		return (0);
	}
	/*
	 * Hook the vnode up to the file.  f_data and f_ops are only filled
	 * in when they still hold their initial values, i.e. when the open
	 * did not already install its own.
	 */
	fp->f_vnode = vp;
	if (fp->f_data == NULL)
		fp->f_data = vp;
	fp->f_flag = flags & FMASK;
	if (fp->f_ops == &badfileops)
		fp->f_ops = &vnops;
	fp->f_seqcount = 1;
	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
	FILE_UNLOCK(fp);
	FILEDESC_UNLOCK(fdp);

	VOP_UNLOCK(vp, 0, td);
	/* O_EXLOCK / O_SHLOCK: take a whole-file flock-style lock. */
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		/* Wait for the lock unless the open is non-blocking. */
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0)
			goto bad;
		fp->f_flag |= FHASLOCK;
	}
	/* O_TRUNC: set the file size to zero under the vnode lock. */
	if (flags & O_TRUNC) {
		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
			goto bad;
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		VATTR_NULL(&vat);
		vat.va_size = 0;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
#ifdef MAC
		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
		if (error == 0)
#endif
			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
		VOP_UNLOCK(vp, 0, td);
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	VFS_UNLOCK_GIANT(vfslocked);
	/*
	 * Release our private reference, leaving the one associated with
	 * the descriptor table intact.
	 */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);
bad:
	/*
	 * Failure after the file was installed: remove it from the
	 * descriptor table and drop our private reference.
	 */
	VFS_UNLOCK_GIANT(vfslocked);
	fdclose(fdp, fp, indx, td);
	fdrop(fp, td);
	return (error);
}
1106 
#ifdef COMPAT_43
/*
 * Old creat(2) entry point: create a file.  Equivalent to opening `path'
 * through kern_open() with O_WRONLY | O_CREAT | O_TRUNC and the supplied
 * mode.
 *
 * MP SAFE
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	int error;

	error = kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode);
	return (error);
}
#endif /* COMPAT_43 */
1132 
1133 /*
1134  * Create a special file.
1135  */
1136 #ifndef _SYS_SYSPROTO_H_
1137 struct mknod_args {
1138 	char	*path;
1139 	int	mode;
1140 	int	dev;
1141 };
1142 #endif
1143 int
1144 mknod(td, uap)
1145 	struct thread *td;
1146 	register struct mknod_args /* {
1147 		char *path;
1148 		int mode;
1149 		int dev;
1150 	} */ *uap;
1151 {
1152 
1153 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1154 }
1155 
1156 int
1157 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1158     int dev)
1159 {
1160 	struct vnode *vp;
1161 	struct mount *mp;
1162 	struct vattr vattr;
1163 	int error;
1164 	int whiteout = 0;
1165 	struct nameidata nd;
1166 	int vfslocked;
1167 
1168 	switch (mode & S_IFMT) {
1169 	case S_IFCHR:
1170 	case S_IFBLK:
1171 		error = suser(td);
1172 		break;
1173 	default:
1174 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
1175 		break;
1176 	}
1177 	if (error)
1178 		return (error);
1179 restart:
1180 	bwillwrite();
1181 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, pathseg, path, td);
1182 	if ((error = namei(&nd)) != 0)
1183 		return (error);
1184 	vfslocked = NDHASGIANT(&nd);
1185 	vp = nd.ni_vp;
1186 	if (vp != NULL) {
1187 		NDFREE(&nd, NDF_ONLY_PNBUF);
1188 		vrele(vp);
1189 		if (vp == nd.ni_dvp)
1190 			vrele(nd.ni_dvp);
1191 		else
1192 			vput(nd.ni_dvp);
1193 		VFS_UNLOCK_GIANT(vfslocked);
1194 		return (EEXIST);
1195 	} else {
1196 		VATTR_NULL(&vattr);
1197 		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1198 		vattr.va_mode = (mode & ALLPERMS) &
1199 		    ~td->td_proc->p_fd->fd_cmask;
1200 		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1201 		vattr.va_rdev = dev;
1202 		whiteout = 0;
1203 
1204 		switch (mode & S_IFMT) {
1205 		case S_IFMT:	/* used by badsect to flag bad sectors */
1206 			vattr.va_type = VBAD;
1207 			break;
1208 		case S_IFCHR:
1209 			vattr.va_type = VCHR;
1210 			break;
1211 		case S_IFBLK:
1212 			vattr.va_type = VBLK;
1213 			break;
1214 		case S_IFWHT:
1215 			whiteout = 1;
1216 			break;
1217 		default:
1218 			error = EINVAL;
1219 			break;
1220 		}
1221 	}
1222 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1223 		NDFREE(&nd, NDF_ONLY_PNBUF);
1224 		vput(nd.ni_dvp);
1225 		VFS_UNLOCK_GIANT(vfslocked);
1226 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1227 			return (error);
1228 		goto restart;
1229 	}
1230 #ifdef MAC
1231 	if (error == 0 && !whiteout)
1232 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1233 		    &nd.ni_cnd, &vattr);
1234 #endif
1235 	if (!error) {
1236 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1237 		if (whiteout)
1238 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1239 		else {
1240 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1241 						&nd.ni_cnd, &vattr);
1242 			if (error == 0)
1243 				vput(nd.ni_vp);
1244 		}
1245 	}
1246 	NDFREE(&nd, NDF_ONLY_PNBUF);
1247 	vput(nd.ni_dvp);
1248 	vn_finished_write(mp);
1249 	VFS_UNLOCK_GIANT(vfslocked);
1250 	return (error);
1251 }
1252 
1253 /*
1254  * Create a named pipe.
1255  */
1256 #ifndef _SYS_SYSPROTO_H_
1257 struct mkfifo_args {
1258 	char	*path;
1259 	int	mode;
1260 };
1261 #endif
1262 int
1263 mkfifo(td, uap)
1264 	struct thread *td;
1265 	register struct mkfifo_args /* {
1266 		char *path;
1267 		int mode;
1268 	} */ *uap;
1269 {
1270 
1271 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1272 }
1273 
1274 int
1275 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1276 {
1277 	struct mount *mp;
1278 	struct vattr vattr;
1279 	int error;
1280 	struct nameidata nd;
1281 	int vfslocked;
1282 
1283 restart:
1284 	bwillwrite();
1285 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, pathseg, path, td);
1286 	if ((error = namei(&nd)) != 0)
1287 		return (error);
1288 	vfslocked = NDHASGIANT(&nd);
1289 	if (nd.ni_vp != NULL) {
1290 		NDFREE(&nd, NDF_ONLY_PNBUF);
1291 		vrele(nd.ni_vp);
1292 		if (nd.ni_vp == nd.ni_dvp)
1293 			vrele(nd.ni_dvp);
1294 		else
1295 			vput(nd.ni_dvp);
1296 		VFS_UNLOCK_GIANT(vfslocked);
1297 		return (EEXIST);
1298 	}
1299 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1300 		NDFREE(&nd, NDF_ONLY_PNBUF);
1301 		vput(nd.ni_dvp);
1302 		VFS_UNLOCK_GIANT(vfslocked);
1303 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1304 			return (error);
1305 		goto restart;
1306 	}
1307 	VATTR_NULL(&vattr);
1308 	vattr.va_type = VFIFO;
1309 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1310 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1311 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1312 #ifdef MAC
1313 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1314 	    &vattr);
1315 	if (error)
1316 		goto out;
1317 #endif
1318 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1319 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1320 	if (error == 0)
1321 		vput(nd.ni_vp);
1322 #ifdef MAC
1323 out:
1324 #endif
1325 	vput(nd.ni_dvp);
1326 	vn_finished_write(mp);
1327 	VFS_UNLOCK_GIANT(vfslocked);
1328 	NDFREE(&nd, NDF_ONLY_PNBUF);
1329 	return (error);
1330 }
1331 
1332 /*
1333  * Make a hard file link.
1334  */
1335 #ifndef _SYS_SYSPROTO_H_
1336 struct link_args {
1337 	char	*path;
1338 	char	*link;
1339 };
1340 #endif
1341 int
1342 link(td, uap)
1343 	struct thread *td;
1344 	register struct link_args /* {
1345 		char *path;
1346 		char *link;
1347 	} */ *uap;
1348 {
1349 	int error;
1350 
1351 	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1352 	return (error);
1353 }
1354 
1355 SYSCTL_DECL(_security_bsd);
1356 
1357 static int hardlink_check_uid = 0;
1358 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1359     &hardlink_check_uid, 0,
1360     "Unprivileged processes cannot create hard links to files owned by other "
1361     "users");
1362 static int hardlink_check_gid = 0;
1363 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1364     &hardlink_check_gid, 0,
1365     "Unprivileged processes cannot create hard links to files owned by other "
1366     "groups");
1367 
1368 static int
1369 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1370 {
1371 	struct vattr va;
1372 	int error;
1373 
1374 	if (suser_cred(cred, SUSER_ALLOWJAIL) == 0)
1375 		return (0);
1376 
1377 	if (!hardlink_check_uid && !hardlink_check_gid)
1378 		return (0);
1379 
1380 	error = VOP_GETATTR(vp, &va, cred, td);
1381 	if (error != 0)
1382 		return (error);
1383 
1384 	if (hardlink_check_uid) {
1385 		if (cred->cr_uid != va.va_uid)
1386 			return (EPERM);
1387 	}
1388 
1389 	if (hardlink_check_gid) {
1390 		if (!groupmember(va.va_gid, cred))
1391 			return (EPERM);
1392 	}
1393 
1394 	return (0);
1395 }
1396 
1397 int
1398 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1399 {
1400 	struct vnode *vp;
1401 	struct mount *mp;
1402 	struct nameidata nd;
1403 	int vfslocked;
1404 	int lvfslocked;
1405 	int error;
1406 
1407 	bwillwrite();
1408 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, segflg, path, td);
1409 	if ((error = namei(&nd)) != 0)
1410 		return (error);
1411 	vfslocked = NDHASGIANT(&nd);
1412 	NDFREE(&nd, NDF_ONLY_PNBUF);
1413 	vp = nd.ni_vp;
1414 	if (vp->v_type == VDIR) {
1415 		vrele(vp);
1416 		VFS_UNLOCK_GIANT(vfslocked);
1417 		return (EPERM);		/* POSIX */
1418 	}
1419 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1420 		vrele(vp);
1421 		VFS_UNLOCK_GIANT(vfslocked);
1422 		return (error);
1423 	}
1424 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, segflg, link, td);
1425 	if ((error = namei(&nd)) == 0) {
1426 		lvfslocked = NDHASGIANT(&nd);
1427 		if (nd.ni_vp != NULL) {
1428 			vrele(nd.ni_vp);
1429 			if (nd.ni_dvp == nd.ni_vp)
1430 				vrele(nd.ni_dvp);
1431 			else
1432 				vput(nd.ni_dvp);
1433 			error = EEXIST;
1434 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1435 		    == 0) {
1436 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1437 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1438 			error = can_hardlink(vp, td, td->td_ucred);
1439 			if (error == 0)
1440 #ifdef MAC
1441 				error = mac_check_vnode_link(td->td_ucred,
1442 				    nd.ni_dvp, vp, &nd.ni_cnd);
1443 			if (error == 0)
1444 #endif
1445 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1446 			VOP_UNLOCK(vp, 0, td);
1447 			vput(nd.ni_dvp);
1448 		}
1449 		NDFREE(&nd, NDF_ONLY_PNBUF);
1450 		VFS_UNLOCK_GIANT(lvfslocked);
1451 	}
1452 	vrele(vp);
1453 	vn_finished_write(mp);
1454 	VFS_UNLOCK_GIANT(vfslocked);
1455 	return (error);
1456 }
1457 
1458 /*
1459  * Make a symbolic link.
1460  */
1461 #ifndef _SYS_SYSPROTO_H_
1462 struct symlink_args {
1463 	char	*path;
1464 	char	*link;
1465 };
1466 #endif
1467 int
1468 symlink(td, uap)
1469 	struct thread *td;
1470 	register struct symlink_args /* {
1471 		char *path;
1472 		char *link;
1473 	} */ *uap;
1474 {
1475 
1476 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1477 }
1478 
1479 int
1480 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1481 {
1482 	struct mount *mp;
1483 	struct vattr vattr;
1484 	char *syspath;
1485 	int error;
1486 	struct nameidata nd;
1487 	int vfslocked;
1488 
1489 	if (segflg == UIO_SYSSPACE) {
1490 		syspath = path;
1491 	} else {
1492 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1493 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1494 			goto out;
1495 	}
1496 restart:
1497 	bwillwrite();
1498 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE,
1499 	    segflg, link, td);
1500 	if ((error = namei(&nd)) != 0)
1501 		goto out;
1502 	vfslocked = NDHASGIANT(&nd);
1503 	if (nd.ni_vp) {
1504 		NDFREE(&nd, NDF_ONLY_PNBUF);
1505 		vrele(nd.ni_vp);
1506 		if (nd.ni_vp == nd.ni_dvp)
1507 			vrele(nd.ni_dvp);
1508 		else
1509 			vput(nd.ni_dvp);
1510 		VFS_UNLOCK_GIANT(vfslocked);
1511 		error = EEXIST;
1512 		goto out;
1513 	}
1514 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1515 		NDFREE(&nd, NDF_ONLY_PNBUF);
1516 		vput(nd.ni_dvp);
1517 		VFS_UNLOCK_GIANT(vfslocked);
1518 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1519 			goto out;
1520 		goto restart;
1521 	}
1522 	VATTR_NULL(&vattr);
1523 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1524 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1525 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1526 #ifdef MAC
1527 	vattr.va_type = VLNK;
1528 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1529 	    &vattr);
1530 	if (error)
1531 		goto out2;
1532 #endif
1533 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1534 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1535 	if (error == 0)
1536 		vput(nd.ni_vp);
1537 #ifdef MAC
1538 out2:
1539 #endif
1540 	NDFREE(&nd, NDF_ONLY_PNBUF);
1541 	vput(nd.ni_dvp);
1542 	vn_finished_write(mp);
1543 	VFS_UNLOCK_GIANT(vfslocked);
1544 out:
1545 	if (segflg != UIO_SYSSPACE)
1546 		uma_zfree(namei_zone, syspath);
1547 	return (error);
1548 }
1549 
1550 /*
1551  * Delete a whiteout from the filesystem.
1552  */
1553 int
1554 undelete(td, uap)
1555 	struct thread *td;
1556 	register struct undelete_args /* {
1557 		char *path;
1558 	} */ *uap;
1559 {
1560 	int error;
1561 	struct mount *mp;
1562 	struct nameidata nd;
1563 	int vfslocked;
1564 
1565 restart:
1566 	bwillwrite();
1567 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE, UIO_USERSPACE,
1568 	    uap->path, td);
1569 	error = namei(&nd);
1570 	if (error)
1571 		return (error);
1572 	vfslocked = NDHASGIANT(&nd);
1573 
1574 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1575 		NDFREE(&nd, NDF_ONLY_PNBUF);
1576 		if (nd.ni_vp)
1577 			vrele(nd.ni_vp);
1578 		if (nd.ni_vp == nd.ni_dvp)
1579 			vrele(nd.ni_dvp);
1580 		else
1581 			vput(nd.ni_dvp);
1582 		VFS_UNLOCK_GIANT(vfslocked);
1583 		return (EEXIST);
1584 	}
1585 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1586 		NDFREE(&nd, NDF_ONLY_PNBUF);
1587 		vput(nd.ni_dvp);
1588 		VFS_UNLOCK_GIANT(vfslocked);
1589 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1590 			return (error);
1591 		goto restart;
1592 	}
1593 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1594 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1595 	NDFREE(&nd, NDF_ONLY_PNBUF);
1596 	vput(nd.ni_dvp);
1597 	vn_finished_write(mp);
1598 	VFS_UNLOCK_GIANT(vfslocked);
1599 	return (error);
1600 }
1601 
1602 /*
1603  * Delete a name from the filesystem.
1604  */
1605 #ifndef _SYS_SYSPROTO_H_
1606 struct unlink_args {
1607 	char	*path;
1608 };
1609 #endif
1610 int
1611 unlink(td, uap)
1612 	struct thread *td;
1613 	struct unlink_args /* {
1614 		char *path;
1615 	} */ *uap;
1616 {
1617 	int error;
1618 
1619 	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1620 	return (error);
1621 }
1622 
1623 int
1624 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1625 {
1626 	struct mount *mp;
1627 	struct vnode *vp;
1628 	int error;
1629 	struct nameidata nd;
1630 	int vfslocked;
1631 
1632 restart:
1633 	bwillwrite();
1634 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE, pathseg, path, td);
1635 	if ((error = namei(&nd)) != 0)
1636 		return (error);
1637 	vfslocked = NDHASGIANT(&nd);
1638 	vp = nd.ni_vp;
1639 	if (vp->v_type == VDIR)
1640 		error = EPERM;		/* POSIX */
1641 	else {
1642 		/*
1643 		 * The root of a mounted filesystem cannot be deleted.
1644 		 *
1645 		 * XXX: can this only be a VDIR case?
1646 		 */
1647 		if (vp->v_vflag & VV_ROOT)
1648 			error = EBUSY;
1649 	}
1650 	if (error == 0) {
1651 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1652 			NDFREE(&nd, NDF_ONLY_PNBUF);
1653 			if (vp == nd.ni_dvp)
1654 				vrele(vp);
1655 			else
1656 				vput(vp);
1657 			vput(nd.ni_dvp);
1658 			VFS_UNLOCK_GIANT(vfslocked);
1659 			if ((error = vn_start_write(NULL, &mp,
1660 			    V_XSLEEP | PCATCH)) != 0)
1661 				return (error);
1662 			goto restart;
1663 		}
1664 #ifdef MAC
1665 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1666 		    &nd.ni_cnd);
1667 		if (error)
1668 			goto out;
1669 #endif
1670 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1671 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1672 #ifdef MAC
1673 out:
1674 #endif
1675 		vn_finished_write(mp);
1676 	}
1677 	NDFREE(&nd, NDF_ONLY_PNBUF);
1678 	if (vp == nd.ni_dvp)
1679 		vrele(vp);
1680 	else
1681 		vput(vp);
1682 	vput(nd.ni_dvp);
1683 	VFS_UNLOCK_GIANT(vfslocked);
1684 	return (error);
1685 }
1686 
1687 /*
1688  * Reposition read/write file offset.
1689  */
1690 #ifndef _SYS_SYSPROTO_H_
1691 struct lseek_args {
1692 	int	fd;
1693 	int	pad;
1694 	off_t	offset;
1695 	int	whence;
1696 };
1697 #endif
1698 int
1699 lseek(td, uap)
1700 	struct thread *td;
1701 	register struct lseek_args /* {
1702 		int fd;
1703 		int pad;
1704 		off_t offset;
1705 		int whence;
1706 	} */ *uap;
1707 {
1708 	struct ucred *cred = td->td_ucred;
1709 	struct file *fp;
1710 	struct vnode *vp;
1711 	struct vattr vattr;
1712 	off_t offset;
1713 	int error, noneg;
1714 	int vfslocked;
1715 
1716 	if ((error = fget(td, uap->fd, &fp)) != 0)
1717 		return (error);
1718 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1719 		fdrop(fp, td);
1720 		return (ESPIPE);
1721 	}
1722 	vp = fp->f_vnode;
1723 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1724 	noneg = (vp->v_type != VCHR);
1725 	offset = uap->offset;
1726 	switch (uap->whence) {
1727 	case L_INCR:
1728 		if (noneg &&
1729 		    (fp->f_offset < 0 ||
1730 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1731 			error = EOVERFLOW;
1732 			break;
1733 		}
1734 		offset += fp->f_offset;
1735 		break;
1736 	case L_XTND:
1737 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1738 		error = VOP_GETATTR(vp, &vattr, cred, td);
1739 		VOP_UNLOCK(vp, 0, td);
1740 		if (error)
1741 			break;
1742 		if (noneg &&
1743 		    (vattr.va_size > OFF_MAX ||
1744 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1745 			error = EOVERFLOW;
1746 			break;
1747 		}
1748 		offset += vattr.va_size;
1749 		break;
1750 	case L_SET:
1751 		break;
1752 	default:
1753 		error = EINVAL;
1754 	}
1755 	if (error == 0 && noneg && offset < 0)
1756 		error = EINVAL;
1757 	if (error != 0)
1758 		goto drop;
1759 	fp->f_offset = offset;
1760 	*(off_t *)(td->td_retval) = fp->f_offset;
1761 drop:
1762 	fdrop(fp, td);
1763 	VFS_UNLOCK_GIANT(vfslocked);
1764 	return (error);
1765 }
1766 
1767 #if defined(COMPAT_43)
1768 /*
1769  * Reposition read/write file offset.
1770  */
1771 #ifndef _SYS_SYSPROTO_H_
1772 struct olseek_args {
1773 	int	fd;
1774 	long	offset;
1775 	int	whence;
1776 };
1777 #endif
1778 int
1779 olseek(td, uap)
1780 	struct thread *td;
1781 	register struct olseek_args /* {
1782 		int fd;
1783 		long offset;
1784 		int whence;
1785 	} */ *uap;
1786 {
1787 	struct lseek_args /* {
1788 		int fd;
1789 		int pad;
1790 		off_t offset;
1791 		int whence;
1792 	} */ nuap;
1793 	int error;
1794 
1795 	nuap.fd = uap->fd;
1796 	nuap.offset = uap->offset;
1797 	nuap.whence = uap->whence;
1798 	error = lseek(td, &nuap);
1799 	return (error);
1800 }
1801 #endif /* COMPAT_43 */
1802 
1803 /*
1804  * Check access permissions using passed credentials.
1805  */
1806 static int
1807 vn_access(vp, user_flags, cred, td)
1808 	struct vnode	*vp;
1809 	int		user_flags;
1810 	struct ucred	*cred;
1811 	struct thread	*td;
1812 {
1813 	int error, flags;
1814 
1815 	/* Flags == 0 means only check for existence. */
1816 	error = 0;
1817 	if (user_flags) {
1818 		flags = 0;
1819 		if (user_flags & R_OK)
1820 			flags |= VREAD;
1821 		if (user_flags & W_OK)
1822 			flags |= VWRITE;
1823 		if (user_flags & X_OK)
1824 			flags |= VEXEC;
1825 #ifdef MAC
1826 		error = mac_check_vnode_access(cred, vp, flags);
1827 		if (error)
1828 			return (error);
1829 #endif
1830 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1831 			error = VOP_ACCESS(vp, flags, cred, td);
1832 	}
1833 	return (error);
1834 }
1835 
1836 /*
1837  * Check access permissions using "real" credentials.
1838  */
1839 #ifndef _SYS_SYSPROTO_H_
1840 struct access_args {
1841 	char	*path;
1842 	int	flags;
1843 };
1844 #endif
1845 int
1846 access(td, uap)
1847 	struct thread *td;
1848 	register struct access_args /* {
1849 		char *path;
1850 		int flags;
1851 	} */ *uap;
1852 {
1853 
1854 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1855 }
1856 
1857 int
1858 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1859 {
1860 	struct ucred *cred, *tmpcred;
1861 	register struct vnode *vp;
1862 	struct nameidata nd;
1863 	int vfslocked;
1864 	int error;
1865 
1866 	/*
1867 	 * Create and modify a temporary credential instead of one that
1868 	 * is potentially shared.  This could also mess up socket
1869 	 * buffer accounting which can run in an interrupt context.
1870 	 */
1871 	cred = td->td_ucred;
1872 	tmpcred = crdup(cred);
1873 	tmpcred->cr_uid = cred->cr_ruid;
1874 	tmpcred->cr_groups[0] = cred->cr_rgid;
1875 	td->td_ucred = tmpcred;
1876 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
1877 	if ((error = namei(&nd)) != 0)
1878 		goto out1;
1879 	vfslocked = NDHASGIANT(&nd);
1880 	vp = nd.ni_vp;
1881 
1882 	error = vn_access(vp, flags, tmpcred, td);
1883 	NDFREE(&nd, NDF_ONLY_PNBUF);
1884 	vput(vp);
1885 	VFS_UNLOCK_GIANT(vfslocked);
1886 out1:
1887 	td->td_ucred = cred;
1888 	crfree(tmpcred);
1889 	return (error);
1890 }
1891 
1892 /*
1893  * Check access permissions using "effective" credentials.
1894  */
1895 #ifndef _SYS_SYSPROTO_H_
1896 struct eaccess_args {
1897 	char	*path;
1898 	int	flags;
1899 };
1900 #endif
1901 int
1902 eaccess(td, uap)
1903 	struct thread *td;
1904 	register struct eaccess_args /* {
1905 		char *path;
1906 		int flags;
1907 	} */ *uap;
1908 {
1909 	struct nameidata nd;
1910 	struct vnode *vp;
1911 	int vfslocked;
1912 	int error;
1913 
1914 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE,
1915 	    uap->path, td);
1916 	if ((error = namei(&nd)) != 0)
1917 		return (error);
1918 	vp = nd.ni_vp;
1919 	vfslocked = NDHASGIANT(&nd);
1920 	error = vn_access(vp, uap->flags, td->td_ucred, td);
1921 	NDFREE(&nd, NDF_ONLY_PNBUF);
1922 	vput(vp);
1923 	VFS_UNLOCK_GIANT(vfslocked);
1924 	return (error);
1925 }
1926 
1927 #if defined(COMPAT_43)
1928 /*
1929  * Get file status; this version follows links.
1930  */
1931 #ifndef _SYS_SYSPROTO_H_
1932 struct ostat_args {
1933 	char	*path;
1934 	struct ostat *ub;
1935 };
1936 #endif
1937 int
1938 ostat(td, uap)
1939 	struct thread *td;
1940 	register struct ostat_args /* {
1941 		char *path;
1942 		struct ostat *ub;
1943 	} */ *uap;
1944 {
1945 	struct stat sb;
1946 	struct ostat osb;
1947 	int error;
1948 
1949 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1950 	if (error)
1951 		return (error);
1952 	cvtstat(&sb, &osb);
1953 	error = copyout(&osb, uap->ub, sizeof (osb));
1954 	return (error);
1955 }
1956 
1957 /*
1958  * Get file status; this version does not follow links.
1959  */
1960 #ifndef _SYS_SYSPROTO_H_
1961 struct olstat_args {
1962 	char	*path;
1963 	struct ostat *ub;
1964 };
1965 #endif
1966 int
1967 olstat(td, uap)
1968 	struct thread *td;
1969 	register struct olstat_args /* {
1970 		char *path;
1971 		struct ostat *ub;
1972 	} */ *uap;
1973 {
1974 	struct stat sb;
1975 	struct ostat osb;
1976 	int error;
1977 
1978 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
1979 	if (error)
1980 		return (error);
1981 	cvtstat(&sb, &osb);
1982 	error = copyout(&osb, uap->ub, sizeof (osb));
1983 	return (error);
1984 }
1985 
1986 /*
1987  * Convert from an old to a new stat structure.
1988  */
1989 void
1990 cvtstat(st, ost)
1991 	struct stat *st;
1992 	struct ostat *ost;
1993 {
1994 
1995 	ost->st_dev = st->st_dev;
1996 	ost->st_ino = st->st_ino;
1997 	ost->st_mode = st->st_mode;
1998 	ost->st_nlink = st->st_nlink;
1999 	ost->st_uid = st->st_uid;
2000 	ost->st_gid = st->st_gid;
2001 	ost->st_rdev = st->st_rdev;
2002 	if (st->st_size < (quad_t)1 << 32)
2003 		ost->st_size = st->st_size;
2004 	else
2005 		ost->st_size = -2;
2006 	ost->st_atime = st->st_atime;
2007 	ost->st_mtime = st->st_mtime;
2008 	ost->st_ctime = st->st_ctime;
2009 	ost->st_blksize = st->st_blksize;
2010 	ost->st_blocks = st->st_blocks;
2011 	ost->st_flags = st->st_flags;
2012 	ost->st_gen = st->st_gen;
2013 }
2014 #endif /* COMPAT_43 */
2015 
2016 /*
2017  * Get file status; this version follows links.
2018  */
2019 #ifndef _SYS_SYSPROTO_H_
2020 struct stat_args {
2021 	char	*path;
2022 	struct stat *ub;
2023 };
2024 #endif
2025 int
2026 stat(td, uap)
2027 	struct thread *td;
2028 	register struct stat_args /* {
2029 		char *path;
2030 		struct stat *ub;
2031 	} */ *uap;
2032 {
2033 	struct stat sb;
2034 	int error;
2035 
2036 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2037 	if (error == 0)
2038 		error = copyout(&sb, uap->ub, sizeof (sb));
2039 	return (error);
2040 }
2041 
2042 int
2043 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2044 {
2045 	struct nameidata nd;
2046 	struct stat sb;
2047 	int error, vfslocked;
2048 
2049 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE,
2050 	    pathseg, path, td);
2051 	if ((error = namei(&nd)) != 0)
2052 		return (error);
2053 	vfslocked = NDHASGIANT(&nd);
2054 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2055 	NDFREE(&nd, NDF_ONLY_PNBUF);
2056 	vput(nd.ni_vp);
2057 	VFS_UNLOCK_GIANT(vfslocked);
2058 	if (error)
2059 		return (error);
2060 	*sbp = sb;
2061 	return (0);
2062 }
2063 
2064 /*
2065  * Get file status; this version does not follow links.
2066  */
2067 #ifndef _SYS_SYSPROTO_H_
2068 struct lstat_args {
2069 	char	*path;
2070 	struct stat *ub;
2071 };
2072 #endif
2073 int
2074 lstat(td, uap)
2075 	struct thread *td;
2076 	register struct lstat_args /* {
2077 		char *path;
2078 		struct stat *ub;
2079 	} */ *uap;
2080 {
2081 	struct stat sb;
2082 	int error;
2083 
2084 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2085 	if (error == 0)
2086 		error = copyout(&sb, uap->ub, sizeof (sb));
2087 	return (error);
2088 }
2089 
2090 int
2091 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2092 {
2093 	struct vnode *vp;
2094 	struct stat sb;
2095 	struct nameidata nd;
2096 	int error, vfslocked;
2097 
2098 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE,
2099 	    pathseg, path, td);
2100 	if ((error = namei(&nd)) != 0)
2101 		return (error);
2102 	vfslocked = NDHASGIANT(&nd);
2103 	vp = nd.ni_vp;
2104 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2105 	NDFREE(&nd, NDF_ONLY_PNBUF);
2106 	vput(vp);
2107 	VFS_UNLOCK_GIANT(vfslocked);
2108 	if (error)
2109 		return (error);
2110 	*sbp = sb;
2111 	return (0);
2112 }
2113 
2114 /*
2115  * Implementation of the NetBSD [l]stat() functions.
2116  */
2117 void
2118 cvtnstat(sb, nsb)
2119 	struct stat *sb;
2120 	struct nstat *nsb;
2121 {
2122 	bzero(nsb, sizeof *nsb);
2123 	nsb->st_dev = sb->st_dev;
2124 	nsb->st_ino = sb->st_ino;
2125 	nsb->st_mode = sb->st_mode;
2126 	nsb->st_nlink = sb->st_nlink;
2127 	nsb->st_uid = sb->st_uid;
2128 	nsb->st_gid = sb->st_gid;
2129 	nsb->st_rdev = sb->st_rdev;
2130 	nsb->st_atimespec = sb->st_atimespec;
2131 	nsb->st_mtimespec = sb->st_mtimespec;
2132 	nsb->st_ctimespec = sb->st_ctimespec;
2133 	nsb->st_size = sb->st_size;
2134 	nsb->st_blocks = sb->st_blocks;
2135 	nsb->st_blksize = sb->st_blksize;
2136 	nsb->st_flags = sb->st_flags;
2137 	nsb->st_gen = sb->st_gen;
2138 	nsb->st_birthtimespec = sb->st_birthtimespec;
2139 }
2140 
2141 #ifndef _SYS_SYSPROTO_H_
2142 struct nstat_args {
2143 	char	*path;
2144 	struct nstat *ub;
2145 };
2146 #endif
2147 int
2148 nstat(td, uap)
2149 	struct thread *td;
2150 	register struct nstat_args /* {
2151 		char *path;
2152 		struct nstat *ub;
2153 	} */ *uap;
2154 {
2155 	struct stat sb;
2156 	struct nstat nsb;
2157 	int error;
2158 
2159 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2160 	if (error)
2161 		return (error);
2162 	cvtnstat(&sb, &nsb);
2163 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2164 	return (error);
2165 }
2166 
2167 /*
2168  * NetBSD lstat.  Get file status; this version does not follow links.
2169  */
2170 #ifndef _SYS_SYSPROTO_H_
2171 struct lstat_args {
2172 	char	*path;
2173 	struct stat *ub;
2174 };
2175 #endif
2176 int
2177 nlstat(td, uap)
2178 	struct thread *td;
2179 	register struct nlstat_args /* {
2180 		char *path;
2181 		struct nstat *ub;
2182 	} */ *uap;
2183 {
2184 	struct stat sb;
2185 	struct nstat nsb;
2186 	int error;
2187 
2188 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2189 	if (error)
2190 		return (error);
2191 	cvtnstat(&sb, &nsb);
2192 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2193 	return (error);
2194 }
2195 
2196 /*
2197  * Get configurable pathname variables.
2198  */
2199 #ifndef _SYS_SYSPROTO_H_
2200 struct pathconf_args {
2201 	char	*path;
2202 	int	name;
2203 };
2204 #endif
2205 int
2206 pathconf(td, uap)
2207 	struct thread *td;
2208 	register struct pathconf_args /* {
2209 		char *path;
2210 		int name;
2211 	} */ *uap;
2212 {
2213 
2214 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2215 }
2216 
2217 int
2218 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2219 {
2220 	struct nameidata nd;
2221 	int error, vfslocked;
2222 
2223 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
2224 	if ((error = namei(&nd)) != 0)
2225 		return (error);
2226 	vfslocked = NDHASGIANT(&nd);
2227 	NDFREE(&nd, NDF_ONLY_PNBUF);
2228 
2229 	/* If asynchronous I/O is available, it works for all files. */
2230 	if (name == _PC_ASYNC_IO)
2231 		td->td_retval[0] = async_io_version;
2232 	else
2233 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2234 	vput(nd.ni_vp);
2235 	VFS_UNLOCK_GIANT(vfslocked);
2236 	return (error);
2237 }
2238 
2239 /*
2240  * Return target name of a symbolic link.
2241  */
2242 #ifndef _SYS_SYSPROTO_H_
2243 struct readlink_args {
2244 	char	*path;
2245 	char	*buf;
2246 	int	count;
2247 };
2248 #endif
2249 int
2250 readlink(td, uap)
2251 	struct thread *td;
2252 	register struct readlink_args /* {
2253 		char *path;
2254 		char *buf;
2255 		int count;
2256 	} */ *uap;
2257 {
2258 
2259 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2260 	    UIO_USERSPACE, uap->count));
2261 }
2262 
2263 int
2264 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2265     enum uio_seg bufseg, int count)
2266 {
2267 	register struct vnode *vp;
2268 	struct iovec aiov;
2269 	struct uio auio;
2270 	int error;
2271 	struct nameidata nd;
2272 	int vfslocked;
2273 
2274 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
2275 	if ((error = namei(&nd)) != 0)
2276 		return (error);
2277 	NDFREE(&nd, NDF_ONLY_PNBUF);
2278 	vfslocked = NDHASGIANT(&nd);
2279 	vp = nd.ni_vp;
2280 #ifdef MAC
2281 	error = mac_check_vnode_readlink(td->td_ucred, vp);
2282 	if (error) {
2283 		vput(vp);
2284 		VFS_UNLOCK_GIANT(vfslocked);
2285 		return (error);
2286 	}
2287 #endif
2288 	if (vp->v_type != VLNK)
2289 		error = EINVAL;
2290 	else {
2291 		aiov.iov_base = buf;
2292 		aiov.iov_len = count;
2293 		auio.uio_iov = &aiov;
2294 		auio.uio_iovcnt = 1;
2295 		auio.uio_offset = 0;
2296 		auio.uio_rw = UIO_READ;
2297 		auio.uio_segflg = bufseg;
2298 		auio.uio_td = td;
2299 		auio.uio_resid = count;
2300 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2301 	}
2302 	vput(vp);
2303 	VFS_UNLOCK_GIANT(vfslocked);
2304 	td->td_retval[0] = count - auio.uio_resid;
2305 	return (error);
2306 }
2307 
2308 /*
2309  * Common implementation code for chflags() and fchflags().
2310  */
2311 static int
2312 setfflags(td, vp, flags)
2313 	struct thread *td;
2314 	struct vnode *vp;
2315 	int flags;
2316 {
2317 	int error;
2318 	struct mount *mp;
2319 	struct vattr vattr;
2320 
2321 	/*
2322 	 * Prevent non-root users from setting flags on devices.  When
2323 	 * a device is reused, users can retain ownership of the device
2324 	 * if they are allowed to set flags and programs assume that
2325 	 * chown can't fail when done as root.
2326 	 */
2327 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2328 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
2329 		if (error)
2330 			return (error);
2331 	}
2332 
2333 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2334 		return (error);
2335 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2336 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2337 	VATTR_NULL(&vattr);
2338 	vattr.va_flags = flags;
2339 #ifdef MAC
2340 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2341 	if (error == 0)
2342 #endif
2343 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2344 	VOP_UNLOCK(vp, 0, td);
2345 	vn_finished_write(mp);
2346 	return (error);
2347 }
2348 
2349 /*
2350  * Change flags of a file given a path name.
2351  */
2352 #ifndef _SYS_SYSPROTO_H_
2353 struct chflags_args {
2354 	char	*path;
2355 	int	flags;
2356 };
2357 #endif
2358 int
2359 chflags(td, uap)
2360 	struct thread *td;
2361 	register struct chflags_args /* {
2362 		char *path;
2363 		int flags;
2364 	} */ *uap;
2365 {
2366 	int error;
2367 	struct nameidata nd;
2368 	int vfslocked;
2369 
2370 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td);
2371 	if ((error = namei(&nd)) != 0)
2372 		return (error);
2373 	NDFREE(&nd, NDF_ONLY_PNBUF);
2374 	vfslocked = NDHASGIANT(&nd);
2375 	error = setfflags(td, nd.ni_vp, uap->flags);
2376 	vrele(nd.ni_vp);
2377 	VFS_UNLOCK_GIANT(vfslocked);
2378 	return (error);
2379 }
2380 
2381 /*
2382  * Same as chflags() but doesn't follow symlinks.
2383  */
2384 int
2385 lchflags(td, uap)
2386 	struct thread *td;
2387 	register struct lchflags_args /* {
2388 		char *path;
2389 		int flags;
2390 	} */ *uap;
2391 {
2392 	int error;
2393 	struct nameidata nd;
2394 	int vfslocked;
2395 
2396 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td);
2397 	if ((error = namei(&nd)) != 0)
2398 		return (error);
2399 	vfslocked = NDHASGIANT(&nd);
2400 	NDFREE(&nd, NDF_ONLY_PNBUF);
2401 	error = setfflags(td, nd.ni_vp, uap->flags);
2402 	vrele(nd.ni_vp);
2403 	VFS_UNLOCK_GIANT(vfslocked);
2404 	return (error);
2405 }
2406 
2407 /*
2408  * Change flags of a file given a file descriptor.
2409  */
2410 #ifndef _SYS_SYSPROTO_H_
2411 struct fchflags_args {
2412 	int	fd;
2413 	int	flags;
2414 };
2415 #endif
2416 int
2417 fchflags(td, uap)
2418 	struct thread *td;
2419 	register struct fchflags_args /* {
2420 		int fd;
2421 		int flags;
2422 	} */ *uap;
2423 {
2424 	struct file *fp;
2425 	int vfslocked;
2426 	int error;
2427 
2428 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2429 		return (error);
2430 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2431 	error = setfflags(td, fp->f_vnode, uap->flags);
2432 	VFS_UNLOCK_GIANT(vfslocked);
2433 	fdrop(fp, td);
2434 	return (error);
2435 }
2436 
2437 /*
2438  * Common implementation code for chmod(), lchmod() and fchmod().
2439  */
2440 static int
2441 setfmode(td, vp, mode)
2442 	struct thread *td;
2443 	struct vnode *vp;
2444 	int mode;
2445 {
2446 	int error;
2447 	struct mount *mp;
2448 	struct vattr vattr;
2449 
2450 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2451 		return (error);
2452 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2453 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2454 	VATTR_NULL(&vattr);
2455 	vattr.va_mode = mode & ALLPERMS;
2456 #ifdef MAC
2457 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2458 	if (error == 0)
2459 #endif
2460 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2461 	VOP_UNLOCK(vp, 0, td);
2462 	vn_finished_write(mp);
2463 	return (error);
2464 }
2465 
2466 /*
2467  * Change mode of a file given path name.
2468  */
2469 #ifndef _SYS_SYSPROTO_H_
2470 struct chmod_args {
2471 	char	*path;
2472 	int	mode;
2473 };
2474 #endif
2475 int
2476 chmod(td, uap)
2477 	struct thread *td;
2478 	register struct chmod_args /* {
2479 		char *path;
2480 		int mode;
2481 	} */ *uap;
2482 {
2483 
2484 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2485 }
2486 
2487 int
2488 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2489 {
2490 	int error;
2491 	struct nameidata nd;
2492 	int vfslocked;
2493 
2494 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2495 	if ((error = namei(&nd)) != 0)
2496 		return (error);
2497 	vfslocked = NDHASGIANT(&nd);
2498 	NDFREE(&nd, NDF_ONLY_PNBUF);
2499 	error = setfmode(td, nd.ni_vp, mode);
2500 	vrele(nd.ni_vp);
2501 	VFS_UNLOCK_GIANT(vfslocked);
2502 	return (error);
2503 }
2504 
2505 /*
2506  * Change mode of a file given path name (don't follow links.)
2507  */
2508 #ifndef _SYS_SYSPROTO_H_
2509 struct lchmod_args {
2510 	char	*path;
2511 	int	mode;
2512 };
2513 #endif
2514 int
2515 lchmod(td, uap)
2516 	struct thread *td;
2517 	register struct lchmod_args /* {
2518 		char *path;
2519 		int mode;
2520 	} */ *uap;
2521 {
2522 	int error;
2523 	struct nameidata nd;
2524 	int vfslocked;
2525 
2526 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td);
2527 	if ((error = namei(&nd)) != 0)
2528 		return (error);
2529 	vfslocked = NDHASGIANT(&nd);
2530 	NDFREE(&nd, NDF_ONLY_PNBUF);
2531 	error = setfmode(td, nd.ni_vp, uap->mode);
2532 	vrele(nd.ni_vp);
2533 	VFS_UNLOCK_GIANT(vfslocked);
2534 	return (error);
2535 }
2536 
2537 /*
2538  * Change mode of a file given a file descriptor.
2539  */
2540 #ifndef _SYS_SYSPROTO_H_
2541 struct fchmod_args {
2542 	int	fd;
2543 	int	mode;
2544 };
2545 #endif
2546 int
2547 fchmod(td, uap)
2548 	struct thread *td;
2549 	register struct fchmod_args /* {
2550 		int fd;
2551 		int mode;
2552 	} */ *uap;
2553 {
2554 	struct file *fp;
2555 	int vfslocked;
2556 	int error;
2557 
2558 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2559 		return (error);
2560 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2561 	error = setfmode(td, fp->f_vnode, uap->mode);
2562 	VFS_UNLOCK_GIANT(vfslocked);
2563 	fdrop(fp, td);
2564 	return (error);
2565 }
2566 
2567 /*
2568  * Common implementation for chown(), lchown(), and fchown()
2569  */
2570 static int
2571 setfown(td, vp, uid, gid)
2572 	struct thread *td;
2573 	struct vnode *vp;
2574 	uid_t uid;
2575 	gid_t gid;
2576 {
2577 	int error;
2578 	struct mount *mp;
2579 	struct vattr vattr;
2580 
2581 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2582 		return (error);
2583 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2584 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2585 	VATTR_NULL(&vattr);
2586 	vattr.va_uid = uid;
2587 	vattr.va_gid = gid;
2588 #ifdef MAC
2589 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2590 	    vattr.va_gid);
2591 	if (error == 0)
2592 #endif
2593 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2594 	VOP_UNLOCK(vp, 0, td);
2595 	vn_finished_write(mp);
2596 	return (error);
2597 }
2598 
2599 /*
2600  * Set ownership given a path name.
2601  */
2602 #ifndef _SYS_SYSPROTO_H_
2603 struct chown_args {
2604 	char	*path;
2605 	int	uid;
2606 	int	gid;
2607 };
2608 #endif
2609 int
2610 chown(td, uap)
2611 	struct thread *td;
2612 	register struct chown_args /* {
2613 		char *path;
2614 		int uid;
2615 		int gid;
2616 	} */ *uap;
2617 {
2618 
2619 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2620 }
2621 
2622 int
2623 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2624     int gid)
2625 {
2626 	int error;
2627 	struct nameidata nd;
2628 	int vfslocked;
2629 
2630 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2631 	if ((error = namei(&nd)) != 0)
2632 		return (error);
2633 	vfslocked = NDHASGIANT(&nd);
2634 	NDFREE(&nd, NDF_ONLY_PNBUF);
2635 	error = setfown(td, nd.ni_vp, uid, gid);
2636 	vrele(nd.ni_vp);
2637 	VFS_UNLOCK_GIANT(vfslocked);
2638 	return (error);
2639 }
2640 
2641 /*
2642  * Set ownership given a path name, do not cross symlinks.
2643  */
2644 #ifndef _SYS_SYSPROTO_H_
2645 struct lchown_args {
2646 	char	*path;
2647 	int	uid;
2648 	int	gid;
2649 };
2650 #endif
2651 int
2652 lchown(td, uap)
2653 	struct thread *td;
2654 	register struct lchown_args /* {
2655 		char *path;
2656 		int uid;
2657 		int gid;
2658 	} */ *uap;
2659 {
2660 
2661 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2662 }
2663 
2664 int
2665 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2666     int gid)
2667 {
2668 	int error;
2669 	struct nameidata nd;
2670 	int vfslocked;
2671 
2672 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, pathseg, path, td);
2673 	if ((error = namei(&nd)) != 0)
2674 		return (error);
2675 	vfslocked = NDHASGIANT(&nd);
2676 	NDFREE(&nd, NDF_ONLY_PNBUF);
2677 	error = setfown(td, nd.ni_vp, uid, gid);
2678 	vrele(nd.ni_vp);
2679 	VFS_UNLOCK_GIANT(vfslocked);
2680 	return (error);
2681 }
2682 
2683 /*
2684  * Set ownership given a file descriptor.
2685  */
2686 #ifndef _SYS_SYSPROTO_H_
2687 struct fchown_args {
2688 	int	fd;
2689 	int	uid;
2690 	int	gid;
2691 };
2692 #endif
2693 int
2694 fchown(td, uap)
2695 	struct thread *td;
2696 	register struct fchown_args /* {
2697 		int fd;
2698 		int uid;
2699 		int gid;
2700 	} */ *uap;
2701 {
2702 	struct file *fp;
2703 	int vfslocked;
2704 	int error;
2705 
2706 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2707 		return (error);
2708 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2709 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2710 	VFS_UNLOCK_GIANT(vfslocked);
2711 	fdrop(fp, td);
2712 	return (error);
2713 }
2714 
2715 /*
2716  * Common implementation code for utimes(), lutimes(), and futimes().
2717  */
2718 static int
2719 getutimes(usrtvp, tvpseg, tsp)
2720 	const struct timeval *usrtvp;
2721 	enum uio_seg tvpseg;
2722 	struct timespec *tsp;
2723 {
2724 	struct timeval tv[2];
2725 	const struct timeval *tvp;
2726 	int error;
2727 
2728 	if (usrtvp == NULL) {
2729 		microtime(&tv[0]);
2730 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2731 		tsp[1] = tsp[0];
2732 	} else {
2733 		if (tvpseg == UIO_SYSSPACE) {
2734 			tvp = usrtvp;
2735 		} else {
2736 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2737 				return (error);
2738 			tvp = tv;
2739 		}
2740 
2741 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2742 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2743 	}
2744 	return (0);
2745 }
2746 
2747 /*
2748  * Common implementation code for utimes(), lutimes(), and futimes().
2749  */
2750 static int
2751 setutimes(td, vp, ts, numtimes, nullflag)
2752 	struct thread *td;
2753 	struct vnode *vp;
2754 	const struct timespec *ts;
2755 	int numtimes;
2756 	int nullflag;
2757 {
2758 	int error, setbirthtime;
2759 	struct mount *mp;
2760 	struct vattr vattr;
2761 
2762 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2763 		return (error);
2764 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2765 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2766 	setbirthtime = 0;
2767 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2768 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2769 		setbirthtime = 1;
2770 	VATTR_NULL(&vattr);
2771 	vattr.va_atime = ts[0];
2772 	vattr.va_mtime = ts[1];
2773 	if (setbirthtime)
2774 		vattr.va_birthtime = ts[1];
2775 	if (numtimes > 2)
2776 		vattr.va_birthtime = ts[2];
2777 	if (nullflag)
2778 		vattr.va_vaflags |= VA_UTIMES_NULL;
2779 #ifdef MAC
2780 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2781 	    vattr.va_mtime);
2782 #endif
2783 	if (error == 0)
2784 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2785 	VOP_UNLOCK(vp, 0, td);
2786 	vn_finished_write(mp);
2787 	return (error);
2788 }
2789 
2790 /*
2791  * Set the access and modification times of a file.
2792  */
2793 #ifndef _SYS_SYSPROTO_H_
2794 struct utimes_args {
2795 	char	*path;
2796 	struct	timeval *tptr;
2797 };
2798 #endif
2799 int
2800 utimes(td, uap)
2801 	struct thread *td;
2802 	register struct utimes_args /* {
2803 		char *path;
2804 		struct timeval *tptr;
2805 	} */ *uap;
2806 {
2807 
2808 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2809 	    UIO_USERSPACE));
2810 }
2811 
2812 int
2813 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2814     struct timeval *tptr, enum uio_seg tptrseg)
2815 {
2816 	struct timespec ts[2];
2817 	int error;
2818 	struct nameidata nd;
2819 	int vfslocked;
2820 
2821 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2822 		return (error);
2823 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2824 	if ((error = namei(&nd)) != 0)
2825 		return (error);
2826 	vfslocked = NDHASGIANT(&nd);
2827 	NDFREE(&nd, NDF_ONLY_PNBUF);
2828 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2829 	vrele(nd.ni_vp);
2830 	VFS_UNLOCK_GIANT(vfslocked);
2831 	return (error);
2832 }
2833 
2834 /*
2835  * Set the access and modification times of a file.
2836  */
2837 #ifndef _SYS_SYSPROTO_H_
2838 struct lutimes_args {
2839 	char	*path;
2840 	struct	timeval *tptr;
2841 };
2842 #endif
2843 int
2844 lutimes(td, uap)
2845 	struct thread *td;
2846 	register struct lutimes_args /* {
2847 		char *path;
2848 		struct timeval *tptr;
2849 	} */ *uap;
2850 {
2851 
2852 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2853 	    UIO_USERSPACE));
2854 }
2855 
2856 int
2857 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2858     struct timeval *tptr, enum uio_seg tptrseg)
2859 {
2860 	struct timespec ts[2];
2861 	int error;
2862 	struct nameidata nd;
2863 	int vfslocked;
2864 
2865 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2866 		return (error);
2867 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, pathseg, path, td);
2868 	if ((error = namei(&nd)) != 0)
2869 		return (error);
2870 	vfslocked = NDHASGIANT(&nd);
2871 	NDFREE(&nd, NDF_ONLY_PNBUF);
2872 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2873 	vrele(nd.ni_vp);
2874 	VFS_UNLOCK_GIANT(vfslocked);
2875 	return (error);
2876 }
2877 
2878 /*
2879  * Set the access and modification times of a file.
2880  */
2881 #ifndef _SYS_SYSPROTO_H_
2882 struct futimes_args {
2883 	int	fd;
2884 	struct	timeval *tptr;
2885 };
2886 #endif
2887 int
2888 futimes(td, uap)
2889 	struct thread *td;
2890 	register struct futimes_args /* {
2891 		int  fd;
2892 		struct timeval *tptr;
2893 	} */ *uap;
2894 {
2895 
2896 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2897 }
2898 
2899 int
2900 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2901     enum uio_seg tptrseg)
2902 {
2903 	struct timespec ts[2];
2904 	struct file *fp;
2905 	int vfslocked;
2906 	int error;
2907 
2908 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2909 		return (error);
2910 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2911 		return (error);
2912 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2913 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2914 	VFS_UNLOCK_GIANT(vfslocked);
2915 	fdrop(fp, td);
2916 	return (error);
2917 }
2918 
2919 /*
2920  * Truncate a file given its path name.
2921  */
2922 #ifndef _SYS_SYSPROTO_H_
2923 struct truncate_args {
2924 	char	*path;
2925 	int	pad;
2926 	off_t	length;
2927 };
2928 #endif
2929 int
2930 truncate(td, uap)
2931 	struct thread *td;
2932 	register struct truncate_args /* {
2933 		char *path;
2934 		int pad;
2935 		off_t length;
2936 	} */ *uap;
2937 {
2938 
2939 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2940 }
2941 
2942 int
2943 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2944 {
2945 	struct mount *mp;
2946 	struct vnode *vp;
2947 	struct vattr vattr;
2948 	int error;
2949 	struct nameidata nd;
2950 	int vfslocked;
2951 
2952 	if (length < 0)
2953 		return(EINVAL);
2954 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2955 	if ((error = namei(&nd)) != 0)
2956 		return (error);
2957 	vfslocked = NDHASGIANT(&nd);
2958 	vp = nd.ni_vp;
2959 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2960 		vrele(vp);
2961 		VFS_UNLOCK_GIANT(vfslocked);
2962 		return (error);
2963 	}
2964 	NDFREE(&nd, NDF_ONLY_PNBUF);
2965 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2966 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2967 	if (vp->v_type == VDIR)
2968 		error = EISDIR;
2969 #ifdef MAC
2970 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2971 	}
2972 #endif
2973 	else if ((error = vn_writechk(vp)) == 0 &&
2974 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2975 		VATTR_NULL(&vattr);
2976 		vattr.va_size = length;
2977 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2978 	}
2979 	vput(vp);
2980 	vn_finished_write(mp);
2981 	VFS_UNLOCK_GIANT(vfslocked);
2982 	return (error);
2983 }
2984 
2985 /*
2986  * Truncate a file given a file descriptor.
2987  */
2988 #ifndef _SYS_SYSPROTO_H_
2989 struct ftruncate_args {
2990 	int	fd;
2991 	int	pad;
2992 	off_t	length;
2993 };
2994 #endif
2995 int
2996 ftruncate(td, uap)
2997 	struct thread *td;
2998 	register struct ftruncate_args /* {
2999 		int fd;
3000 		int pad;
3001 		off_t length;
3002 	} */ *uap;
3003 {
3004 	struct mount *mp;
3005 	struct vattr vattr;
3006 	struct vnode *vp;
3007 	struct file *fp;
3008 	int vfslocked;
3009 	int error;
3010 
3011 	if (uap->length < 0)
3012 		return(EINVAL);
3013 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3014 		return (error);
3015 	if ((fp->f_flag & FWRITE) == 0) {
3016 		fdrop(fp, td);
3017 		return (EINVAL);
3018 	}
3019 	vp = fp->f_vnode;
3020 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3021 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3022 		goto drop;
3023 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3024 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3025 	if (vp->v_type == VDIR)
3026 		error = EISDIR;
3027 #ifdef MAC
3028 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3029 	    vp))) {
3030 	}
3031 #endif
3032 	else if ((error = vn_writechk(vp)) == 0) {
3033 		VATTR_NULL(&vattr);
3034 		vattr.va_size = uap->length;
3035 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3036 	}
3037 	VOP_UNLOCK(vp, 0, td);
3038 	vn_finished_write(mp);
3039 drop:
3040 	VFS_UNLOCK_GIANT(vfslocked);
3041 	fdrop(fp, td);
3042 	return (error);
3043 }
3044 
3045 #if defined(COMPAT_43)
3046 /*
3047  * Truncate a file given its path name.
3048  */
3049 #ifndef _SYS_SYSPROTO_H_
3050 struct otruncate_args {
3051 	char	*path;
3052 	long	length;
3053 };
3054 #endif
3055 int
3056 otruncate(td, uap)
3057 	struct thread *td;
3058 	register struct otruncate_args /* {
3059 		char *path;
3060 		long length;
3061 	} */ *uap;
3062 {
3063 	struct truncate_args /* {
3064 		char *path;
3065 		int pad;
3066 		off_t length;
3067 	} */ nuap;
3068 
3069 	nuap.path = uap->path;
3070 	nuap.length = uap->length;
3071 	return (truncate(td, &nuap));
3072 }
3073 
3074 /*
3075  * Truncate a file given a file descriptor.
3076  */
3077 #ifndef _SYS_SYSPROTO_H_
3078 struct oftruncate_args {
3079 	int	fd;
3080 	long	length;
3081 };
3082 #endif
3083 int
3084 oftruncate(td, uap)
3085 	struct thread *td;
3086 	register struct oftruncate_args /* {
3087 		int fd;
3088 		long length;
3089 	} */ *uap;
3090 {
3091 	struct ftruncate_args /* {
3092 		int fd;
3093 		int pad;
3094 		off_t length;
3095 	} */ nuap;
3096 
3097 	nuap.fd = uap->fd;
3098 	nuap.length = uap->length;
3099 	return (ftruncate(td, &nuap));
3100 }
3101 #endif /* COMPAT_43 */
3102 
3103 /*
3104  * Sync an open file.
3105  */
3106 #ifndef _SYS_SYSPROTO_H_
3107 struct fsync_args {
3108 	int	fd;
3109 };
3110 #endif
3111 int
3112 fsync(td, uap)
3113 	struct thread *td;
3114 	struct fsync_args /* {
3115 		int fd;
3116 	} */ *uap;
3117 {
3118 	struct vnode *vp;
3119 	struct mount *mp;
3120 	struct file *fp;
3121 	int vfslocked;
3122 	int error;
3123 
3124 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3125 		return (error);
3126 	vp = fp->f_vnode;
3127 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3128 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3129 		goto drop;
3130 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3131 	if (vp->v_object != NULL) {
3132 		VM_OBJECT_LOCK(vp->v_object);
3133 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3134 		VM_OBJECT_UNLOCK(vp->v_object);
3135 	}
3136 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3137 
3138 	VOP_UNLOCK(vp, 0, td);
3139 	vn_finished_write(mp);
3140 drop:
3141 	VFS_UNLOCK_GIANT(vfslocked);
3142 	fdrop(fp, td);
3143 	return (error);
3144 }
3145 
3146 /*
3147  * Rename files.  Source and destination must either both be directories,
3148  * or both not be directories.  If target is a directory, it must be empty.
3149  */
3150 #ifndef _SYS_SYSPROTO_H_
3151 struct rename_args {
3152 	char	*from;
3153 	char	*to;
3154 };
3155 #endif
3156 int
3157 rename(td, uap)
3158 	struct thread *td;
3159 	register struct rename_args /* {
3160 		char *from;
3161 		char *to;
3162 	} */ *uap;
3163 {
3164 
3165 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3166 }
3167 
3168 int
3169 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3170 {
3171 	struct mount *mp = NULL;
3172 	struct vnode *tvp, *fvp, *tdvp;
3173 	struct nameidata fromnd, tond;
3174 	int tvfslocked;
3175 	int fvfslocked;
3176 	int error;
3177 
3178 	bwillwrite();
3179 #ifdef MAC
3180 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE,
3181 	    pathseg, from, td);
3182 #else
3183 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE,
3184 	    pathseg, from, td);
3185 #endif
3186 	if ((error = namei(&fromnd)) != 0)
3187 		return (error);
3188 	fvfslocked = NDHASGIANT(&fromnd);
3189 	tvfslocked = 0;
3190 #ifdef MAC
3191 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3192 	    fromnd.ni_vp, &fromnd.ni_cnd);
3193 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3194 	VOP_UNLOCK(fromnd.ni_vp, 0, td);
3195 #endif
3196 	fvp = fromnd.ni_vp;
3197 	if (error == 0)
3198 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3199 	if (error != 0) {
3200 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3201 		vrele(fromnd.ni_dvp);
3202 		vrele(fvp);
3203 		goto out1;
3204 	}
3205 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3206 	    MPSAFE, pathseg, to, td);
3207 	if (fromnd.ni_vp->v_type == VDIR)
3208 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3209 	if ((error = namei(&tond)) != 0) {
3210 		/* Translate error code for rename("dir1", "dir2/."). */
3211 		if (error == EISDIR && fvp->v_type == VDIR)
3212 			error = EINVAL;
3213 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3214 		vrele(fromnd.ni_dvp);
3215 		vrele(fvp);
3216 		vn_finished_write(mp);
3217 		goto out1;
3218 	}
3219 	tvfslocked = NDHASGIANT(&tond);
3220 	tdvp = tond.ni_dvp;
3221 	tvp = tond.ni_vp;
3222 	if (tvp != NULL) {
3223 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3224 			error = ENOTDIR;
3225 			goto out;
3226 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3227 			error = EISDIR;
3228 			goto out;
3229 		}
3230 	}
3231 	if (fvp == tdvp)
3232 		error = EINVAL;
3233 	/*
3234 	 * If the source is the same as the destination (that is, if they
3235 	 * are links to the same vnode), then there is nothing to do.
3236 	 */
3237 	if (fvp == tvp)
3238 		error = -1;
3239 #ifdef MAC
3240 	else
3241 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3242 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3243 #endif
3244 out:
3245 	if (!error) {
3246 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3247 		if (fromnd.ni_dvp != tdvp) {
3248 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3249 		}
3250 		if (tvp) {
3251 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3252 		}
3253 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3254 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3255 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3256 		NDFREE(&tond, NDF_ONLY_PNBUF);
3257 	} else {
3258 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3259 		NDFREE(&tond, NDF_ONLY_PNBUF);
3260 		if (tvp)
3261 			vput(tvp);
3262 		if (tdvp == tvp)
3263 			vrele(tdvp);
3264 		else
3265 			vput(tdvp);
3266 		vrele(fromnd.ni_dvp);
3267 		vrele(fvp);
3268 	}
3269 	vrele(tond.ni_startdir);
3270 	vn_finished_write(mp);
3271 out1:
3272 	if (fromnd.ni_startdir)
3273 		vrele(fromnd.ni_startdir);
3274 	VFS_UNLOCK_GIANT(fvfslocked);
3275 	VFS_UNLOCK_GIANT(tvfslocked);
3276 	if (error == -1)
3277 		return (0);
3278 	return (error);
3279 }
3280 
3281 /*
3282  * Make a directory file.
3283  */
3284 #ifndef _SYS_SYSPROTO_H_
3285 struct mkdir_args {
3286 	char	*path;
3287 	int	mode;
3288 };
3289 #endif
3290 int
3291 mkdir(td, uap)
3292 	struct thread *td;
3293 	register struct mkdir_args /* {
3294 		char *path;
3295 		int mode;
3296 	} */ *uap;
3297 {
3298 
3299 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3300 }
3301 
3302 int
3303 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3304 {
3305 	struct mount *mp;
3306 	struct vnode *vp;
3307 	struct vattr vattr;
3308 	int error;
3309 	struct nameidata nd;
3310 	int vfslocked;
3311 
3312 restart:
3313 	bwillwrite();
3314 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, segflg, path, td);
3315 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3316 	if ((error = namei(&nd)) != 0)
3317 		return (error);
3318 	vfslocked = NDHASGIANT(&nd);
3319 	vp = nd.ni_vp;
3320 	if (vp != NULL) {
3321 		NDFREE(&nd, NDF_ONLY_PNBUF);
3322 		vrele(vp);
3323 		/*
3324 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3325 		 * the strange behaviour of leaving the vnode unlocked
3326 		 * if the target is the same vnode as the parent.
3327 		 */
3328 		if (vp == nd.ni_dvp)
3329 			vrele(nd.ni_dvp);
3330 		else
3331 			vput(nd.ni_dvp);
3332 		VFS_UNLOCK_GIANT(vfslocked);
3333 		return (EEXIST);
3334 	}
3335 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3336 		NDFREE(&nd, NDF_ONLY_PNBUF);
3337 		vput(nd.ni_dvp);
3338 		VFS_UNLOCK_GIANT(vfslocked);
3339 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3340 			return (error);
3341 		goto restart;
3342 	}
3343 	VATTR_NULL(&vattr);
3344 	vattr.va_type = VDIR;
3345 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3346 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3347 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3348 #ifdef MAC
3349 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3350 	    &vattr);
3351 	if (error)
3352 		goto out;
3353 #endif
3354 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3355 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3356 #ifdef MAC
3357 out:
3358 #endif
3359 	NDFREE(&nd, NDF_ONLY_PNBUF);
3360 	vput(nd.ni_dvp);
3361 	if (!error)
3362 		vput(nd.ni_vp);
3363 	vn_finished_write(mp);
3364 	VFS_UNLOCK_GIANT(vfslocked);
3365 	return (error);
3366 }
3367 
3368 /*
3369  * Remove a directory file.
3370  */
3371 #ifndef _SYS_SYSPROTO_H_
3372 struct rmdir_args {
3373 	char	*path;
3374 };
3375 #endif
3376 int
3377 rmdir(td, uap)
3378 	struct thread *td;
3379 	struct rmdir_args /* {
3380 		char *path;
3381 	} */ *uap;
3382 {
3383 
3384 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3385 }
3386 
3387 int
3388 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3389 {
3390 	struct mount *mp;
3391 	struct vnode *vp;
3392 	int error;
3393 	struct nameidata nd;
3394 	int vfslocked;
3395 
3396 restart:
3397 	bwillwrite();
3398 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE, pathseg, path, td);
3399 	if ((error = namei(&nd)) != 0)
3400 		return (error);
3401 	vfslocked = NDHASGIANT(&nd);
3402 	vp = nd.ni_vp;
3403 	if (vp->v_type != VDIR) {
3404 		error = ENOTDIR;
3405 		goto out;
3406 	}
3407 	/*
3408 	 * No rmdir "." please.
3409 	 */
3410 	if (nd.ni_dvp == vp) {
3411 		error = EINVAL;
3412 		goto out;
3413 	}
3414 	/*
3415 	 * The root of a mounted filesystem cannot be deleted.
3416 	 */
3417 	if (vp->v_vflag & VV_ROOT) {
3418 		error = EBUSY;
3419 		goto out;
3420 	}
3421 #ifdef MAC
3422 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3423 	    &nd.ni_cnd);
3424 	if (error)
3425 		goto out;
3426 #endif
3427 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3428 		NDFREE(&nd, NDF_ONLY_PNBUF);
3429 		if (nd.ni_dvp == vp)
3430 			vrele(nd.ni_dvp);
3431 		else
3432 			vput(nd.ni_dvp);
3433 		vput(vp);
3434 		VFS_UNLOCK_GIANT(vfslocked);
3435 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3436 			return (error);
3437 		goto restart;
3438 	}
3439 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3440 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3441 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3442 	vn_finished_write(mp);
3443 out:
3444 	NDFREE(&nd, NDF_ONLY_PNBUF);
3445 	if (nd.ni_dvp == vp)
3446 		vrele(nd.ni_dvp);
3447 	else
3448 		vput(nd.ni_dvp);
3449 	vput(vp);
3450 	VFS_UNLOCK_GIANT(vfslocked);
3451 	return (error);
3452 }
3453 
3454 #ifdef COMPAT_43
/*
 * Read a block of directory entries in a filesystem independent format.
 *
 * Old (COMPAT_43) variant of getdirentries().  Unless the entries are
 * read straight into the user buffer (big-endian hosts with
 * mnt_maxsymlinklen <= 0), they are read into a temporary kernel
 * buffer, the d_type/d_namlen bytes of every entry are rewritten to the
 * layout the old interface expects, and the result is then copied out.
 *
 * On success td->td_retval[0] holds the number of bytes transferred and
 * the file offset from before the read is copied out to uap->basep.
 * Returns EINVAL if count exceeds 64KB or the file is not a directory,
 * EBADF if the descriptor is not open for reading, EIO if an entry
 * with a zero d_reclen is found, otherwise 0 or the error from the
 * underlying calls.
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(td, uap)
	struct thread *td;
	register struct ogetdirentries_args /* {
		int fd;
		char *buf;
		u_int count;
		long *basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
unionread:
	/* Re-entered with a different vp by the union handling below. */
	if (vp->v_type != VDIR) {
		fdrop(fp, td);
		return (EINVAL);
	}
	/* auio describes the caller's buffer in user space. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* loff keeps the starting offset; it is what basep receives. */
	loff = auio.uio_offset = fp->f_offset;
#ifdef MAC
	error = mac_check_vnode_readdir(td->td_ucred, vp);
	if (error) {
		VOP_UNLOCK(vp, 0, td);
		fdrop(fp, td);
		return (error);
	}
#endif
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			/* Entries are read directly into the user buffer. */
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/*
		 * Read into a kernel bounce buffer (kuio is a copy of auio
		 * redirected to dirbuf) so the entries can be patched
		 * before they are handed to the caller.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			/* Walk every entry actually read and patch it. */
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					/* A zero-length record would loop forever. */
					error = EIO;
					break;
				}
			}
			/* Copy out only if the walk reached the end cleanly. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, td);
	if (error) {
		fdrop(fp, td);
		return (error);
	}
	/* Nothing was transferred: see whether another layer should be read. */
	if (uap->count == auio.uio_resid) {
		if (union_dircheckp) {
			error = union_dircheckp(td, &vp, fp);
			/* -1 asks for the read to be redone on the updated vp. */
			if (error == -1)
				goto unionread;
			if (error) {
				fdrop(fp, td);
				return (error);
			}
		}
		/*
		 * XXX We could delay dropping the lock above but
		 * union_dircheckp complicates things.
		 */
		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
		if ((vp->v_vflag & VV_ROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			/*
			 * Root of a MNT_UNION mount: switch the open file to
			 * the covered vnode, rewind, and read from there.
			 */
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->f_vnode = vp;
			fp->f_data = vp;
			fp->f_offset = 0;
			vput(tvp);
			goto unionread;
		}
		VOP_UNLOCK(vp, 0, td);
	}
	error = copyout(&loff, uap->basep, sizeof(long));
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	return (error);
}
3607 #endif /* COMPAT_43 */
3608 
3609 /*
3610  * Read a block of directory entries in a filesystem independent format.
3611  */
3612 #ifndef _SYS_SYSPROTO_H_
3613 struct getdirentries_args {
3614 	int	fd;
3615 	char	*buf;
3616 	u_int	count;
3617 	long	*basep;
3618 };
3619 #endif
3620 int
3621 getdirentries(td, uap)
3622 	struct thread *td;
3623 	register struct getdirentries_args /* {
3624 		int fd;
3625 		char *buf;
3626 		u_int count;
3627 		long *basep;
3628 	} */ *uap;
3629 {
3630 	struct vnode *vp;
3631 	struct file *fp;
3632 	struct uio auio;
3633 	struct iovec aiov;
3634 	int vfslocked;
3635 	long loff;
3636 	int error, eofflag;
3637 
3638 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3639 		return (error);
3640 	if ((fp->f_flag & FREAD) == 0) {
3641 		fdrop(fp, td);
3642 		return (EBADF);
3643 	}
3644 	vp = fp->f_vnode;
3645 unionread:
3646 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3647 	if (vp->v_type != VDIR) {
3648 		error = EINVAL;
3649 		goto fail;
3650 	}
3651 	aiov.iov_base = uap->buf;
3652 	aiov.iov_len = uap->count;
3653 	auio.uio_iov = &aiov;
3654 	auio.uio_iovcnt = 1;
3655 	auio.uio_rw = UIO_READ;
3656 	auio.uio_segflg = UIO_USERSPACE;
3657 	auio.uio_td = td;
3658 	auio.uio_resid = uap->count;
3659 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3660 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3661 	loff = auio.uio_offset = fp->f_offset;
3662 #ifdef MAC
3663 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3664 	if (error == 0)
3665 #endif
3666 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3667 		    NULL);
3668 	fp->f_offset = auio.uio_offset;
3669 	VOP_UNLOCK(vp, 0, td);
3670 	if (error)
3671 		goto fail;
3672 	if (uap->count == auio.uio_resid) {
3673 		if (union_dircheckp) {
3674 			error = union_dircheckp(td, &vp, fp);
3675 			if (error == -1) {
3676 				VFS_UNLOCK_GIANT(vfslocked);
3677 				goto unionread;
3678 			}
3679 			if (error)
3680 				goto fail;
3681 		}
3682 		/*
3683 		 * XXX We could delay dropping the lock above but
3684 		 * union_dircheckp complicates things.
3685 		 */
3686 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3687 		if ((vp->v_vflag & VV_ROOT) &&
3688 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3689 			struct vnode *tvp = vp;
3690 			vp = vp->v_mount->mnt_vnodecovered;
3691 			VREF(vp);
3692 			fp->f_vnode = vp;
3693 			fp->f_data = vp;
3694 			fp->f_offset = 0;
3695 			vput(tvp);
3696 			VFS_UNLOCK_GIANT(vfslocked);
3697 			goto unionread;
3698 		}
3699 		VOP_UNLOCK(vp, 0, td);
3700 	}
3701 	if (uap->basep != NULL) {
3702 		error = copyout(&loff, uap->basep, sizeof(long));
3703 	}
3704 	td->td_retval[0] = uap->count - auio.uio_resid;
3705 fail:
3706 	VFS_UNLOCK_GIANT(vfslocked);
3707 	fdrop(fp, td);
3708 	return (error);
3709 }
3710 #ifndef _SYS_SYSPROTO_H_
3711 struct getdents_args {
3712 	int fd;
3713 	char *buf;
3714 	size_t count;
3715 };
3716 #endif
3717 int
3718 getdents(td, uap)
3719 	struct thread *td;
3720 	register struct getdents_args /* {
3721 		int fd;
3722 		char *buf;
3723 		u_int count;
3724 	} */ *uap;
3725 {
3726 	struct getdirentries_args ap;
3727 	ap.fd = uap->fd;
3728 	ap.buf = uap->buf;
3729 	ap.count = uap->count;
3730 	ap.basep = NULL;
3731 	return (getdirentries(td, &ap));
3732 }
3733 
3734 /*
3735  * Set the mode mask for creation of filesystem nodes.
3736  *
3737  * MP SAFE
3738  */
3739 #ifndef _SYS_SYSPROTO_H_
3740 struct umask_args {
3741 	int	newmask;
3742 };
3743 #endif
3744 int
3745 umask(td, uap)
3746 	struct thread *td;
3747 	struct umask_args /* {
3748 		int newmask;
3749 	} */ *uap;
3750 {
3751 	register struct filedesc *fdp;
3752 
3753 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3754 	fdp = td->td_proc->p_fd;
3755 	td->td_retval[0] = fdp->fd_cmask;
3756 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3757 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3758 	return (0);
3759 }
3760 
3761 /*
3762  * Void all references to file by ripping underlying filesystem
3763  * away from vnode.
3764  */
3765 #ifndef _SYS_SYSPROTO_H_
3766 struct revoke_args {
3767 	char	*path;
3768 };
3769 #endif
3770 int
3771 revoke(td, uap)
3772 	struct thread *td;
3773 	register struct revoke_args /* {
3774 		char *path;
3775 	} */ *uap;
3776 {
3777 	struct vnode *vp;
3778 	struct vattr vattr;
3779 	int error;
3780 	struct nameidata nd;
3781 	int vfslocked;
3782 
3783 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE,
3784 	    uap->path, td);
3785 	if ((error = namei(&nd)) != 0)
3786 		return (error);
3787 	vfslocked = NDHASGIANT(&nd);
3788 	vp = nd.ni_vp;
3789 	NDFREE(&nd, NDF_ONLY_PNBUF);
3790 	if (vp->v_type != VCHR) {
3791 		error = EINVAL;
3792 		goto out;
3793 	}
3794 #ifdef MAC
3795 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3796 	if (error)
3797 		goto out;
3798 #endif
3799 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3800 	if (error)
3801 		goto out;
3802 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3803 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
3804 		if (error)
3805 			goto out;
3806 	}
3807 	if (vcount(vp) > 1)
3808 		VOP_REVOKE(vp, REVOKEALL);
3809 out:
3810 	vput(vp);
3811 	VFS_UNLOCK_GIANT(vfslocked);
3812 	return (error);
3813 }
3814 
3815 /*
3816  * Convert a user file descriptor to a kernel file entry.
3817  * A reference on the file entry is held upon returning.
3818  */
3819 int
3820 getvnode(fdp, fd, fpp)
3821 	struct filedesc *fdp;
3822 	int fd;
3823 	struct file **fpp;
3824 {
3825 	int error;
3826 	struct file *fp;
3827 
3828 	fp = NULL;
3829 	if (fdp == NULL)
3830 		error = EBADF;
3831 	else {
3832 		FILEDESC_LOCK(fdp);
3833 		if ((u_int)fd >= fdp->fd_nfiles ||
3834 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3835 			error = EBADF;
3836 		else if (fp->f_vnode == NULL) {
3837 			fp = NULL;
3838 			error = EINVAL;
3839 		} else {
3840 			fhold(fp);
3841 			error = 0;
3842 		}
3843 		FILEDESC_UNLOCK(fdp);
3844 	}
3845 	*fpp = fp;
3846 	return (error);
3847 }
3848 
3849 /*
3850  * Get (NFS) file handle
3851  */
3852 #ifndef _SYS_SYSPROTO_H_
3853 struct lgetfh_args {
3854 	char	*fname;
3855 	fhandle_t *fhp;
3856 };
3857 #endif
3858 int
3859 lgetfh(td, uap)
3860 	struct thread *td;
3861 	register struct lgetfh_args *uap;
3862 {
3863 	struct nameidata nd;
3864 	fhandle_t fh;
3865 	register struct vnode *vp;
3866 	int vfslocked;
3867 	int error;
3868 
3869 	error = suser(td);
3870 	if (error)
3871 		return (error);
3872 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE,
3873 	    UIO_USERSPACE, uap->fname, td);
3874 	error = namei(&nd);
3875 	if (error)
3876 		return (error);
3877 	vfslocked = NDHASGIANT(&nd);
3878 	NDFREE(&nd, NDF_ONLY_PNBUF);
3879 	vp = nd.ni_vp;
3880 	bzero(&fh, sizeof(fh));
3881 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3882 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3883 	vput(vp);
3884 	VFS_UNLOCK_GIANT(vfslocked);
3885 	if (error)
3886 		return (error);
3887 	error = copyout(&fh, uap->fhp, sizeof (fh));
3888 	return (error);
3889 }
3890 
3891 #ifndef _SYS_SYSPROTO_H_
3892 struct getfh_args {
3893 	char	*fname;
3894 	fhandle_t *fhp;
3895 };
3896 #endif
3897 int
3898 getfh(td, uap)
3899 	struct thread *td;
3900 	register struct getfh_args *uap;
3901 {
3902 	struct nameidata nd;
3903 	fhandle_t fh;
3904 	register struct vnode *vp;
3905 	int vfslocked;
3906 	int error;
3907 
3908 	error = suser(td);
3909 	if (error)
3910 		return (error);
3911 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE,
3912 	    UIO_USERSPACE, uap->fname, td);
3913 	error = namei(&nd);
3914 	if (error)
3915 		return (error);
3916 	vfslocked = NDHASGIANT(&nd);
3917 	NDFREE(&nd, NDF_ONLY_PNBUF);
3918 	vp = nd.ni_vp;
3919 	bzero(&fh, sizeof(fh));
3920 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3921 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3922 	vput(vp);
3923 	VFS_UNLOCK_GIANT(vfslocked);
3924 	if (error)
3925 		return (error);
3926 	error = copyout(&fh, uap->fhp, sizeof (fh));
3927 	return (error);
3928 }
3929 
3930 /*
3931  * syscall for the rpc.lockd to use to translate a NFS file handle into
3932  * an open descriptor.
3933  *
3934  * warning: do not remove the suser() call or this becomes one giant
3935  * security hole.
3936  *
3937  * MP SAFE
3938  */
3939 #ifndef _SYS_SYSPROTO_H_
3940 struct fhopen_args {
3941 	const struct fhandle *u_fhp;
3942 	int flags;
3943 };
3944 #endif
3945 int
3946 fhopen(td, uap)
3947 	struct thread *td;
3948 	struct fhopen_args /* {
3949 		const struct fhandle *u_fhp;
3950 		int flags;
3951 	} */ *uap;
3952 {
3953 	struct proc *p = td->td_proc;
3954 	struct mount *mp;
3955 	struct vnode *vp;
3956 	struct fhandle fhp;
3957 	struct vattr vat;
3958 	struct vattr *vap = &vat;
3959 	struct flock lf;
3960 	struct file *fp;
3961 	register struct filedesc *fdp = p->p_fd;
3962 	int fmode, mode, error, type;
3963 	struct file *nfp;
3964 	int indx;
3965 
3966 	error = suser(td);
3967 	if (error)
3968 		return (error);
3969 	fmode = FFLAGS(uap->flags);
3970 	/* why not allow a non-read/write open for our lockd? */
3971 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3972 		return (EINVAL);
3973 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
3974 	if (error)
3975 		return(error);
3976 	/* find the mount point */
3977 	mtx_lock(&Giant);
3978 	mp = vfs_getvfs(&fhp.fh_fsid);
3979 	if (mp == NULL) {
3980 		error = ESTALE;
3981 		goto out;
3982 	}
3983 	/* now give me my vnode, it gets returned to me locked */
3984 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3985 	if (error)
3986 		goto out;
3987 	/*
3988 	 * from now on we have to make sure not
3989 	 * to forget about the vnode
3990 	 * any error that causes an abort must vput(vp)
3991 	 * just set error = err and 'goto bad;'.
3992 	 */
3993 
3994 	/*
3995 	 * from vn_open
3996 	 */
3997 	if (vp->v_type == VLNK) {
3998 		error = EMLINK;
3999 		goto bad;
4000 	}
4001 	if (vp->v_type == VSOCK) {
4002 		error = EOPNOTSUPP;
4003 		goto bad;
4004 	}
4005 	mode = 0;
4006 	if (fmode & (FWRITE | O_TRUNC)) {
4007 		if (vp->v_type == VDIR) {
4008 			error = EISDIR;
4009 			goto bad;
4010 		}
4011 		error = vn_writechk(vp);
4012 		if (error)
4013 			goto bad;
4014 		mode |= VWRITE;
4015 	}
4016 	if (fmode & FREAD)
4017 		mode |= VREAD;
4018 	if (fmode & O_APPEND)
4019 		mode |= VAPPEND;
4020 #ifdef MAC
4021 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4022 	if (error)
4023 		goto bad;
4024 #endif
4025 	if (mode) {
4026 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4027 		if (error)
4028 			goto bad;
4029 	}
4030 	if (fmode & O_TRUNC) {
4031 		VOP_UNLOCK(vp, 0, td);				/* XXX */
4032 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4033 			vrele(vp);
4034 			goto out;
4035 		}
4036 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4037 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4038 #ifdef MAC
4039 		/*
4040 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4041 		 * should be right.
4042 		 */
4043 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4044 		if (error == 0) {
4045 #endif
4046 			VATTR_NULL(vap);
4047 			vap->va_size = 0;
4048 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4049 #ifdef MAC
4050 		}
4051 #endif
4052 		vn_finished_write(mp);
4053 		if (error)
4054 			goto bad;
4055 	}
4056 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
4057 	if (error)
4058 		goto bad;
4059 
4060 	if (fmode & FWRITE)
4061 		vp->v_writecount++;
4062 
4063 	/*
4064 	 * end of vn_open code
4065 	 */
4066 
4067 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4068 		if (fmode & FWRITE)
4069 			vp->v_writecount--;
4070 		goto bad;
4071 	}
4072 	/* An extra reference on `nfp' has been held for us by falloc(). */
4073 	fp = nfp;
4074 
4075 	nfp->f_vnode = vp;
4076 	nfp->f_data = vp;
4077 	nfp->f_flag = fmode & FMASK;
4078 	nfp->f_ops = &vnops;
4079 	nfp->f_type = DTYPE_VNODE;
4080 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4081 		lf.l_whence = SEEK_SET;
4082 		lf.l_start = 0;
4083 		lf.l_len = 0;
4084 		if (fmode & O_EXLOCK)
4085 			lf.l_type = F_WRLCK;
4086 		else
4087 			lf.l_type = F_RDLCK;
4088 		type = F_FLOCK;
4089 		if ((fmode & FNONBLOCK) == 0)
4090 			type |= F_WAIT;
4091 		VOP_UNLOCK(vp, 0, td);
4092 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4093 			    type)) != 0) {
4094 			/*
4095 			 * The lock request failed.  Normally close the
4096 			 * descriptor but handle the case where someone might
4097 			 * have dup()d or close()d it when we weren't looking.
4098 			 */
4099 			fdclose(fdp, fp, indx, td);
4100 
4101 			/*
4102 			 * release our private reference
4103 			 */
4104 			fdrop(fp, td);
4105 			goto out;
4106 		}
4107 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4108 		fp->f_flag |= FHASLOCK;
4109 	}
4110 
4111 	VOP_UNLOCK(vp, 0, td);
4112 	fdrop(fp, td);
4113 	mtx_unlock(&Giant);
4114 	td->td_retval[0] = indx;
4115 	return (0);
4116 
4117 bad:
4118 	vput(vp);
4119 out:
4120 	mtx_unlock(&Giant);
4121 	return (error);
4122 }
4123 
4124 /*
4125  * Stat an (NFS) file handle.
4126  *
4127  * MP SAFE
4128  */
4129 #ifndef _SYS_SYSPROTO_H_
4130 struct fhstat_args {
4131 	struct fhandle *u_fhp;
4132 	struct stat *sb;
4133 };
4134 #endif
4135 int
4136 fhstat(td, uap)
4137 	struct thread *td;
4138 	register struct fhstat_args /* {
4139 		struct fhandle *u_fhp;
4140 		struct stat *sb;
4141 	} */ *uap;
4142 {
4143 	struct stat sb;
4144 	fhandle_t fh;
4145 	struct mount *mp;
4146 	struct vnode *vp;
4147 	int error;
4148 
4149 	error = suser(td);
4150 	if (error)
4151 		return (error);
4152 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4153 	if (error)
4154 		return (error);
4155 	mtx_lock(&Giant);
4156 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
4157 		mtx_unlock(&Giant);
4158 		return (ESTALE);
4159 	}
4160 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4161 		mtx_unlock(&Giant);
4162 		return (error);
4163 	}
4164 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4165 	vput(vp);
4166 	mtx_unlock(&Giant);
4167 	if (error)
4168 		return (error);
4169 	error = copyout(&sb, uap->sb, sizeof(sb));
4170 	return (error);
4171 }
4172 
4173 /*
4174  * Implement fstatfs() for (NFS) file handles.
4175  *
4176  * MP SAFE
4177  */
4178 #ifndef _SYS_SYSPROTO_H_
4179 struct fhstatfs_args {
4180 	struct fhandle *u_fhp;
4181 	struct statfs *buf;
4182 };
4183 #endif
4184 int
4185 fhstatfs(td, uap)
4186 	struct thread *td;
4187 	struct fhstatfs_args /* {
4188 		struct fhandle *u_fhp;
4189 		struct statfs *buf;
4190 	} */ *uap;
4191 {
4192 	struct statfs sf;
4193 	fhandle_t fh;
4194 	int error;
4195 
4196 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4197 	if (error)
4198 		return (error);
4199 	error = kern_fhstatfs(td, fh, &sf);
4200 	if (error)
4201 		return (error);
4202 	return (copyout(&sf, uap->buf, sizeof(sf)));
4203 }
4204 
4205 int
4206 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4207 {
4208 	struct statfs *sp;
4209 	struct mount *mp;
4210 	struct vnode *vp;
4211 	int error;
4212 
4213 	error = suser(td);
4214 	if (error)
4215 		return (error);
4216 	mtx_lock(&Giant);
4217 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
4218 		mtx_unlock(&Giant);
4219 		return (ESTALE);
4220 	}
4221 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4222 	if (error) {
4223 		mtx_unlock(&Giant);
4224 		return (error);
4225 	}
4226 	mp = vp->v_mount;
4227 	sp = &mp->mnt_stat;
4228 	vput(vp);
4229 	error = prison_canseemount(td->td_ucred, mp);
4230 	if (error)
4231 		return (error);
4232 #ifdef MAC
4233 	error = mac_check_mount_stat(td->td_ucred, mp);
4234 	if (error) {
4235 		mtx_unlock(&Giant);
4236 		return (error);
4237 	}
4238 #endif
4239 	/*
4240 	 * Set these in case the underlying filesystem fails to do so.
4241 	 */
4242 	sp->f_version = STATFS_VERSION;
4243 	sp->f_namemax = NAME_MAX;
4244 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4245 	error = VFS_STATFS(mp, sp, td);
4246 	mtx_unlock(&Giant);
4247 	if (error)
4248 		return (error);
4249 	*buf = *sp;
4250 	return (0);
4251 }
4252 
4253 /*
4254  * Syscall to push extended attribute configuration information into the
4255  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4256  * a command (int cmd), and attribute name and misc data.  For now, the
4257  * attribute name is left in userspace for consumption by the VFS_op.
4258  * It will probably be changed to be copied into sysspace by the
4259  * syscall in the future, once issues with various consumers of the
4260  * attribute code have raised their hands.
4261  *
4262  * Currently this is used only by UFS Extended Attributes.
4263  */
4264 int
4265 extattrctl(td, uap)
4266 	struct thread *td;
4267 	struct extattrctl_args /* {
4268 		const char *path;
4269 		int cmd;
4270 		const char *filename;
4271 		int attrnamespace;
4272 		const char *attrname;
4273 	} */ *uap;
4274 {
4275 	struct vnode *filename_vp;
4276 	struct nameidata nd;
4277 	struct mount *mp, *mp_writable;
4278 	char attrname[EXTATTR_MAXNAMELEN];
4279 	int vfslocked, fnvfslocked, error;
4280 
4281 	/*
4282 	 * uap->attrname is not always defined.  We check again later when we
4283 	 * invoke the VFS call so as to pass in NULL there if needed.
4284 	 */
4285 	if (uap->attrname != NULL) {
4286 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4287 		    NULL);
4288 		if (error)
4289 			return (error);
4290 	}
4291 
4292 	vfslocked = fnvfslocked = 0;
4293 	/*
4294 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4295 	 * which VFS_EXTATTRCTL() will later release.
4296 	 */
4297 	filename_vp = NULL;
4298 	if (uap->filename != NULL) {
4299 		NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF,
4300 		    UIO_USERSPACE, uap->filename, td);
4301 		error = namei(&nd);
4302 		if (error)
4303 			return (error);
4304 		fnvfslocked = NDHASGIANT(&nd);
4305 		filename_vp = nd.ni_vp;
4306 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4307 	}
4308 
4309 	/* uap->path is always defined. */
4310 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td);
4311 	error = namei(&nd);
4312 	if (error) {
4313 		if (filename_vp != NULL)
4314 			vput(filename_vp);
4315 		goto out;
4316 	}
4317 	vfslocked = NDHASGIANT(&nd);
4318 	mp = nd.ni_vp->v_mount;
4319 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4320 	NDFREE(&nd, 0);
4321 	if (error) {
4322 		if (filename_vp != NULL)
4323 			vput(filename_vp);
4324 		goto out;
4325 	}
4326 
4327 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
4328 	    uap->attrname != NULL ? attrname : NULL, td);
4329 
4330 	vn_finished_write(mp_writable);
4331 	/*
4332 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4333 	 * filename_vp, so vrele it if it is defined.
4334 	 */
4335 	if (filename_vp != NULL)
4336 		vrele(filename_vp);
4337 out:
4338 	VFS_UNLOCK_GIANT(fnvfslocked);
4339 	VFS_UNLOCK_GIANT(vfslocked);
4340 	return (error);
4341 }
4342 
4343 /*-
4344  * Set a named extended attribute on a file or directory
4345  *
4346  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4347  *            kernelspace string pointer "attrname", userspace buffer
4348  *            pointer "data", buffer length "nbytes", thread "td".
4349  * Returns: 0 on success, an error number otherwise
4350  * Locks: none
4351  * References: vp must be a valid reference for the duration of the call
4352  */
4353 static int
4354 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4355     void *data, size_t nbytes, struct thread *td)
4356 {
4357 	struct mount *mp;
4358 	struct uio auio;
4359 	struct iovec aiov;
4360 	ssize_t cnt;
4361 	int error;
4362 
4363 	VFS_ASSERT_GIANT(vp->v_mount);
4364 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4365 	if (error)
4366 		return (error);
4367 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4368 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4369 
4370 	aiov.iov_base = data;
4371 	aiov.iov_len = nbytes;
4372 	auio.uio_iov = &aiov;
4373 	auio.uio_iovcnt = 1;
4374 	auio.uio_offset = 0;
4375 	if (nbytes > INT_MAX) {
4376 		error = EINVAL;
4377 		goto done;
4378 	}
4379 	auio.uio_resid = nbytes;
4380 	auio.uio_rw = UIO_WRITE;
4381 	auio.uio_segflg = UIO_USERSPACE;
4382 	auio.uio_td = td;
4383 	cnt = nbytes;
4384 
4385 #ifdef MAC
4386 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4387 	    attrname, &auio);
4388 	if (error)
4389 		goto done;
4390 #endif
4391 
4392 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4393 	    td->td_ucred, td);
4394 	cnt -= auio.uio_resid;
4395 	td->td_retval[0] = cnt;
4396 
4397 done:
4398 	VOP_UNLOCK(vp, 0, td);
4399 	vn_finished_write(mp);
4400 	return (error);
4401 }
4402 
4403 int
4404 extattr_set_fd(td, uap)
4405 	struct thread *td;
4406 	struct extattr_set_fd_args /* {
4407 		int fd;
4408 		int attrnamespace;
4409 		const char *attrname;
4410 		void *data;
4411 		size_t nbytes;
4412 	} */ *uap;
4413 {
4414 	struct file *fp;
4415 	char attrname[EXTATTR_MAXNAMELEN];
4416 	int vfslocked, error;
4417 
4418 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4419 	if (error)
4420 		return (error);
4421 
4422 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4423 	if (error)
4424 		return (error);
4425 
4426 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4427 	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
4428 	    attrname, uap->data, uap->nbytes, td);
4429 	fdrop(fp, td);
4430 	VFS_UNLOCK_GIANT(vfslocked);
4431 
4432 	return (error);
4433 }
4434 
4435 int
4436 extattr_set_file(td, uap)
4437 	struct thread *td;
4438 	struct extattr_set_file_args /* {
4439 		const char *path;
4440 		int attrnamespace;
4441 		const char *attrname;
4442 		void *data;
4443 		size_t nbytes;
4444 	} */ *uap;
4445 {
4446 	struct nameidata nd;
4447 	char attrname[EXTATTR_MAXNAMELEN];
4448 	int vfslocked, error;
4449 
4450 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4451 	if (error)
4452 		return (error);
4453 
4454 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td);
4455 	error = namei(&nd);
4456 	if (error)
4457 		return (error);
4458 	NDFREE(&nd, NDF_ONLY_PNBUF);
4459 
4460 	vfslocked = NDHASGIANT(&nd);
4461 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4462 	    uap->data, uap->nbytes, td);
4463 
4464 	vrele(nd.ni_vp);
4465 	VFS_UNLOCK_GIANT(vfslocked);
4466 	return (error);
4467 }
4468 
4469 int
4470 extattr_set_link(td, uap)
4471 	struct thread *td;
4472 	struct extattr_set_link_args /* {
4473 		const char *path;
4474 		int attrnamespace;
4475 		const char *attrname;
4476 		void *data;
4477 		size_t nbytes;
4478 	} */ *uap;
4479 {
4480 	struct nameidata nd;
4481 	char attrname[EXTATTR_MAXNAMELEN];
4482 	int vfslocked, error;
4483 
4484 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4485 	if (error)
4486 		return (error);
4487 
4488 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW, UIO_USERSPACE, uap->path, td);
4489 	error = namei(&nd);
4490 	if (error)
4491 		return (error);
4492 	NDFREE(&nd, NDF_ONLY_PNBUF);
4493 
4494 	vfslocked = NDHASGIANT(&nd);
4495 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4496 	    uap->data, uap->nbytes, td);
4497 
4498 	vrele(nd.ni_vp);
4499 	VFS_UNLOCK_GIANT(vfslocked);
4500 	return (error);
4501 }
4502 
4503 /*-
4504  * Get a named extended attribute on a file or directory
4505  *
4506  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4507  *            kernelspace string pointer "attrname", userspace buffer
4508  *            pointer "data", buffer length "nbytes", thread "td".
4509  * Returns: 0 on success, an error number otherwise
4510  * Locks: none
4511  * References: vp must be a valid reference for the duration of the call
4512  */
4513 static int
4514 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4515     void *data, size_t nbytes, struct thread *td)
4516 {
4517 	struct uio auio, *auiop;
4518 	struct iovec aiov;
4519 	ssize_t cnt;
4520 	size_t size, *sizep;
4521 	int error;
4522 
4523 	VFS_ASSERT_GIANT(vp->v_mount);
4524 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4525 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4526 
4527 	/*
4528 	 * Slightly unusual semantics: if the user provides a NULL data
4529 	 * pointer, they don't want to receive the data, just the
4530 	 * maximum read length.
4531 	 */
4532 	auiop = NULL;
4533 	sizep = NULL;
4534 	cnt = 0;
4535 	if (data != NULL) {
4536 		aiov.iov_base = data;
4537 		aiov.iov_len = nbytes;
4538 		auio.uio_iov = &aiov;
4539 		auio.uio_iovcnt = 1;
4540 		auio.uio_offset = 0;
4541 		if (nbytes > INT_MAX) {
4542 			error = EINVAL;
4543 			goto done;
4544 		}
4545 		auio.uio_resid = nbytes;
4546 		auio.uio_rw = UIO_READ;
4547 		auio.uio_segflg = UIO_USERSPACE;
4548 		auio.uio_td = td;
4549 		auiop = &auio;
4550 		cnt = nbytes;
4551 	} else
4552 		sizep = &size;
4553 
4554 #ifdef MAC
4555 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4556 	    attrname, &auio);
4557 	if (error)
4558 		goto done;
4559 #endif
4560 
4561 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4562 	    td->td_ucred, td);
4563 
4564 	if (auiop != NULL) {
4565 		cnt -= auio.uio_resid;
4566 		td->td_retval[0] = cnt;
4567 	} else
4568 		td->td_retval[0] = size;
4569 
4570 done:
4571 	VOP_UNLOCK(vp, 0, td);
4572 	return (error);
4573 }
4574 
4575 int
4576 extattr_get_fd(td, uap)
4577 	struct thread *td;
4578 	struct extattr_get_fd_args /* {
4579 		int fd;
4580 		int attrnamespace;
4581 		const char *attrname;
4582 		void *data;
4583 		size_t nbytes;
4584 	} */ *uap;
4585 {
4586 	struct file *fp;
4587 	char attrname[EXTATTR_MAXNAMELEN];
4588 	int vfslocked, error;
4589 
4590 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4591 	if (error)
4592 		return (error);
4593 
4594 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4595 	if (error)
4596 		return (error);
4597 
4598 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4599 	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4600 	    attrname, uap->data, uap->nbytes, td);
4601 
4602 	fdrop(fp, td);
4603 	VFS_UNLOCK_GIANT(vfslocked);
4604 	return (error);
4605 }
4606 
4607 int
4608 extattr_get_file(td, uap)
4609 	struct thread *td;
4610 	struct extattr_get_file_args /* {
4611 		const char *path;
4612 		int attrnamespace;
4613 		const char *attrname;
4614 		void *data;
4615 		size_t nbytes;
4616 	} */ *uap;
4617 {
4618 	struct nameidata nd;
4619 	char attrname[EXTATTR_MAXNAMELEN];
4620 	int vfslocked, error;
4621 
4622 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4623 	if (error)
4624 		return (error);
4625 
4626 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td);
4627 	error = namei(&nd);
4628 	if (error)
4629 		return (error);
4630 	NDFREE(&nd, NDF_ONLY_PNBUF);
4631 
4632 	vfslocked = NDHASGIANT(&nd);
4633 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4634 	    uap->data, uap->nbytes, td);
4635 
4636 	vrele(nd.ni_vp);
4637 	VFS_UNLOCK_GIANT(vfslocked);
4638 	return (error);
4639 }
4640 
4641 int
4642 extattr_get_link(td, uap)
4643 	struct thread *td;
4644 	struct extattr_get_link_args /* {
4645 		const char *path;
4646 		int attrnamespace;
4647 		const char *attrname;
4648 		void *data;
4649 		size_t nbytes;
4650 	} */ *uap;
4651 {
4652 	struct nameidata nd;
4653 	char attrname[EXTATTR_MAXNAMELEN];
4654 	int vfslocked, error;
4655 
4656 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4657 	if (error)
4658 		return (error);
4659 
4660 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW, UIO_USERSPACE, uap->path, td);
4661 	error = namei(&nd);
4662 	if (error)
4663 		return (error);
4664 	NDFREE(&nd, NDF_ONLY_PNBUF);
4665 
4666 	vfslocked = NDHASGIANT(&nd);
4667 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4668 	    uap->data, uap->nbytes, td);
4669 
4670 	vrele(nd.ni_vp);
4671 	VFS_UNLOCK_GIANT(vfslocked);
4672 	return (error);
4673 }
4674 
4675 /*
4676  * extattr_delete_vp(): Delete a named extended attribute on a file or
4677  *                      directory
4678  *
4679  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4680  *            kernelspace string pointer "attrname", proc "p"
4681  * Returns: 0 on success, an error number otherwise
4682  * Locks: none
4683  * References: vp must be a valid reference for the duration of the call
4684  */
4685 static int
4686 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4687     struct thread *td)
4688 {
4689 	struct mount *mp;
4690 	int error;
4691 
4692 	VFS_ASSERT_GIANT(vp->v_mount);
4693 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4694 	if (error)
4695 		return (error);
4696 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4697 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4698 
4699 #ifdef MAC
4700 	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4701 	    attrname);
4702 	if (error)
4703 		goto done;
4704 #endif
4705 
4706 	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4707 	    td);
4708 	if (error == EOPNOTSUPP)
4709 		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4710 		    td->td_ucred, td);
4711 #ifdef MAC
4712 done:
4713 #endif
4714 	VOP_UNLOCK(vp, 0, td);
4715 	vn_finished_write(mp);
4716 	return (error);
4717 }
4718 
4719 int
4720 extattr_delete_fd(td, uap)
4721 	struct thread *td;
4722 	struct extattr_delete_fd_args /* {
4723 		int fd;
4724 		int attrnamespace;
4725 		const char *attrname;
4726 	} */ *uap;
4727 {
4728 	struct file *fp;
4729 	char attrname[EXTATTR_MAXNAMELEN];
4730 	int vfslocked, error;
4731 
4732 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4733 	if (error)
4734 		return (error);
4735 
4736 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4737 	if (error)
4738 		return (error);
4739 
4740 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4741 	error = extattr_delete_vp(fp->f_vnode, uap->attrnamespace,
4742 	    attrname, td);
4743 	fdrop(fp, td);
4744 	VFS_UNLOCK_GIANT(vfslocked);
4745 	return (error);
4746 }
4747 
4748 int
4749 extattr_delete_file(td, uap)
4750 	struct thread *td;
4751 	struct extattr_delete_file_args /* {
4752 		const char *path;
4753 		int attrnamespace;
4754 		const char *attrname;
4755 	} */ *uap;
4756 {
4757 	struct nameidata nd;
4758 	char attrname[EXTATTR_MAXNAMELEN];
4759 	int vfslocked, error;
4760 
4761 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4762 	if (error)
4763 		return(error);
4764 
4765 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td);
4766 	error = namei(&nd);
4767 	if (error)
4768 		return(error);
4769 	NDFREE(&nd, NDF_ONLY_PNBUF);
4770 
4771 	vfslocked = NDHASGIANT(&nd);
4772 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4773 	vrele(nd.ni_vp);
4774 	VFS_UNLOCK_GIANT(vfslocked);
4775 	return(error);
4776 }
4777 
4778 int
4779 extattr_delete_link(td, uap)
4780 	struct thread *td;
4781 	struct extattr_delete_link_args /* {
4782 		const char *path;
4783 		int attrnamespace;
4784 		const char *attrname;
4785 	} */ *uap;
4786 {
4787 	struct nameidata nd;
4788 	char attrname[EXTATTR_MAXNAMELEN];
4789 	int vfslocked, error;
4790 
4791 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4792 	if (error)
4793 		return(error);
4794 
4795 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW, UIO_USERSPACE, uap->path, td);
4796 	error = namei(&nd);
4797 	if (error)
4798 		return(error);
4799 	NDFREE(&nd, NDF_ONLY_PNBUF);
4800 
4801 	vfslocked = NDHASGIANT(&nd);
4802 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4803 	vrele(nd.ni_vp);
4804 	VFS_UNLOCK_GIANT(vfslocked);
4805 	return(error);
4806 }
4807 
4808 /*-
4809  * Retrieve a list of extended attributes on a file or directory.
4810  *
4811  * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4812  *            userspace buffer pointer "data", buffer length "nbytes",
4813  *            thread "td".
4814  * Returns: 0 on success, an error number otherwise
4815  * Locks: none
4816  * References: vp must be a valid reference for the duration of the call
4817  */
4818 static int
4819 extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4820     size_t nbytes, struct thread *td)
4821 {
4822 	struct uio auio, *auiop;
4823 	size_t size, *sizep;
4824 	struct iovec aiov;
4825 	ssize_t cnt;
4826 	int error;
4827 
4828 	VFS_ASSERT_GIANT(vp->v_mount);
4829 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4830 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4831 
4832 	auiop = NULL;
4833 	sizep = NULL;
4834 	cnt = 0;
4835 	if (data != NULL) {
4836 		aiov.iov_base = data;
4837 		aiov.iov_len = nbytes;
4838 		auio.uio_iov = &aiov;
4839 		auio.uio_iovcnt = 1;
4840 		auio.uio_offset = 0;
4841 		if (nbytes > INT_MAX) {
4842 			error = EINVAL;
4843 			goto done;
4844 		}
4845 		auio.uio_resid = nbytes;
4846 		auio.uio_rw = UIO_READ;
4847 		auio.uio_segflg = UIO_USERSPACE;
4848 		auio.uio_td = td;
4849 		auiop = &auio;
4850 		cnt = nbytes;
4851 	} else
4852 		sizep = &size;
4853 
4854 #ifdef MAC
4855 	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4856 	if (error)
4857 		goto done;
4858 #endif
4859 
4860 	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4861 	    td->td_ucred, td);
4862 
4863 	if (auiop != NULL) {
4864 		cnt -= auio.uio_resid;
4865 		td->td_retval[0] = cnt;
4866 	} else
4867 		td->td_retval[0] = size;
4868 
4869 done:
4870 	VOP_UNLOCK(vp, 0, td);
4871 	return (error);
4872 }
4873 
4874 
4875 int
4876 extattr_list_fd(td, uap)
4877 	struct thread *td;
4878 	struct extattr_list_fd_args /* {
4879 		int fd;
4880 		int attrnamespace;
4881 		void *data;
4882 		size_t nbytes;
4883 	} */ *uap;
4884 {
4885 	struct file *fp;
4886 	int vfslocked, error;
4887 
4888 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4889 	if (error)
4890 		return (error);
4891 
4892 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4893 	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
4894 	    uap->nbytes, td);
4895 
4896 	fdrop(fp, td);
4897 	VFS_UNLOCK_GIANT(vfslocked);
4898 	return (error);
4899 }
4900 
4901 int
4902 extattr_list_file(td, uap)
4903 	struct thread*td;
4904 	struct extattr_list_file_args /* {
4905 		const char *path;
4906 		int attrnamespace;
4907 		void *data;
4908 		size_t nbytes;
4909 	} */ *uap;
4910 {
4911 	struct nameidata nd;
4912 	int vfslocked, error;
4913 
4914 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td);
4915 	error = namei(&nd);
4916 	if (error)
4917 		return (error);
4918 	NDFREE(&nd, NDF_ONLY_PNBUF);
4919 
4920 	vfslocked = NDHASGIANT(&nd);
4921 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4922 	    uap->nbytes, td);
4923 
4924 	vrele(nd.ni_vp);
4925 	VFS_UNLOCK_GIANT(vfslocked);
4926 	return (error);
4927 }
4928 
4929 int
4930 extattr_list_link(td, uap)
4931 	struct thread*td;
4932 	struct extattr_list_link_args /* {
4933 		const char *path;
4934 		int attrnamespace;
4935 		void *data;
4936 		size_t nbytes;
4937 	} */ *uap;
4938 {
4939 	struct nameidata nd;
4940 	int vfslocked, error;
4941 
4942 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW, UIO_USERSPACE, uap->path, td);
4943 	error = namei(&nd);
4944 	if (error)
4945 		return (error);
4946 	NDFREE(&nd, NDF_ONLY_PNBUF);
4947 
4948 	vfslocked = NDHASGIANT(&nd);
4949 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4950 	    uap->nbytes, td);
4951 
4952 	vrele(nd.ni_vp);
4953 	VFS_UNLOCK_GIANT(vfslocked);
4954 	return (error);
4955 }
4956