xref: /freebsd/sys/kern/vfs_extattr.c (revision 84ee9401a3fc8d3c22424266f421a928989cd692)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/sysent.h>
48 #include <sys/mac.h>
49 #include <sys/malloc.h>
50 #include <sys/mount.h>
51 #include <sys/mutex.h>
52 #include <sys/sysproto.h>
53 #include <sys/namei.h>
54 #include <sys/filedesc.h>
55 #include <sys/kernel.h>
56 #include <sys/fcntl.h>
57 #include <sys/file.h>
58 #include <sys/limits.h>
59 #include <sys/linker.h>
60 #include <sys/stat.h>
61 #include <sys/sx.h>
62 #include <sys/unistd.h>
63 #include <sys/vnode.h>
64 #include <sys/proc.h>
65 #include <sys/dirent.h>
66 #include <sys/extattr.h>
67 #include <sys/jail.h>
68 #include <sys/syscallsubr.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/stdarg.h>
72 
73 #include <security/audit/audit.h>
74 
75 #include <vm/vm.h>
76 #include <vm/vm_object.h>
77 #include <vm/vm_page.h>
78 #include <vm/uma.h>
79 
80 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
81 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
82 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
83 static int setfmode(struct thread *td, struct vnode *, int);
84 static int setfflags(struct thread *td, struct vnode *, int);
85 static int setutimes(struct thread *td, struct vnode *,
86     const struct timespec *, int, int);
87 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
88     struct thread *td);
89 
90 static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
91     size_t nbytes, struct thread *td);
92 
93 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
94 
95 /*
96  * The module initialization routine for POSIX asynchronous I/O will
97  * set this to the version of AIO that it implements.  (Zero means
98  * that it is not implemented.)  This value is used here by pathconf()
99  * and in kern_descrip.c by fpathconf().
100  */
101 int async_io_version;
102 
103 /*
104  * Sync each mounted filesystem.
105  */
106 #ifndef _SYS_SYSPROTO_H_
107 struct sync_args {
108 	int     dummy;
109 };
110 #endif
111 
112 #ifdef DEBUG
113 static int syncprt = 0;
114 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
115 #endif
116 
117 /* ARGSUSED */
118 int
119 sync(td, uap)
120 	struct thread *td;
121 	struct sync_args *uap;
122 {
123 	struct mount *mp, *nmp;
124 	int vfslocked;
125 
126 	mtx_lock(&mountlist_mtx);
127 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
128 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
129 			nmp = TAILQ_NEXT(mp, mnt_list);
130 			continue;
131 		}
132 		vfslocked = VFS_LOCK_GIANT(mp);
133 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
134 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
135 			MNT_ILOCK(mp);
136 			mp->mnt_noasync++;
137 			mp->mnt_kern_flag &= ~MNTK_ASYNC;
138 			MNT_IUNLOCK(mp);
139 			vfs_msync(mp, MNT_NOWAIT);
140 			VFS_SYNC(mp, MNT_NOWAIT, td);
141 			MNT_ILOCK(mp);
142 			mp->mnt_noasync--;
143 			if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
144 			    mp->mnt_noasync == 0)
145 				mp->mnt_kern_flag |= MNTK_ASYNC;
146 			MNT_IUNLOCK(mp);
147 			vn_finished_write(mp);
148 		}
149 		VFS_UNLOCK_GIANT(vfslocked);
150 		mtx_lock(&mountlist_mtx);
151 		nmp = TAILQ_NEXT(mp, mnt_list);
152 		vfs_unbusy(mp, td);
153 	}
154 	mtx_unlock(&mountlist_mtx);
155 	return (0);
156 }
157 
158 /* XXX PRISON: could be per prison flag */
159 static int prison_quotas;
160 #if 0
161 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
162 #endif
163 
164 /*
165  * Change filesystem quotas.
166  *
167  * MP SAFE
168  */
169 #ifndef _SYS_SYSPROTO_H_
170 struct quotactl_args {
171 	char *path;
172 	int cmd;
173 	int uid;
174 	caddr_t arg;
175 };
176 #endif
177 int
178 quotactl(td, uap)
179 	struct thread *td;
180 	register struct quotactl_args /* {
181 		char *path;
182 		int cmd;
183 		int uid;
184 		caddr_t arg;
185 	} */ *uap;
186 {
187 	struct mount *mp, *vmp;
188 	int vfslocked;
189 	int error;
190 	struct nameidata nd;
191 
192 	AUDIT_ARG(cmd, uap->cmd);
193 	AUDIT_ARG(uid, uap->uid);
194 	if (jailed(td->td_ucred) && !prison_quotas)
195 		return (EPERM);
196 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1,
197 	   UIO_USERSPACE, uap->path, td);
198 	if ((error = namei(&nd)) != 0)
199 		return (error);
200 	vfslocked = NDHASGIANT(&nd);
201 	NDFREE(&nd, NDF_ONLY_PNBUF);
202 	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
203 	mp = nd.ni_vp->v_mount;
204 	vrele(nd.ni_vp);
205 	if (error)
206 		goto out;
207 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
208 	vn_finished_write(vmp);
209 out:
210 	VFS_UNLOCK_GIANT(vfslocked);
211 	return (error);
212 }
213 
214 /*
215  * Get filesystem statistics.
216  */
217 #ifndef _SYS_SYSPROTO_H_
218 struct statfs_args {
219 	char *path;
220 	struct statfs *buf;
221 };
222 #endif
223 int
224 statfs(td, uap)
225 	struct thread *td;
226 	register struct statfs_args /* {
227 		char *path;
228 		struct statfs *buf;
229 	} */ *uap;
230 {
231 	struct statfs sf;
232 	int error;
233 
234 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
235 	if (error == 0)
236 		error = copyout(&sf, uap->buf, sizeof(sf));
237 	return (error);
238 }
239 
240 int
241 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
242     struct statfs *buf)
243 {
244 	struct mount *mp;
245 	struct statfs *sp, sb;
246 	int vfslocked;
247 	int error;
248 	struct nameidata nd;
249 
250 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
251 	    pathseg, path, td);
252 	error = namei(&nd);
253 	if (error)
254 		return (error);
255 	vfslocked = NDHASGIANT(&nd);
256 	mp = nd.ni_vp->v_mount;
257 	vfs_ref(mp);
258 	NDFREE(&nd, NDF_ONLY_PNBUF);
259 	vput(nd.ni_vp);
260 #ifdef MAC
261 	error = mac_check_mount_stat(td->td_ucred, mp);
262 	if (error)
263 		goto out;
264 #endif
265 	/*
266 	 * Set these in case the underlying filesystem fails to do so.
267 	 */
268 	sp = &mp->mnt_stat;
269 	sp->f_version = STATFS_VERSION;
270 	sp->f_namemax = NAME_MAX;
271 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
272 	error = VFS_STATFS(mp, sp, td);
273 	if (error)
274 		goto out;
275 	if (suser(td)) {
276 		bcopy(sp, &sb, sizeof(sb));
277 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
278 		prison_enforce_statfs(td->td_ucred, mp, &sb);
279 		sp = &sb;
280 	}
281 	*buf = *sp;
282 out:
283 	vfs_rel(mp);
284 	VFS_UNLOCK_GIANT(vfslocked);
285 	if (mtx_owned(&Giant))
286 		printf("statfs(%d): %s: %d\n", vfslocked, path, error);
287 	return (error);
288 }
289 
290 /*
291  * Get filesystem statistics.
292  */
293 #ifndef _SYS_SYSPROTO_H_
294 struct fstatfs_args {
295 	int fd;
296 	struct statfs *buf;
297 };
298 #endif
299 int
300 fstatfs(td, uap)
301 	struct thread *td;
302 	register struct fstatfs_args /* {
303 		int fd;
304 		struct statfs *buf;
305 	} */ *uap;
306 {
307 	struct statfs sf;
308 	int error;
309 
310 	error = kern_fstatfs(td, uap->fd, &sf);
311 	if (error == 0)
312 		error = copyout(&sf, uap->buf, sizeof(sf));
313 	return (error);
314 }
315 
316 int
317 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
318 {
319 	struct file *fp;
320 	struct mount *mp;
321 	struct statfs *sp, sb;
322 	int vfslocked;
323 	struct vnode *vp;
324 	int error;
325 
326 	AUDIT_ARG(fd, fd);
327 	error = getvnode(td->td_proc->p_fd, fd, &fp);
328 	if (error)
329 		return (error);
330 	vp = fp->f_vnode;
331 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
332 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
333 #ifdef AUDIT
334 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
335 #endif
336 	mp = vp->v_mount;
337 	if (mp)
338 		vfs_ref(mp);
339 	VOP_UNLOCK(vp, 0, td);
340 	fdrop(fp, td);
341 	if (vp->v_iflag & VI_DOOMED) {
342 		error = EBADF;
343 		goto out;
344 	}
345 #ifdef MAC
346 	error = mac_check_mount_stat(td->td_ucred, mp);
347 	if (error)
348 		goto out;
349 #endif
350 	/*
351 	 * Set these in case the underlying filesystem fails to do so.
352 	 */
353 	sp = &mp->mnt_stat;
354 	sp->f_version = STATFS_VERSION;
355 	sp->f_namemax = NAME_MAX;
356 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
357 	error = VFS_STATFS(mp, sp, td);
358 	if (error)
359 		goto out;
360 	if (suser(td)) {
361 		bcopy(sp, &sb, sizeof(sb));
362 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
363 		prison_enforce_statfs(td->td_ucred, mp, &sb);
364 		sp = &sb;
365 	}
366 	*buf = *sp;
367 out:
368 	if (mp)
369 		vfs_rel(mp);
370 	VFS_UNLOCK_GIANT(vfslocked);
371 	return (error);
372 }
373 
374 /*
375  * Get statistics on all filesystems.
376  */
377 #ifndef _SYS_SYSPROTO_H_
378 struct getfsstat_args {
379 	struct statfs *buf;
380 	long bufsize;
381 	int flags;
382 };
383 #endif
384 int
385 getfsstat(td, uap)
386 	struct thread *td;
387 	register struct getfsstat_args /* {
388 		struct statfs *buf;
389 		long bufsize;
390 		int flags;
391 	} */ *uap;
392 {
393 
394 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
395 	    uap->flags));
396 }
397 
398 /*
399  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
400  * 	The caller is responsible for freeing memory which will be allocated
401  *	in '*buf'.
402  */
/*
 * Walk the mount list and report statfs records for the mounts that pass
 * prison_canseemount() (and the MAC check when MAC is compiled in).
 *
 *  - bufsize == 0: nothing is copied, only the count is produced.
 *  - bufseg == UIO_USERSPACE: records are copyout()ed to *buf.
 *  - otherwise (UIO_SYSSPACE): a buffer is malloc()ed with M_TEMP and
 *    stored in *buf, and records are bcopy()ed into it.
 *
 * td->td_retval[0] receives the number of mounts visited, clamped to the
 * number of records that fit when a buffer was supplied.  Returns 0, or
 * the copyout() error in the UIO_USERSPACE case.
 */
403 int
404 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
405     enum uio_seg bufseg, int flags)
406 {
407 	struct mount *mp, *nmp;
408 	struct statfs *sfsp, *sp, sb;
409 	size_t count, maxcount;
410 	int vfslocked;
411 	int error;
412 
/* Number of whole records that fit in the caller's buffer. */
413 	maxcount = bufsize / sizeof(struct statfs);
414 	if (bufsize == 0)
415 		sfsp = NULL;
416 	else if (bufseg == UIO_USERSPACE)
417 		sfsp = *buf;
418 	else /* if (bufseg == UIO_SYSSPACE) */ {
/*
 * Size the kernel buffer by the current number of mounts, but never
 * larger than what the caller asked for.
 */
419 		count = 0;
420 		mtx_lock(&mountlist_mtx);
421 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
422 			count++;
423 		}
424 		mtx_unlock(&mountlist_mtx);
425 		if (maxcount > count)
426 			maxcount = count;
427 		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
428 		    M_WAITOK);
429 	}
430 	count = 0;
431 	mtx_lock(&mountlist_mtx);
432 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
/* Mounts hidden from this credential are skipped and not counted. */
433 		if (prison_canseemount(td->td_ucred, mp) != 0) {
434 			nmp = TAILQ_NEXT(mp, mnt_list);
435 			continue;
436 		}
437 #ifdef MAC
438 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
439 			nmp = TAILQ_NEXT(mp, mnt_list);
440 			continue;
441 		}
442 #endif
/* A mount that cannot be busied without waiting is skipped as well. */
443 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
444 			nmp = TAILQ_NEXT(mp, mnt_list);
445 			continue;
446 		}
447 		vfslocked = VFS_LOCK_GIANT(mp);
448 		if (sfsp && count < maxcount) {
449 			sp = &mp->mnt_stat;
450 			/*
451 			 * Set these in case the underlying filesystem
452 			 * fails to do so.
453 			 */
454 			sp->f_version = STATFS_VERSION;
455 			sp->f_namemax = NAME_MAX;
456 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
457 			/*
458 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
459 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
460 			 * overrides MNT_WAIT.
461 			 *
			 * NOTE(review): as written, the condition below calls
			 * VFS_STATFS() when neither MNT_LAZY nor MNT_NOWAIT
			 * bits are set OR when the MNT_WAIT bit is set, i.e.
			 * MNT_WAIT wins -- confirm which precedence is intended.
			 * A VFS_STATFS() failure silently skips the mount.
461 			 */
462 			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
463 			    (flags & MNT_WAIT)) &&
464 			    (error = VFS_STATFS(mp, sp, td))) {
465 				VFS_UNLOCK_GIANT(vfslocked);
466 				mtx_lock(&mountlist_mtx);
467 				nmp = TAILQ_NEXT(mp, mnt_list);
468 				vfs_unbusy(mp, td);
469 				continue;
470 			}
/*
 * When suser() fails, report from a private copy with the fsid zeroed
 * and passed through prison_enforce_statfs().
 */
471 			if (suser(td)) {
472 				bcopy(sp, &sb, sizeof(sb));
473 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
474 				prison_enforce_statfs(td->td_ucred, mp, &sb);
475 				sp = &sb;
476 			}
477 			if (bufseg == UIO_SYSSPACE)
478 				bcopy(sp, sfsp, sizeof(*sp));
479 			else /* if (bufseg == UIO_USERSPACE) */ {
480 				error = copyout(sp, sfsp, sizeof(*sp));
481 				if (error) {
/* Abort the walk; td_retval[0] is not set on this path. */
482 					vfs_unbusy(mp, td);
483 					VFS_UNLOCK_GIANT(vfslocked);
484 					return (error);
485 				}
486 			}
487 			sfsp++;
488 		}
489 		VFS_UNLOCK_GIANT(vfslocked);
/* Counted even when the record did not fit or no buffer was given. */
490 		count++;
/* Retake the list lock before reading the successor and unbusying. */
491 		mtx_lock(&mountlist_mtx);
492 		nmp = TAILQ_NEXT(mp, mnt_list);
493 		vfs_unbusy(mp, td);
494 	}
495 	mtx_unlock(&mountlist_mtx);
496 	if (sfsp && count > maxcount)
497 		td->td_retval[0] = maxcount;
498 	else
499 		td->td_retval[0] = count;
500 	return (0);
501 }
502 
503 #ifdef COMPAT_FREEBSD4
504 /*
505  * Get old format filesystem statistics.
506  */
507 static void cvtstatfs(struct statfs *, struct ostatfs *);
508 
509 #ifndef _SYS_SYSPROTO_H_
510 struct freebsd4_statfs_args {
511 	char *path;
512 	struct ostatfs *buf;
513 };
514 #endif
515 int
516 freebsd4_statfs(td, uap)
517 	struct thread *td;
518 	struct freebsd4_statfs_args /* {
519 		char *path;
520 		struct ostatfs *buf;
521 	} */ *uap;
522 {
523 	struct ostatfs osb;
524 	struct statfs sf;
525 	int error;
526 
527 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
528 	if (error)
529 		return (error);
530 	cvtstatfs(&sf, &osb);
531 	return (copyout(&osb, uap->buf, sizeof(osb)));
532 }
533 
534 /*
535  * Get filesystem statistics.
536  */
537 #ifndef _SYS_SYSPROTO_H_
538 struct freebsd4_fstatfs_args {
539 	int fd;
540 	struct ostatfs *buf;
541 };
542 #endif
543 int
544 freebsd4_fstatfs(td, uap)
545 	struct thread *td;
546 	struct freebsd4_fstatfs_args /* {
547 		int fd;
548 		struct ostatfs *buf;
549 	} */ *uap;
550 {
551 	struct ostatfs osb;
552 	struct statfs sf;
553 	int error;
554 
555 	error = kern_fstatfs(td, uap->fd, &sf);
556 	if (error)
557 		return (error);
558 	cvtstatfs(&sf, &osb);
559 	return (copyout(&osb, uap->buf, sizeof(osb)));
560 }
561 
562 /*
563  * Get statistics on all filesystems.
564  */
565 #ifndef _SYS_SYSPROTO_H_
566 struct freebsd4_getfsstat_args {
567 	struct ostatfs *buf;
568 	long bufsize;
569 	int flags;
570 };
571 #endif
572 int
573 freebsd4_getfsstat(td, uap)
574 	struct thread *td;
575 	register struct freebsd4_getfsstat_args /* {
576 		struct ostatfs *buf;
577 		long bufsize;
578 		int flags;
579 	} */ *uap;
580 {
581 	struct statfs *buf, *sp;
582 	struct ostatfs osb;
583 	size_t count, size;
584 	int error;
585 
586 	count = uap->bufsize / sizeof(struct ostatfs);
587 	size = count * sizeof(struct statfs);
588 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
589 	if (size > 0) {
590 		count = td->td_retval[0];
591 		sp = buf;
592 		while (count > 0 && error == 0) {
593 			cvtstatfs(sp, &osb);
594 			error = copyout(&osb, uap->buf, sizeof(osb));
595 			sp++;
596 			uap->buf++;
597 			count--;
598 		}
599 		free(buf, M_TEMP);
600 	}
601 	return (error);
602 }
603 
604 /*
605  * Implement fstatfs() for (NFS) file handles.
606  */
607 #ifndef _SYS_SYSPROTO_H_
608 struct freebsd4_fhstatfs_args {
609 	struct fhandle *u_fhp;
610 	struct ostatfs *buf;
611 };
612 #endif
613 int
614 freebsd4_fhstatfs(td, uap)
615 	struct thread *td;
616 	struct freebsd4_fhstatfs_args /* {
617 		struct fhandle *u_fhp;
618 		struct ostatfs *buf;
619 	} */ *uap;
620 {
621 	struct ostatfs osb;
622 	struct statfs sf;
623 	fhandle_t fh;
624 	int error;
625 
626 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
627 	if (error)
628 		return (error);
629 	error = kern_fhstatfs(td, fh, &sf);
630 	if (error)
631 		return (error);
632 	cvtstatfs(&sf, &osb);
633 	return (copyout(&osb, uap->buf, sizeof(osb)));
634 }
635 
636 /*
637  * Convert a new format statfs structure to an old format statfs structure.
638  */
639 static void
640 cvtstatfs(nsp, osp)
641 	struct statfs *nsp;
642 	struct ostatfs *osp;
643 {
644 
645 	bzero(osp, sizeof(*osp));
646 	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
647 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
648 	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
649 	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
650 	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
651 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
652 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
653 	osp->f_owner = nsp->f_owner;
654 	osp->f_type = nsp->f_type;
655 	osp->f_flags = nsp->f_flags;
656 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
657 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
658 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
659 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
660 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
661 	    MIN(MFSNAMELEN, OMFSNAMELEN));
662 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
663 	    MIN(MNAMELEN, OMNAMELEN));
664 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
665 	    MIN(MNAMELEN, OMNAMELEN));
666 	osp->f_fsid = nsp->f_fsid;
667 }
668 #endif /* COMPAT_FREEBSD4 */
669 
670 /*
671  * Change current working directory to a given file descriptor.
672  */
673 #ifndef _SYS_SYSPROTO_H_
674 struct fchdir_args {
675 	int	fd;
676 };
677 #endif
678 int
679 fchdir(td, uap)
680 	struct thread *td;
681 	struct fchdir_args /* {
682 		int fd;
683 	} */ *uap;
684 {
685 	register struct filedesc *fdp = td->td_proc->p_fd;
686 	struct vnode *vp, *tdp, *vpold;
687 	struct mount *mp;
688 	struct file *fp;
689 	int vfslocked;
690 	int error;
691 
692 	AUDIT_ARG(fd, uap->fd);
693 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
694 		return (error);
695 	vp = fp->f_vnode;
696 	VREF(vp);
697 	fdrop(fp, td);
698 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
699 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
700 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
701 	error = change_dir(vp, td);
702 	while (!error && (mp = vp->v_mountedhere) != NULL) {
703 		int tvfslocked;
704 		if (vfs_busy(mp, 0, 0, td))
705 			continue;
706 		tvfslocked = VFS_LOCK_GIANT(mp);
707 		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
708 		vfs_unbusy(mp, td);
709 		if (error) {
710 			VFS_UNLOCK_GIANT(tvfslocked);
711 			break;
712 		}
713 		vput(vp);
714 		VFS_UNLOCK_GIANT(vfslocked);
715 		vp = tdp;
716 		vfslocked = tvfslocked;
717 	}
718 	if (error) {
719 		vput(vp);
720 		VFS_UNLOCK_GIANT(vfslocked);
721 		return (error);
722 	}
723 	VOP_UNLOCK(vp, 0, td);
724 	VFS_UNLOCK_GIANT(vfslocked);
725 	FILEDESC_LOCK_FAST(fdp);
726 	vpold = fdp->fd_cdir;
727 	fdp->fd_cdir = vp;
728 	FILEDESC_UNLOCK_FAST(fdp);
729 	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
730 	vrele(vpold);
731 	VFS_UNLOCK_GIANT(vfslocked);
732 	return (0);
733 }
734 
735 /*
736  * Change current working directory (``.'').
737  */
738 #ifndef _SYS_SYSPROTO_H_
739 struct chdir_args {
740 	char	*path;
741 };
742 #endif
743 int
744 chdir(td, uap)
745 	struct thread *td;
746 	struct chdir_args /* {
747 		char *path;
748 	} */ *uap;
749 {
750 
751 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
752 }
753 
754 int
755 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
756 {
757 	register struct filedesc *fdp = td->td_proc->p_fd;
758 	int error;
759 	struct nameidata nd;
760 	struct vnode *vp;
761 	int vfslocked;
762 
763 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
764 	    pathseg, path, td);
765 	if ((error = namei(&nd)) != 0)
766 		return (error);
767 	vfslocked = NDHASGIANT(&nd);
768 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
769 		vput(nd.ni_vp);
770 		VFS_UNLOCK_GIANT(vfslocked);
771 		NDFREE(&nd, NDF_ONLY_PNBUF);
772 		return (error);
773 	}
774 	VOP_UNLOCK(nd.ni_vp, 0, td);
775 	VFS_UNLOCK_GIANT(vfslocked);
776 	NDFREE(&nd, NDF_ONLY_PNBUF);
777 	FILEDESC_LOCK_FAST(fdp);
778 	vp = fdp->fd_cdir;
779 	fdp->fd_cdir = nd.ni_vp;
780 	FILEDESC_UNLOCK_FAST(fdp);
781 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
782 	vrele(vp);
783 	VFS_UNLOCK_GIANT(vfslocked);
784 	return (0);
785 }
786 
787 /*
788  * Helper function for raised chroot(2) security function:  Refuse if
789  * any filedescriptors are open directories.
790  */
791 static int
792 chroot_refuse_vdir_fds(fdp)
793 	struct filedesc *fdp;
794 {
795 	struct vnode *vp;
796 	struct file *fp;
797 	int fd;
798 
799 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
800 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
801 		fp = fget_locked(fdp, fd);
802 		if (fp == NULL)
803 			continue;
804 		if (fp->f_type == DTYPE_VNODE) {
805 			vp = fp->f_vnode;
806 			if (vp->v_type == VDIR)
807 				return (EPERM);
808 		}
809 	}
810 	return (0);
811 }
812 
813 /*
814  * This sysctl determines if we will allow a process to chroot(2) if it
815  * has a directory open:
816  *	0: disallowed for all processes.
817  *	1: allowed for processes that were not already chroot(2)'ed.
818  *	2: allowed for all processes.
819  */
820 
821 static int chroot_allow_open_directories = 1;
822 
823 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
824      &chroot_allow_open_directories, 0, "");
825 
826 /*
827  * Change notion of root (``/'') directory.
828  */
829 #ifndef _SYS_SYSPROTO_H_
830 struct chroot_args {
831 	char	*path;
832 };
833 #endif
834 int
835 chroot(td, uap)
836 	struct thread *td;
837 	struct chroot_args /* {
838 		char *path;
839 	} */ *uap;
840 {
841 	int error;
842 	struct nameidata nd;
843 	int vfslocked;
844 
845 	error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
846 	if (error)
847 		return (error);
848 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
849 	    UIO_USERSPACE, uap->path, td);
850 	error = namei(&nd);
851 	if (error)
852 		goto error;
853 	vfslocked = NDHASGIANT(&nd);
854 	if ((error = change_dir(nd.ni_vp, td)) != 0)
855 		goto e_vunlock;
856 #ifdef MAC
857 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
858 		goto e_vunlock;
859 #endif
860 	VOP_UNLOCK(nd.ni_vp, 0, td);
861 	error = change_root(nd.ni_vp, td);
862 	vrele(nd.ni_vp);
863 	VFS_UNLOCK_GIANT(vfslocked);
864 	NDFREE(&nd, NDF_ONLY_PNBUF);
865 	return (error);
866 e_vunlock:
867 	vput(nd.ni_vp);
868 	VFS_UNLOCK_GIANT(vfslocked);
869 error:
870 	NDFREE(&nd, NDF_ONLY_PNBUF);
871 	return (error);
872 }
873 
874 /*
875  * Common routine for chroot and chdir.  Callers must provide a locked vnode
876  * instance.
877  */
878 int
879 change_dir(vp, td)
880 	struct vnode *vp;
881 	struct thread *td;
882 {
883 	int error;
884 
885 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
886 	if (vp->v_type != VDIR)
887 		return (ENOTDIR);
888 #ifdef MAC
889 	error = mac_check_vnode_chdir(td->td_ucred, vp);
890 	if (error)
891 		return (error);
892 #endif
893 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
894 	return (error);
895 }
896 
897 /*
898  * Common routine for chroot() and jail_attach().  The caller is
899  * responsible for invoking suser() and mac_check_vnode_chroot() to
900  * authorize this operation.
901  */
902 int
903 change_root(vp, td)
904 	struct vnode *vp;
905 	struct thread *td;
906 {
907 	struct filedesc *fdp;
908 	struct vnode *oldvp;
909 	int vfslocked;
910 	int error;
911 
912 	VFS_ASSERT_GIANT(vp->v_mount);
913 	fdp = td->td_proc->p_fd;
914 	FILEDESC_LOCK(fdp);
915 	if (chroot_allow_open_directories == 0 ||
916 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
917 		error = chroot_refuse_vdir_fds(fdp);
918 		if (error) {
919 			FILEDESC_UNLOCK(fdp);
920 			return (error);
921 		}
922 	}
923 	oldvp = fdp->fd_rdir;
924 	fdp->fd_rdir = vp;
925 	VREF(fdp->fd_rdir);
926 	if (!fdp->fd_jdir) {
927 		fdp->fd_jdir = vp;
928 		VREF(fdp->fd_jdir);
929 	}
930 	FILEDESC_UNLOCK(fdp);
931 	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
932 	vrele(oldvp);
933 	VFS_UNLOCK_GIANT(vfslocked);
934 	return (0);
935 }
936 
937 /*
938  * Check permissions, allocate an open file structure,
939  * and call the device open routine if any.
940  *
941  * MP SAFE
942  */
943 #ifndef _SYS_SYSPROTO_H_
944 struct open_args {
945 	char	*path;
946 	int	flags;
947 	int	mode;
948 };
949 #endif
950 int
951 open(td, uap)
952 	struct thread *td;
953 	register struct open_args /* {
954 		char *path;
955 		int flags;
956 		int mode;
957 	} */ *uap;
958 {
959 
960 	return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
961 }
962 
/*
 * Open (and possibly create) the file named by `path', addressed
 * according to `pathseg', and install it in td's descriptor table.
 *
 * On success returns 0 with the new descriptor index in
 * td->td_retval[0].  Returns EINVAL when all O_ACCMODE bits are set in
 * `flags', the error from falloc(), or the error left after vn_open(),
 * the O_EXLOCK/O_SHLOCK advisory lock, or the O_TRUNC truncation fails.
 * ERESTART from vn_open() is reported as EINTR.
 */
963 int
964 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
965     int mode)
966 {
967 	struct proc *p = td->td_proc;
968 	struct filedesc *fdp = p->p_fd;
969 	struct file *fp;
970 	struct vnode *vp;
971 	struct vattr vat;
972 	struct mount *mp;
973 	int cmode;
974 	struct file *nfp;
975 	int type, indx, error;
976 	struct flock lf;
977 	struct nameidata nd;
978 	int vfslocked;
979 
980 	AUDIT_ARG(fflags, flags);
981 	AUDIT_ARG(mode, mode);
982 	if ((flags & O_ACCMODE) == O_ACCMODE)
983 		return (EINVAL);
/* Convert the open(2) flag encoding with FFLAGS(). */
984 	flags = FFLAGS(flags);
985 	error = falloc(td, &nfp, &indx);
986 	if (error)
987 		return (error);
988 	/* An extra reference on `nfp' has been held for us by falloc(). */
989 	fp = nfp;
/* Creation mode: apply the file creation mask, keep only permission bits, drop S_ISTXT. */
990 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
991 	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
992 	td->td_dupfd = -1;		/* XXX check for fdopen */
993 	error = vn_open(&nd, &flags, cmode, indx);
994 	if (error) {
995 		/*
996 		 * If the vn_open replaced the method vector, something
997 		 * wondrous happened deep below and we just pass it up
998 		 * pretending we know what we do.
999 		 */
1000 		if (error == ENXIO && fp->f_ops != &badfileops) {
1001 			fdrop(fp, td);
1002 			td->td_retval[0] = indx;
1003 			return (0);
1004 		}
1005 
1006 		/*
1007 		 * release our own reference
1008 		 */
1009 		fdrop(fp, td);
1010 
1011 		/*
1012 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1013 		 * responsible for dropping the old contents of ofiles[indx]
1014 		 * if it succeeds.
1015 		 */
1016 		if ((error == ENODEV || error == ENXIO) &&
1017 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1018 		    (error =
1019 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1020 			td->td_retval[0] = indx;
1021 			return (0);
1022 		}
1023 		/*
1024 		 * Clean up the descriptor, but only if another thread hadn't
1025 		 * replaced or closed it.
1026 		 */
1027 		fdclose(fdp, fp, indx, td);
1028 
1029 		if (error == ERESTART)
1030 			error = EINTR;
1031 		return (error);
1032 	}
1033 	td->td_dupfd = 0;
1034 	vfslocked = NDHASGIANT(&nd);
1035 	NDFREE(&nd, NDF_ONLY_PNBUF);
1036 	vp = nd.ni_vp;
1037 
1038 	/*
1039 	 * There should be 2 references on the file, one from the descriptor
1040 	 * table, and one for us.
1041 	 *
1042 	 * Handle the case where someone closed the file (via its file
1043 	 * descriptor) while we were blocked.  The end result should look
1044 	 * like opening the file succeeded but it was immediately closed.
1045 	 * We call vn_close() manually because we haven't yet hooked up
1046 	 * the various 'struct file' fields.
1047 	 */
1048 	FILEDESC_LOCK(fdp);
1049 	FILE_LOCK(fp);
1050 	if (fp->f_count == 1) {
/* NOTE(review): mp is assigned here but not read again on this path. */
1051 		mp = vp->v_mount;
1052 		KASSERT(fdp->fd_ofiles[indx] != fp,
1053 		    ("Open file descriptor lost all refs"));
1054 		FILE_UNLOCK(fp);
1055 		FILEDESC_UNLOCK(fdp);
1056 		VOP_UNLOCK(vp, 0, td);
1057 		vn_close(vp, flags & FMASK, fp->f_cred, td);
1058 		VFS_UNLOCK_GIANT(vfslocked);
1059 		fdrop(fp, td);
1060 		td->td_retval[0] = indx;
1061 		return (0);
1062 	}
/*
 * Hook the vnode up to the file.  f_data and f_ops are only filled in
 * when still at their initial values (NULL / badfileops), so anything
 * installed during vn_open() is kept.
 */
1063 	fp->f_vnode = vp;
1064 	if (fp->f_data == NULL)
1065 		fp->f_data = vp;
1066 	fp->f_flag = flags & FMASK;
1067 	if (fp->f_ops == &badfileops)
1068 		fp->f_ops = &vnops;
1069 	fp->f_seqcount = 1;
1070 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1071 	FILE_UNLOCK(fp);
1072 	FILEDESC_UNLOCK(fdp);
1073 
1074 	VOP_UNLOCK(vp, 0, td);
/*
 * O_EXLOCK / O_SHLOCK: take a whole-file flock-style advisory lock
 * (write lock for O_EXLOCK, read lock otherwise), waiting unless
 * FNONBLOCK is set.
 */
1075 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1076 		lf.l_whence = SEEK_SET;
1077 		lf.l_start = 0;
1078 		lf.l_len = 0;
1079 		if (flags & O_EXLOCK)
1080 			lf.l_type = F_WRLCK;
1081 		else
1082 			lf.l_type = F_RDLCK;
1083 		type = F_FLOCK;
1084 		if ((flags & FNONBLOCK) == 0)
1085 			type |= F_WAIT;
1086 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1087 			    type)) != 0)
1088 			goto bad;
1089 		fp->f_flag |= FHASLOCK;
1090 	}
/* O_TRUNC: set the file size to zero via VOP_SETATTR() under the vnode lock. */
1091 	if (flags & O_TRUNC) {
1092 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1093 			goto bad;
1094 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1095 		VATTR_NULL(&vat);
1096 		vat.va_size = 0;
1097 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1098 #ifdef MAC
1099 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1100 		if (error == 0)
1101 #endif
1102 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1103 		VOP_UNLOCK(vp, 0, td);
1104 		vn_finished_write(mp);
1105 		if (error)
1106 			goto bad;
1107 	}
1108 	VFS_UNLOCK_GIANT(vfslocked);
1109 	/*
1110 	 * Release our private reference, leaving the one associated with
1111 	 * the descriptor table intact.
1112 	 */
1113 	fdrop(fp, td);
1114 	td->td_retval[0] = indx;
1115 	return (0);
1116 bad:
/* Failure after the file was set up: remove the descriptor, then drop our reference. */
1117 	VFS_UNLOCK_GIANT(vfslocked);
1118 	fdclose(fdp, fp, indx, td);
1119 	fdrop(fp, td);
1120 	return (error);
1121 }
1122 
1123 #ifdef COMPAT_43
1124 /*
1125  * Create a file.
1126  *
1127  * MP SAFE
1128  */
1129 #ifndef _SYS_SYSPROTO_H_
1130 struct ocreat_args {
1131 	char	*path;
1132 	int	mode;
1133 };
1134 #endif
1135 int
1136 ocreat(td, uap)
1137 	struct thread *td;
1138 	register struct ocreat_args /* {
1139 		char *path;
1140 		int mode;
1141 	} */ *uap;
1142 {
1143 
1144 	return (kern_open(td, uap->path, UIO_USERSPACE,
1145 	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
1146 }
1147 #endif /* COMPAT_43 */
1148 
1149 /*
1150  * Create a special file.
1151  */
1152 #ifndef _SYS_SYSPROTO_H_
1153 struct mknod_args {
1154 	char	*path;
1155 	int	mode;
1156 	int	dev;
1157 };
1158 #endif
1159 int
1160 mknod(td, uap)
1161 	struct thread *td;
1162 	register struct mknod_args /* {
1163 		char *path;
1164 		int mode;
1165 		int dev;
1166 	} */ *uap;
1167 {
1168 
1169 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1170 }
1171 
1172 int
1173 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1174     int dev)
1175 {
1176 	struct vnode *vp;
1177 	struct mount *mp;
1178 	struct vattr vattr;
1179 	int error;
1180 	int whiteout = 0;
1181 	struct nameidata nd;
1182 	int vfslocked;
1183 
1184 	AUDIT_ARG(mode, mode);
1185 	AUDIT_ARG(dev, dev);
1186 	switch (mode & S_IFMT) {
1187 	case S_IFCHR:
1188 	case S_IFBLK:
1189 		error = suser(td);
1190 		break;
1191 	default:
1192 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
1193 		break;
1194 	}
1195 	if (error)
1196 		return (error);
1197 restart:
1198 	bwillwrite();
1199 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1200 	    pathseg, path, td);
1201 	if ((error = namei(&nd)) != 0)
1202 		return (error);
1203 	vfslocked = NDHASGIANT(&nd);
1204 	vp = nd.ni_vp;
1205 	if (vp != NULL) {
1206 		NDFREE(&nd, NDF_ONLY_PNBUF);
1207 		if (vp == nd.ni_dvp)
1208 			vrele(nd.ni_dvp);
1209 		else
1210 			vput(nd.ni_dvp);
1211 		vrele(vp);
1212 		VFS_UNLOCK_GIANT(vfslocked);
1213 		return (EEXIST);
1214 	} else {
1215 		VATTR_NULL(&vattr);
1216 		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1217 		vattr.va_mode = (mode & ALLPERMS) &
1218 		    ~td->td_proc->p_fd->fd_cmask;
1219 		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1220 		vattr.va_rdev = dev;
1221 		whiteout = 0;
1222 
1223 		switch (mode & S_IFMT) {
1224 		case S_IFMT:	/* used by badsect to flag bad sectors */
1225 			vattr.va_type = VBAD;
1226 			break;
1227 		case S_IFCHR:
1228 			vattr.va_type = VCHR;
1229 			break;
1230 		case S_IFBLK:
1231 			vattr.va_type = VBLK;
1232 			break;
1233 		case S_IFWHT:
1234 			whiteout = 1;
1235 			break;
1236 		default:
1237 			error = EINVAL;
1238 			break;
1239 		}
1240 	}
1241 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1242 		NDFREE(&nd, NDF_ONLY_PNBUF);
1243 		vput(nd.ni_dvp);
1244 		VFS_UNLOCK_GIANT(vfslocked);
1245 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1246 			return (error);
1247 		goto restart;
1248 	}
1249 #ifdef MAC
1250 	if (error == 0 && !whiteout)
1251 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1252 		    &nd.ni_cnd, &vattr);
1253 #endif
1254 	if (!error) {
1255 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1256 		if (whiteout)
1257 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1258 		else {
1259 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1260 						&nd.ni_cnd, &vattr);
1261 			if (error == 0)
1262 				vput(nd.ni_vp);
1263 		}
1264 	}
1265 	NDFREE(&nd, NDF_ONLY_PNBUF);
1266 	vput(nd.ni_dvp);
1267 	vn_finished_write(mp);
1268 	VFS_UNLOCK_GIANT(vfslocked);
1269 	return (error);
1270 }
1271 
1272 /*
1273  * Create a named pipe.
1274  */
1275 #ifndef _SYS_SYSPROTO_H_
1276 struct mkfifo_args {
1277 	char	*path;
1278 	int	mode;
1279 };
1280 #endif
1281 int
1282 mkfifo(td, uap)
1283 	struct thread *td;
1284 	register struct mkfifo_args /* {
1285 		char *path;
1286 		int mode;
1287 	} */ *uap;
1288 {
1289 
1290 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1291 }
1292 
1293 int
1294 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1295 {
1296 	struct mount *mp;
1297 	struct vattr vattr;
1298 	int error;
1299 	struct nameidata nd;
1300 	int vfslocked;
1301 
1302 	AUDIT_ARG(mode, mode);
1303 restart:
1304 	bwillwrite();
1305 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1306 	    pathseg, path, td);
1307 	if ((error = namei(&nd)) != 0)
1308 		return (error);
1309 	vfslocked = NDHASGIANT(&nd);
1310 	if (nd.ni_vp != NULL) {
1311 		NDFREE(&nd, NDF_ONLY_PNBUF);
1312 		if (nd.ni_vp == nd.ni_dvp)
1313 			vrele(nd.ni_dvp);
1314 		else
1315 			vput(nd.ni_dvp);
1316 		vrele(nd.ni_vp);
1317 		VFS_UNLOCK_GIANT(vfslocked);
1318 		return (EEXIST);
1319 	}
1320 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1321 		NDFREE(&nd, NDF_ONLY_PNBUF);
1322 		vput(nd.ni_dvp);
1323 		VFS_UNLOCK_GIANT(vfslocked);
1324 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1325 			return (error);
1326 		goto restart;
1327 	}
1328 	VATTR_NULL(&vattr);
1329 	vattr.va_type = VFIFO;
1330 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1331 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1332 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1333 #ifdef MAC
1334 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1335 	    &vattr);
1336 	if (error)
1337 		goto out;
1338 #endif
1339 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1340 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1341 	if (error == 0)
1342 		vput(nd.ni_vp);
1343 #ifdef MAC
1344 out:
1345 #endif
1346 	vput(nd.ni_dvp);
1347 	vn_finished_write(mp);
1348 	VFS_UNLOCK_GIANT(vfslocked);
1349 	NDFREE(&nd, NDF_ONLY_PNBUF);
1350 	return (error);
1351 }
1352 
1353 /*
1354  * Make a hard file link.
1355  */
1356 #ifndef _SYS_SYSPROTO_H_
1357 struct link_args {
1358 	char	*path;
1359 	char	*link;
1360 };
1361 #endif
1362 int
1363 link(td, uap)
1364 	struct thread *td;
1365 	register struct link_args /* {
1366 		char *path;
1367 		char *link;
1368 	} */ *uap;
1369 {
1370 	int error;
1371 
1372 	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1373 	return (error);
1374 }
1375 
1376 static int hardlink_check_uid = 0;
1377 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1378     &hardlink_check_uid, 0,
1379     "Unprivileged processes cannot create hard links to files owned by other "
1380     "users");
1381 static int hardlink_check_gid = 0;
1382 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1383     &hardlink_check_gid, 0,
1384     "Unprivileged processes cannot create hard links to files owned by other "
1385     "groups");
1386 
1387 static int
1388 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1389 {
1390 	struct vattr va;
1391 	int error;
1392 
1393 	if (suser_cred(cred, SUSER_ALLOWJAIL) == 0)
1394 		return (0);
1395 
1396 	if (!hardlink_check_uid && !hardlink_check_gid)
1397 		return (0);
1398 
1399 	error = VOP_GETATTR(vp, &va, cred, td);
1400 	if (error != 0)
1401 		return (error);
1402 
1403 	if (hardlink_check_uid) {
1404 		if (cred->cr_uid != va.va_uid)
1405 			return (EPERM);
1406 	}
1407 
1408 	if (hardlink_check_gid) {
1409 		if (!groupmember(va.va_gid, cred))
1410 			return (EPERM);
1411 	}
1412 
1413 	return (0);
1414 }
1415 
1416 int
1417 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1418 {
1419 	struct vnode *vp;
1420 	struct mount *mp;
1421 	struct nameidata nd;
1422 	int vfslocked;
1423 	int lvfslocked;
1424 	int error;
1425 
1426 	bwillwrite();
1427 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
1428 	if ((error = namei(&nd)) != 0)
1429 		return (error);
1430 	vfslocked = NDHASGIANT(&nd);
1431 	NDFREE(&nd, NDF_ONLY_PNBUF);
1432 	vp = nd.ni_vp;
1433 	if (vp->v_type == VDIR) {
1434 		vrele(vp);
1435 		VFS_UNLOCK_GIANT(vfslocked);
1436 		return (EPERM);		/* POSIX */
1437 	}
1438 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1439 		vrele(vp);
1440 		VFS_UNLOCK_GIANT(vfslocked);
1441 		return (error);
1442 	}
1443 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1444 	    segflg, link, td);
1445 	if ((error = namei(&nd)) == 0) {
1446 		lvfslocked = NDHASGIANT(&nd);
1447 		if (nd.ni_vp != NULL) {
1448 			if (nd.ni_dvp == nd.ni_vp)
1449 				vrele(nd.ni_dvp);
1450 			else
1451 				vput(nd.ni_dvp);
1452 			vrele(nd.ni_vp);
1453 			error = EEXIST;
1454 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1455 		    == 0) {
1456 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1457 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1458 			error = can_hardlink(vp, td, td->td_ucred);
1459 			if (error == 0)
1460 #ifdef MAC
1461 				error = mac_check_vnode_link(td->td_ucred,
1462 				    nd.ni_dvp, vp, &nd.ni_cnd);
1463 			if (error == 0)
1464 #endif
1465 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1466 			VOP_UNLOCK(vp, 0, td);
1467 			vput(nd.ni_dvp);
1468 		}
1469 		NDFREE(&nd, NDF_ONLY_PNBUF);
1470 		VFS_UNLOCK_GIANT(lvfslocked);
1471 	}
1472 	vrele(vp);
1473 	vn_finished_write(mp);
1474 	VFS_UNLOCK_GIANT(vfslocked);
1475 	return (error);
1476 }
1477 
1478 /*
1479  * Make a symbolic link.
1480  */
1481 #ifndef _SYS_SYSPROTO_H_
1482 struct symlink_args {
1483 	char	*path;
1484 	char	*link;
1485 };
1486 #endif
1487 int
1488 symlink(td, uap)
1489 	struct thread *td;
1490 	register struct symlink_args /* {
1491 		char *path;
1492 		char *link;
1493 	} */ *uap;
1494 {
1495 
1496 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1497 }
1498 
1499 int
1500 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1501 {
1502 	struct mount *mp;
1503 	struct vattr vattr;
1504 	char *syspath;
1505 	int error;
1506 	struct nameidata nd;
1507 	int vfslocked;
1508 
1509 	if (segflg == UIO_SYSSPACE) {
1510 		syspath = path;
1511 	} else {
1512 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1513 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1514 			goto out;
1515 	}
1516 	AUDIT_ARG(text, syspath);
1517 restart:
1518 	bwillwrite();
1519 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1520 	    segflg, link, td);
1521 	if ((error = namei(&nd)) != 0)
1522 		goto out;
1523 	vfslocked = NDHASGIANT(&nd);
1524 	if (nd.ni_vp) {
1525 		NDFREE(&nd, NDF_ONLY_PNBUF);
1526 		if (nd.ni_vp == nd.ni_dvp)
1527 			vrele(nd.ni_dvp);
1528 		else
1529 			vput(nd.ni_dvp);
1530 		vrele(nd.ni_vp);
1531 		VFS_UNLOCK_GIANT(vfslocked);
1532 		error = EEXIST;
1533 		goto out;
1534 	}
1535 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1536 		NDFREE(&nd, NDF_ONLY_PNBUF);
1537 		vput(nd.ni_dvp);
1538 		VFS_UNLOCK_GIANT(vfslocked);
1539 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1540 			goto out;
1541 		goto restart;
1542 	}
1543 	VATTR_NULL(&vattr);
1544 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1545 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1546 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1547 #ifdef MAC
1548 	vattr.va_type = VLNK;
1549 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1550 	    &vattr);
1551 	if (error)
1552 		goto out2;
1553 #endif
1554 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1555 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1556 	if (error == 0)
1557 		vput(nd.ni_vp);
1558 #ifdef MAC
1559 out2:
1560 #endif
1561 	NDFREE(&nd, NDF_ONLY_PNBUF);
1562 	vput(nd.ni_dvp);
1563 	vn_finished_write(mp);
1564 	VFS_UNLOCK_GIANT(vfslocked);
1565 out:
1566 	if (segflg != UIO_SYSSPACE)
1567 		uma_zfree(namei_zone, syspath);
1568 	return (error);
1569 }
1570 
1571 /*
1572  * Delete a whiteout from the filesystem.
1573  */
1574 int
1575 undelete(td, uap)
1576 	struct thread *td;
1577 	register struct undelete_args /* {
1578 		char *path;
1579 	} */ *uap;
1580 {
1581 	int error;
1582 	struct mount *mp;
1583 	struct nameidata nd;
1584 	int vfslocked;
1585 
1586 restart:
1587 	bwillwrite();
1588 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1589 	    UIO_USERSPACE, uap->path, td);
1590 	error = namei(&nd);
1591 	if (error)
1592 		return (error);
1593 	vfslocked = NDHASGIANT(&nd);
1594 
1595 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1596 		NDFREE(&nd, NDF_ONLY_PNBUF);
1597 		if (nd.ni_vp == nd.ni_dvp)
1598 			vrele(nd.ni_dvp);
1599 		else
1600 			vput(nd.ni_dvp);
1601 		if (nd.ni_vp)
1602 			vrele(nd.ni_vp);
1603 		VFS_UNLOCK_GIANT(vfslocked);
1604 		return (EEXIST);
1605 	}
1606 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1607 		NDFREE(&nd, NDF_ONLY_PNBUF);
1608 		vput(nd.ni_dvp);
1609 		VFS_UNLOCK_GIANT(vfslocked);
1610 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1611 			return (error);
1612 		goto restart;
1613 	}
1614 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1615 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1616 	NDFREE(&nd, NDF_ONLY_PNBUF);
1617 	vput(nd.ni_dvp);
1618 	vn_finished_write(mp);
1619 	VFS_UNLOCK_GIANT(vfslocked);
1620 	return (error);
1621 }
1622 
1623 /*
1624  * Delete a name from the filesystem.
1625  */
1626 #ifndef _SYS_SYSPROTO_H_
1627 struct unlink_args {
1628 	char	*path;
1629 };
1630 #endif
1631 int
1632 unlink(td, uap)
1633 	struct thread *td;
1634 	struct unlink_args /* {
1635 		char *path;
1636 	} */ *uap;
1637 {
1638 	int error;
1639 
1640 	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1641 	return (error);
1642 }
1643 
1644 int
1645 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1646 {
1647 	struct mount *mp;
1648 	struct vnode *vp;
1649 	int error;
1650 	struct nameidata nd;
1651 	int vfslocked;
1652 
1653 restart:
1654 	bwillwrite();
1655 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1656 	    pathseg, path, td);
1657 	if ((error = namei(&nd)) != 0)
1658 		return (error == EINVAL ? EPERM : error);
1659 	vfslocked = NDHASGIANT(&nd);
1660 	vp = nd.ni_vp;
1661 	if (vp->v_type == VDIR)
1662 		error = EPERM;		/* POSIX */
1663 	else {
1664 		/*
1665 		 * The root of a mounted filesystem cannot be deleted.
1666 		 *
1667 		 * XXX: can this only be a VDIR case?
1668 		 */
1669 		if (vp->v_vflag & VV_ROOT)
1670 			error = EBUSY;
1671 	}
1672 	if (error == 0) {
1673 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1674 			NDFREE(&nd, NDF_ONLY_PNBUF);
1675 			vput(nd.ni_dvp);
1676 			if (vp == nd.ni_dvp)
1677 				vrele(vp);
1678 			else
1679 				vput(vp);
1680 			VFS_UNLOCK_GIANT(vfslocked);
1681 			if ((error = vn_start_write(NULL, &mp,
1682 			    V_XSLEEP | PCATCH)) != 0)
1683 				return (error);
1684 			goto restart;
1685 		}
1686 #ifdef MAC
1687 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1688 		    &nd.ni_cnd);
1689 		if (error)
1690 			goto out;
1691 #endif
1692 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1693 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1694 #ifdef MAC
1695 out:
1696 #endif
1697 		vn_finished_write(mp);
1698 	}
1699 	NDFREE(&nd, NDF_ONLY_PNBUF);
1700 	vput(nd.ni_dvp);
1701 	if (vp == nd.ni_dvp)
1702 		vrele(vp);
1703 	else
1704 		vput(vp);
1705 	VFS_UNLOCK_GIANT(vfslocked);
1706 	return (error);
1707 }
1708 
1709 /*
1710  * Reposition read/write file offset.
1711  */
1712 #ifndef _SYS_SYSPROTO_H_
1713 struct lseek_args {
1714 	int	fd;
1715 	int	pad;
1716 	off_t	offset;
1717 	int	whence;
1718 };
1719 #endif
1720 int
1721 lseek(td, uap)
1722 	struct thread *td;
1723 	register struct lseek_args /* {
1724 		int fd;
1725 		int pad;
1726 		off_t offset;
1727 		int whence;
1728 	} */ *uap;
1729 {
1730 	struct ucred *cred = td->td_ucred;
1731 	struct file *fp;
1732 	struct vnode *vp;
1733 	struct vattr vattr;
1734 	off_t offset;
1735 	int error, noneg;
1736 	int vfslocked;
1737 
1738 	if ((error = fget(td, uap->fd, &fp)) != 0)
1739 		return (error);
1740 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1741 		fdrop(fp, td);
1742 		return (ESPIPE);
1743 	}
1744 	vp = fp->f_vnode;
1745 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1746 	noneg = (vp->v_type != VCHR);
1747 	offset = uap->offset;
1748 	switch (uap->whence) {
1749 	case L_INCR:
1750 		if (noneg &&
1751 		    (fp->f_offset < 0 ||
1752 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1753 			error = EOVERFLOW;
1754 			break;
1755 		}
1756 		offset += fp->f_offset;
1757 		break;
1758 	case L_XTND:
1759 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1760 		error = VOP_GETATTR(vp, &vattr, cred, td);
1761 		VOP_UNLOCK(vp, 0, td);
1762 		if (error)
1763 			break;
1764 		if (noneg &&
1765 		    (vattr.va_size > OFF_MAX ||
1766 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1767 			error = EOVERFLOW;
1768 			break;
1769 		}
1770 		offset += vattr.va_size;
1771 		break;
1772 	case L_SET:
1773 		break;
1774 	default:
1775 		error = EINVAL;
1776 	}
1777 	if (error == 0 && noneg && offset < 0)
1778 		error = EINVAL;
1779 	if (error != 0)
1780 		goto drop;
1781 	fp->f_offset = offset;
1782 	*(off_t *)(td->td_retval) = fp->f_offset;
1783 drop:
1784 	fdrop(fp, td);
1785 	VFS_UNLOCK_GIANT(vfslocked);
1786 	return (error);
1787 }
1788 
1789 #if defined(COMPAT_43)
1790 /*
1791  * Reposition read/write file offset.
1792  */
1793 #ifndef _SYS_SYSPROTO_H_
1794 struct olseek_args {
1795 	int	fd;
1796 	long	offset;
1797 	int	whence;
1798 };
1799 #endif
1800 int
1801 olseek(td, uap)
1802 	struct thread *td;
1803 	register struct olseek_args /* {
1804 		int fd;
1805 		long offset;
1806 		int whence;
1807 	} */ *uap;
1808 {
1809 	struct lseek_args /* {
1810 		int fd;
1811 		int pad;
1812 		off_t offset;
1813 		int whence;
1814 	} */ nuap;
1815 	int error;
1816 
1817 	nuap.fd = uap->fd;
1818 	nuap.offset = uap->offset;
1819 	nuap.whence = uap->whence;
1820 	error = lseek(td, &nuap);
1821 	return (error);
1822 }
1823 #endif /* COMPAT_43 */
1824 
1825 /*
1826  * Check access permissions using passed credentials.
1827  */
1828 static int
1829 vn_access(vp, user_flags, cred, td)
1830 	struct vnode	*vp;
1831 	int		user_flags;
1832 	struct ucred	*cred;
1833 	struct thread	*td;
1834 {
1835 	int error, flags;
1836 
1837 	/* Flags == 0 means only check for existence. */
1838 	error = 0;
1839 	if (user_flags) {
1840 		flags = 0;
1841 		if (user_flags & R_OK)
1842 			flags |= VREAD;
1843 		if (user_flags & W_OK)
1844 			flags |= VWRITE;
1845 		if (user_flags & X_OK)
1846 			flags |= VEXEC;
1847 #ifdef MAC
1848 		error = mac_check_vnode_access(cred, vp, flags);
1849 		if (error)
1850 			return (error);
1851 #endif
1852 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1853 			error = VOP_ACCESS(vp, flags, cred, td);
1854 	}
1855 	return (error);
1856 }
1857 
1858 /*
1859  * Check access permissions using "real" credentials.
1860  */
1861 #ifndef _SYS_SYSPROTO_H_
1862 struct access_args {
1863 	char	*path;
1864 	int	flags;
1865 };
1866 #endif
1867 int
1868 access(td, uap)
1869 	struct thread *td;
1870 	register struct access_args /* {
1871 		char *path;
1872 		int flags;
1873 	} */ *uap;
1874 {
1875 
1876 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1877 }
1878 
1879 int
1880 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1881 {
1882 	struct ucred *cred, *tmpcred;
1883 	register struct vnode *vp;
1884 	struct nameidata nd;
1885 	int vfslocked;
1886 	int error;
1887 
1888 	/*
1889 	 * Create and modify a temporary credential instead of one that
1890 	 * is potentially shared.  This could also mess up socket
1891 	 * buffer accounting which can run in an interrupt context.
1892 	 */
1893 	cred = td->td_ucred;
1894 	tmpcred = crdup(cred);
1895 	tmpcred->cr_uid = cred->cr_ruid;
1896 	tmpcred->cr_groups[0] = cred->cr_rgid;
1897 	td->td_ucred = tmpcred;
1898 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1899 	    pathseg, path, td);
1900 	if ((error = namei(&nd)) != 0)
1901 		goto out1;
1902 	vfslocked = NDHASGIANT(&nd);
1903 	vp = nd.ni_vp;
1904 
1905 	error = vn_access(vp, flags, tmpcred, td);
1906 	NDFREE(&nd, NDF_ONLY_PNBUF);
1907 	vput(vp);
1908 	VFS_UNLOCK_GIANT(vfslocked);
1909 out1:
1910 	td->td_ucred = cred;
1911 	crfree(tmpcred);
1912 	return (error);
1913 }
1914 
1915 /*
1916  * Check access permissions using "effective" credentials.
1917  */
1918 #ifndef _SYS_SYSPROTO_H_
1919 struct eaccess_args {
1920 	char	*path;
1921 	int	flags;
1922 };
1923 #endif
1924 int
1925 eaccess(td, uap)
1926 	struct thread *td;
1927 	register struct eaccess_args /* {
1928 		char *path;
1929 		int flags;
1930 	} */ *uap;
1931 {
1932 
1933 	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1934 }
1935 
1936 int
1937 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1938 {
1939 	struct nameidata nd;
1940 	struct vnode *vp;
1941 	int vfslocked;
1942 	int error;
1943 
1944 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1945 	    pathseg, path, td);
1946 	if ((error = namei(&nd)) != 0)
1947 		return (error);
1948 	vp = nd.ni_vp;
1949 	vfslocked = NDHASGIANT(&nd);
1950 	error = vn_access(vp, flags, td->td_ucred, td);
1951 	NDFREE(&nd, NDF_ONLY_PNBUF);
1952 	vput(vp);
1953 	VFS_UNLOCK_GIANT(vfslocked);
1954 	return (error);
1955 }
1956 
1957 #if defined(COMPAT_43)
1958 /*
1959  * Get file status; this version follows links.
1960  */
1961 #ifndef _SYS_SYSPROTO_H_
1962 struct ostat_args {
1963 	char	*path;
1964 	struct ostat *ub;
1965 };
1966 #endif
1967 int
1968 ostat(td, uap)
1969 	struct thread *td;
1970 	register struct ostat_args /* {
1971 		char *path;
1972 		struct ostat *ub;
1973 	} */ *uap;
1974 {
1975 	struct stat sb;
1976 	struct ostat osb;
1977 	int error;
1978 
1979 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1980 	if (error)
1981 		return (error);
1982 	cvtstat(&sb, &osb);
1983 	error = copyout(&osb, uap->ub, sizeof (osb));
1984 	return (error);
1985 }
1986 
1987 /*
1988  * Get file status; this version does not follow links.
1989  */
1990 #ifndef _SYS_SYSPROTO_H_
1991 struct olstat_args {
1992 	char	*path;
1993 	struct ostat *ub;
1994 };
1995 #endif
1996 int
1997 olstat(td, uap)
1998 	struct thread *td;
1999 	register struct olstat_args /* {
2000 		char *path;
2001 		struct ostat *ub;
2002 	} */ *uap;
2003 {
2004 	struct stat sb;
2005 	struct ostat osb;
2006 	int error;
2007 
2008 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2009 	if (error)
2010 		return (error);
2011 	cvtstat(&sb, &osb);
2012 	error = copyout(&osb, uap->ub, sizeof (osb));
2013 	return (error);
2014 }
2015 
/*
 * Convert from a new (struct stat) to an old (struct ostat) stat structure.
 */
2019 void
2020 cvtstat(st, ost)
2021 	struct stat *st;
2022 	struct ostat *ost;
2023 {
2024 
2025 	ost->st_dev = st->st_dev;
2026 	ost->st_ino = st->st_ino;
2027 	ost->st_mode = st->st_mode;
2028 	ost->st_nlink = st->st_nlink;
2029 	ost->st_uid = st->st_uid;
2030 	ost->st_gid = st->st_gid;
2031 	ost->st_rdev = st->st_rdev;
2032 	if (st->st_size < (quad_t)1 << 32)
2033 		ost->st_size = st->st_size;
2034 	else
2035 		ost->st_size = -2;
2036 	ost->st_atime = st->st_atime;
2037 	ost->st_mtime = st->st_mtime;
2038 	ost->st_ctime = st->st_ctime;
2039 	ost->st_blksize = st->st_blksize;
2040 	ost->st_blocks = st->st_blocks;
2041 	ost->st_flags = st->st_flags;
2042 	ost->st_gen = st->st_gen;
2043 }
2044 #endif /* COMPAT_43 */
2045 
2046 /*
2047  * Get file status; this version follows links.
2048  */
2049 #ifndef _SYS_SYSPROTO_H_
2050 struct stat_args {
2051 	char	*path;
2052 	struct stat *ub;
2053 };
2054 #endif
2055 int
2056 stat(td, uap)
2057 	struct thread *td;
2058 	register struct stat_args /* {
2059 		char *path;
2060 		struct stat *ub;
2061 	} */ *uap;
2062 {
2063 	struct stat sb;
2064 	int error;
2065 
2066 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2067 	if (error == 0)
2068 		error = copyout(&sb, uap->ub, sizeof (sb));
2069 	return (error);
2070 }
2071 
2072 int
2073 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2074 {
2075 	struct nameidata nd;
2076 	struct stat sb;
2077 	int error, vfslocked;
2078 
2079 	NDINIT(&nd, LOOKUP,
2080 	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2081 	    pathseg, path, td);
2082 	if ((error = namei(&nd)) != 0)
2083 		return (error);
2084 	vfslocked = NDHASGIANT(&nd);
2085 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2086 	NDFREE(&nd, NDF_ONLY_PNBUF);
2087 	vput(nd.ni_vp);
2088 	VFS_UNLOCK_GIANT(vfslocked);
2089 	if (mtx_owned(&Giant))
2090 		printf("stat(%d): %s\n", vfslocked, path);
2091 	if (error)
2092 		return (error);
2093 	*sbp = sb;
2094 	return (0);
2095 }
2096 
2097 /*
2098  * Get file status; this version does not follow links.
2099  */
2100 #ifndef _SYS_SYSPROTO_H_
2101 struct lstat_args {
2102 	char	*path;
2103 	struct stat *ub;
2104 };
2105 #endif
2106 int
2107 lstat(td, uap)
2108 	struct thread *td;
2109 	register struct lstat_args /* {
2110 		char *path;
2111 		struct stat *ub;
2112 	} */ *uap;
2113 {
2114 	struct stat sb;
2115 	int error;
2116 
2117 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2118 	if (error == 0)
2119 		error = copyout(&sb, uap->ub, sizeof (sb));
2120 	return (error);
2121 }
2122 
2123 int
2124 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2125 {
2126 	struct vnode *vp;
2127 	struct stat sb;
2128 	struct nameidata nd;
2129 	int error, vfslocked;
2130 
2131 	NDINIT(&nd, LOOKUP,
2132 	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2133 	    pathseg, path, td);
2134 	if ((error = namei(&nd)) != 0)
2135 		return (error);
2136 	vfslocked = NDHASGIANT(&nd);
2137 	vp = nd.ni_vp;
2138 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2139 	NDFREE(&nd, NDF_ONLY_PNBUF);
2140 	vput(vp);
2141 	VFS_UNLOCK_GIANT(vfslocked);
2142 	if (error)
2143 		return (error);
2144 	*sbp = sb;
2145 	return (0);
2146 }
2147 
2148 /*
2149  * Implementation of the NetBSD [l]stat() functions.
2150  */
2151 void
2152 cvtnstat(sb, nsb)
2153 	struct stat *sb;
2154 	struct nstat *nsb;
2155 {
2156 	bzero(nsb, sizeof *nsb);
2157 	nsb->st_dev = sb->st_dev;
2158 	nsb->st_ino = sb->st_ino;
2159 	nsb->st_mode = sb->st_mode;
2160 	nsb->st_nlink = sb->st_nlink;
2161 	nsb->st_uid = sb->st_uid;
2162 	nsb->st_gid = sb->st_gid;
2163 	nsb->st_rdev = sb->st_rdev;
2164 	nsb->st_atimespec = sb->st_atimespec;
2165 	nsb->st_mtimespec = sb->st_mtimespec;
2166 	nsb->st_ctimespec = sb->st_ctimespec;
2167 	nsb->st_size = sb->st_size;
2168 	nsb->st_blocks = sb->st_blocks;
2169 	nsb->st_blksize = sb->st_blksize;
2170 	nsb->st_flags = sb->st_flags;
2171 	nsb->st_gen = sb->st_gen;
2172 	nsb->st_birthtimespec = sb->st_birthtimespec;
2173 }
2174 
2175 #ifndef _SYS_SYSPROTO_H_
2176 struct nstat_args {
2177 	char	*path;
2178 	struct nstat *ub;
2179 };
2180 #endif
2181 int
2182 nstat(td, uap)
2183 	struct thread *td;
2184 	register struct nstat_args /* {
2185 		char *path;
2186 		struct nstat *ub;
2187 	} */ *uap;
2188 {
2189 	struct stat sb;
2190 	struct nstat nsb;
2191 	int error;
2192 
2193 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2194 	if (error)
2195 		return (error);
2196 	cvtnstat(&sb, &nsb);
2197 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2198 	return (error);
2199 }
2200 
2201 /*
2202  * NetBSD lstat.  Get file status; this version does not follow links.
2203  */
2204 #ifndef _SYS_SYSPROTO_H_
2205 struct lstat_args {
2206 	char	*path;
2207 	struct stat *ub;
2208 };
2209 #endif
2210 int
2211 nlstat(td, uap)
2212 	struct thread *td;
2213 	register struct nlstat_args /* {
2214 		char *path;
2215 		struct nstat *ub;
2216 	} */ *uap;
2217 {
2218 	struct stat sb;
2219 	struct nstat nsb;
2220 	int error;
2221 
2222 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2223 	if (error)
2224 		return (error);
2225 	cvtnstat(&sb, &nsb);
2226 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2227 	return (error);
2228 }
2229 
2230 /*
2231  * Get configurable pathname variables.
2232  */
2233 #ifndef _SYS_SYSPROTO_H_
2234 struct pathconf_args {
2235 	char	*path;
2236 	int	name;
2237 };
2238 #endif
2239 int
2240 pathconf(td, uap)
2241 	struct thread *td;
2242 	register struct pathconf_args /* {
2243 		char *path;
2244 		int name;
2245 	} */ *uap;
2246 {
2247 
2248 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2249 }
2250 
2251 int
2252 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2253 {
2254 	struct nameidata nd;
2255 	int error, vfslocked;
2256 
2257 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2258 	    pathseg, path, td);
2259 	if ((error = namei(&nd)) != 0)
2260 		return (error);
2261 	vfslocked = NDHASGIANT(&nd);
2262 	NDFREE(&nd, NDF_ONLY_PNBUF);
2263 
2264 	/* If asynchronous I/O is available, it works for all files. */
2265 	if (name == _PC_ASYNC_IO)
2266 		td->td_retval[0] = async_io_version;
2267 	else
2268 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2269 	vput(nd.ni_vp);
2270 	VFS_UNLOCK_GIANT(vfslocked);
2271 	return (error);
2272 }
2273 
2274 /*
2275  * Return target name of a symbolic link.
2276  */
2277 #ifndef _SYS_SYSPROTO_H_
2278 struct readlink_args {
2279 	char	*path;
2280 	char	*buf;
2281 	int	count;
2282 };
2283 #endif
2284 int
2285 readlink(td, uap)
2286 	struct thread *td;
2287 	register struct readlink_args /* {
2288 		char *path;
2289 		char *buf;
2290 		int count;
2291 	} */ *uap;
2292 {
2293 
2294 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2295 	    UIO_USERSPACE, uap->count));
2296 }
2297 
2298 int
2299 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2300     enum uio_seg bufseg, int count)
2301 {
2302 	register struct vnode *vp;
2303 	struct iovec aiov;
2304 	struct uio auio;
2305 	int error;
2306 	struct nameidata nd;
2307 	int vfslocked;
2308 
2309 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2310 	    pathseg, path, td);
2311 	if ((error = namei(&nd)) != 0)
2312 		return (error);
2313 	NDFREE(&nd, NDF_ONLY_PNBUF);
2314 	vfslocked = NDHASGIANT(&nd);
2315 	vp = nd.ni_vp;
2316 #ifdef MAC
2317 	error = mac_check_vnode_readlink(td->td_ucred, vp);
2318 	if (error) {
2319 		vput(vp);
2320 		VFS_UNLOCK_GIANT(vfslocked);
2321 		return (error);
2322 	}
2323 #endif
2324 	if (vp->v_type != VLNK)
2325 		error = EINVAL;
2326 	else {
2327 		aiov.iov_base = buf;
2328 		aiov.iov_len = count;
2329 		auio.uio_iov = &aiov;
2330 		auio.uio_iovcnt = 1;
2331 		auio.uio_offset = 0;
2332 		auio.uio_rw = UIO_READ;
2333 		auio.uio_segflg = bufseg;
2334 		auio.uio_td = td;
2335 		auio.uio_resid = count;
2336 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2337 	}
2338 	vput(vp);
2339 	VFS_UNLOCK_GIANT(vfslocked);
2340 	td->td_retval[0] = count - auio.uio_resid;
2341 	return (error);
2342 }
2343 
2344 /*
2345  * Common implementation code for chflags() and fchflags().
2346  */
2347 static int
2348 setfflags(td, vp, flags)
2349 	struct thread *td;
2350 	struct vnode *vp;
2351 	int flags;
2352 {
2353 	int error;
2354 	struct mount *mp;
2355 	struct vattr vattr;
2356 
2357 	/*
2358 	 * Prevent non-root users from setting flags on devices.  When
2359 	 * a device is reused, users can retain ownership of the device
2360 	 * if they are allowed to set flags and programs assume that
2361 	 * chown can't fail when done as root.
2362 	 */
2363 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2364 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
2365 		if (error)
2366 			return (error);
2367 	}
2368 
2369 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2370 		return (error);
2371 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2372 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2373 	VATTR_NULL(&vattr);
2374 	vattr.va_flags = flags;
2375 #ifdef MAC
2376 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2377 	if (error == 0)
2378 #endif
2379 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2380 	VOP_UNLOCK(vp, 0, td);
2381 	vn_finished_write(mp);
2382 	return (error);
2383 }
2384 
2385 /*
2386  * Change flags of a file given a path name.
2387  */
2388 #ifndef _SYS_SYSPROTO_H_
2389 struct chflags_args {
2390 	char	*path;
2391 	int	flags;
2392 };
2393 #endif
2394 int
2395 chflags(td, uap)
2396 	struct thread *td;
2397 	register struct chflags_args /* {
2398 		char *path;
2399 		int flags;
2400 	} */ *uap;
2401 {
2402 	int error;
2403 	struct nameidata nd;
2404 	int vfslocked;
2405 
2406 	AUDIT_ARG(fflags, uap->flags);
2407 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2408 	    uap->path, td);
2409 	if ((error = namei(&nd)) != 0)
2410 		return (error);
2411 	NDFREE(&nd, NDF_ONLY_PNBUF);
2412 	vfslocked = NDHASGIANT(&nd);
2413 	error = setfflags(td, nd.ni_vp, uap->flags);
2414 	vrele(nd.ni_vp);
2415 	VFS_UNLOCK_GIANT(vfslocked);
2416 	return (error);
2417 }
2418 
2419 /*
2420  * Same as chflags() but doesn't follow symlinks.
2421  */
2422 int
2423 lchflags(td, uap)
2424 	struct thread *td;
2425 	register struct lchflags_args /* {
2426 		char *path;
2427 		int flags;
2428 	} */ *uap;
2429 {
2430 	int error;
2431 	struct nameidata nd;
2432 	int vfslocked;
2433 
2434 	AUDIT_ARG(fflags, uap->flags);
2435 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2436 	    uap->path, td);
2437 	if ((error = namei(&nd)) != 0)
2438 		return (error);
2439 	vfslocked = NDHASGIANT(&nd);
2440 	NDFREE(&nd, NDF_ONLY_PNBUF);
2441 	error = setfflags(td, nd.ni_vp, uap->flags);
2442 	vrele(nd.ni_vp);
2443 	VFS_UNLOCK_GIANT(vfslocked);
2444 	return (error);
2445 }
2446 
2447 /*
2448  * Change flags of a file given a file descriptor.
2449  */
2450 #ifndef _SYS_SYSPROTO_H_
2451 struct fchflags_args {
2452 	int	fd;
2453 	int	flags;
2454 };
2455 #endif
2456 int
2457 fchflags(td, uap)
2458 	struct thread *td;
2459 	register struct fchflags_args /* {
2460 		int fd;
2461 		int flags;
2462 	} */ *uap;
2463 {
2464 	struct file *fp;
2465 	int vfslocked;
2466 	int error;
2467 
2468 	AUDIT_ARG(fd, uap->fd);
2469 	AUDIT_ARG(fflags, uap->flags);
2470 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2471 		return (error);
2472 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2473 #ifdef AUDIT
2474 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2475 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2476 	VOP_UNLOCK(fp->f_vnode, 0, td);
2477 #endif
2478 	error = setfflags(td, fp->f_vnode, uap->flags);
2479 	VFS_UNLOCK_GIANT(vfslocked);
2480 	fdrop(fp, td);
2481 	return (error);
2482 }
2483 
2484 /*
2485  * Common implementation code for chmod(), lchmod() and fchmod().
2486  */
2487 static int
2488 setfmode(td, vp, mode)
2489 	struct thread *td;
2490 	struct vnode *vp;
2491 	int mode;
2492 {
2493 	int error;
2494 	struct mount *mp;
2495 	struct vattr vattr;
2496 
2497 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2498 		return (error);
2499 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2500 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2501 	VATTR_NULL(&vattr);
2502 	vattr.va_mode = mode & ALLPERMS;
2503 #ifdef MAC
2504 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2505 	if (error == 0)
2506 #endif
2507 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2508 	VOP_UNLOCK(vp, 0, td);
2509 	vn_finished_write(mp);
2510 	return (error);
2511 }
2512 
2513 /*
2514  * Change mode of a file given path name.
2515  */
2516 #ifndef _SYS_SYSPROTO_H_
2517 struct chmod_args {
2518 	char	*path;
2519 	int	mode;
2520 };
2521 #endif
2522 int
2523 chmod(td, uap)
2524 	struct thread *td;
2525 	register struct chmod_args /* {
2526 		char *path;
2527 		int mode;
2528 	} */ *uap;
2529 {
2530 
2531 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2532 }
2533 
2534 int
2535 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2536 {
2537 	int error;
2538 	struct nameidata nd;
2539 	int vfslocked;
2540 
2541 	AUDIT_ARG(mode, mode);
2542 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2543 	if ((error = namei(&nd)) != 0)
2544 		return (error);
2545 	vfslocked = NDHASGIANT(&nd);
2546 	NDFREE(&nd, NDF_ONLY_PNBUF);
2547 	error = setfmode(td, nd.ni_vp, mode);
2548 	vrele(nd.ni_vp);
2549 	VFS_UNLOCK_GIANT(vfslocked);
2550 	return (error);
2551 }
2552 
2553 /*
2554  * Change mode of a file given path name (don't follow links.)
2555  */
2556 #ifndef _SYS_SYSPROTO_H_
2557 struct lchmod_args {
2558 	char	*path;
2559 	int	mode;
2560 };
2561 #endif
2562 int
2563 lchmod(td, uap)
2564 	struct thread *td;
2565 	register struct lchmod_args /* {
2566 		char *path;
2567 		int mode;
2568 	} */ *uap;
2569 {
2570 	int error;
2571 	struct nameidata nd;
2572 	int vfslocked;
2573 
2574 	AUDIT_ARG(mode, (mode_t)uap->mode);
2575 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2576 	    uap->path, td);
2577 	if ((error = namei(&nd)) != 0)
2578 		return (error);
2579 	vfslocked = NDHASGIANT(&nd);
2580 	NDFREE(&nd, NDF_ONLY_PNBUF);
2581 	error = setfmode(td, nd.ni_vp, uap->mode);
2582 	vrele(nd.ni_vp);
2583 	VFS_UNLOCK_GIANT(vfslocked);
2584 	return (error);
2585 }
2586 
2587 /*
2588  * Change mode of a file given a file descriptor.
2589  */
2590 #ifndef _SYS_SYSPROTO_H_
2591 struct fchmod_args {
2592 	int	fd;
2593 	int	mode;
2594 };
2595 #endif
2596 int
2597 fchmod(td, uap)
2598 	struct thread *td;
2599 	register struct fchmod_args /* {
2600 		int fd;
2601 		int mode;
2602 	} */ *uap;
2603 {
2604 	struct file *fp;
2605 	int vfslocked;
2606 	int error;
2607 
2608 	AUDIT_ARG(fd, uap->fd);
2609 	AUDIT_ARG(mode, uap->mode);
2610 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2611 		return (error);
2612 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2613 #ifdef AUDIT
2614 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2615 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2616 	VOP_UNLOCK(fp->f_vnode, 0, td);
2617 #endif
2618 	error = setfmode(td, fp->f_vnode, uap->mode);
2619 	VFS_UNLOCK_GIANT(vfslocked);
2620 	fdrop(fp, td);
2621 	return (error);
2622 }
2623 
2624 /*
2625  * Common implementation for chown(), lchown(), and fchown()
2626  */
2627 static int
2628 setfown(td, vp, uid, gid)
2629 	struct thread *td;
2630 	struct vnode *vp;
2631 	uid_t uid;
2632 	gid_t gid;
2633 {
2634 	int error;
2635 	struct mount *mp;
2636 	struct vattr vattr;
2637 
2638 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2639 		return (error);
2640 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2641 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2642 	VATTR_NULL(&vattr);
2643 	vattr.va_uid = uid;
2644 	vattr.va_gid = gid;
2645 #ifdef MAC
2646 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2647 	    vattr.va_gid);
2648 	if (error == 0)
2649 #endif
2650 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2651 	VOP_UNLOCK(vp, 0, td);
2652 	vn_finished_write(mp);
2653 	return (error);
2654 }
2655 
2656 /*
2657  * Set ownership given a path name.
2658  */
2659 #ifndef _SYS_SYSPROTO_H_
2660 struct chown_args {
2661 	char	*path;
2662 	int	uid;
2663 	int	gid;
2664 };
2665 #endif
2666 int
2667 chown(td, uap)
2668 	struct thread *td;
2669 	register struct chown_args /* {
2670 		char *path;
2671 		int uid;
2672 		int gid;
2673 	} */ *uap;
2674 {
2675 
2676 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2677 }
2678 
2679 int
2680 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2681     int gid)
2682 {
2683 	int error;
2684 	struct nameidata nd;
2685 	int vfslocked;
2686 
2687 	AUDIT_ARG(owner, uid, gid);
2688 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2689 	if ((error = namei(&nd)) != 0)
2690 		return (error);
2691 	vfslocked = NDHASGIANT(&nd);
2692 	NDFREE(&nd, NDF_ONLY_PNBUF);
2693 	error = setfown(td, nd.ni_vp, uid, gid);
2694 	vrele(nd.ni_vp);
2695 	VFS_UNLOCK_GIANT(vfslocked);
2696 	return (error);
2697 }
2698 
2699 /*
2700  * Set ownership given a path name, do not cross symlinks.
2701  */
2702 #ifndef _SYS_SYSPROTO_H_
2703 struct lchown_args {
2704 	char	*path;
2705 	int	uid;
2706 	int	gid;
2707 };
2708 #endif
2709 int
2710 lchown(td, uap)
2711 	struct thread *td;
2712 	register struct lchown_args /* {
2713 		char *path;
2714 		int uid;
2715 		int gid;
2716 	} */ *uap;
2717 {
2718 
2719 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2720 }
2721 
2722 int
2723 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2724     int gid)
2725 {
2726 	int error;
2727 	struct nameidata nd;
2728 	int vfslocked;
2729 
2730 	AUDIT_ARG(owner, uid, gid);
2731 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2732 	if ((error = namei(&nd)) != 0)
2733 		return (error);
2734 	vfslocked = NDHASGIANT(&nd);
2735 	NDFREE(&nd, NDF_ONLY_PNBUF);
2736 	error = setfown(td, nd.ni_vp, uid, gid);
2737 	vrele(nd.ni_vp);
2738 	VFS_UNLOCK_GIANT(vfslocked);
2739 	return (error);
2740 }
2741 
2742 /*
2743  * Set ownership given a file descriptor.
2744  */
2745 #ifndef _SYS_SYSPROTO_H_
2746 struct fchown_args {
2747 	int	fd;
2748 	int	uid;
2749 	int	gid;
2750 };
2751 #endif
2752 int
2753 fchown(td, uap)
2754 	struct thread *td;
2755 	register struct fchown_args /* {
2756 		int fd;
2757 		int uid;
2758 		int gid;
2759 	} */ *uap;
2760 {
2761 	struct file *fp;
2762 	int vfslocked;
2763 	int error;
2764 
2765 	AUDIT_ARG(fd, uap->fd);
2766 	AUDIT_ARG(owner, uap->uid, uap->gid);
2767 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2768 		return (error);
2769 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2770 #ifdef AUDIT
2771 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2772 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2773 	VOP_UNLOCK(fp->f_vnode, 0, td);
2774 #endif
2775 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2776 	VFS_UNLOCK_GIANT(vfslocked);
2777 	fdrop(fp, td);
2778 	return (error);
2779 }
2780 
2781 /*
2782  * Common implementation code for utimes(), lutimes(), and futimes().
2783  */
2784 static int
2785 getutimes(usrtvp, tvpseg, tsp)
2786 	const struct timeval *usrtvp;
2787 	enum uio_seg tvpseg;
2788 	struct timespec *tsp;
2789 {
2790 	struct timeval tv[2];
2791 	const struct timeval *tvp;
2792 	int error;
2793 
2794 	if (usrtvp == NULL) {
2795 		microtime(&tv[0]);
2796 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2797 		tsp[1] = tsp[0];
2798 	} else {
2799 		if (tvpseg == UIO_SYSSPACE) {
2800 			tvp = usrtvp;
2801 		} else {
2802 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2803 				return (error);
2804 			tvp = tv;
2805 		}
2806 
2807 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2808 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2809 			return (EINVAL);
2810 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2811 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2812 	}
2813 	return (0);
2814 }
2815 
2816 /*
2817  * Common implementation code for utimes(), lutimes(), and futimes().
2818  */
2819 static int
2820 setutimes(td, vp, ts, numtimes, nullflag)
2821 	struct thread *td;
2822 	struct vnode *vp;
2823 	const struct timespec *ts;
2824 	int numtimes;
2825 	int nullflag;
2826 {
2827 	int error, setbirthtime;
2828 	struct mount *mp;
2829 	struct vattr vattr;
2830 
2831 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2832 		return (error);
2833 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2834 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2835 	setbirthtime = 0;
2836 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2837 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2838 		setbirthtime = 1;
2839 	VATTR_NULL(&vattr);
2840 	vattr.va_atime = ts[0];
2841 	vattr.va_mtime = ts[1];
2842 	if (setbirthtime)
2843 		vattr.va_birthtime = ts[1];
2844 	if (numtimes > 2)
2845 		vattr.va_birthtime = ts[2];
2846 	if (nullflag)
2847 		vattr.va_vaflags |= VA_UTIMES_NULL;
2848 #ifdef MAC
2849 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2850 	    vattr.va_mtime);
2851 #endif
2852 	if (error == 0)
2853 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2854 	VOP_UNLOCK(vp, 0, td);
2855 	vn_finished_write(mp);
2856 	return (error);
2857 }
2858 
2859 /*
2860  * Set the access and modification times of a file.
2861  */
2862 #ifndef _SYS_SYSPROTO_H_
2863 struct utimes_args {
2864 	char	*path;
2865 	struct	timeval *tptr;
2866 };
2867 #endif
2868 int
2869 utimes(td, uap)
2870 	struct thread *td;
2871 	register struct utimes_args /* {
2872 		char *path;
2873 		struct timeval *tptr;
2874 	} */ *uap;
2875 {
2876 
2877 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2878 	    UIO_USERSPACE));
2879 }
2880 
2881 int
2882 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2883     struct timeval *tptr, enum uio_seg tptrseg)
2884 {
2885 	struct timespec ts[2];
2886 	int error;
2887 	struct nameidata nd;
2888 	int vfslocked;
2889 
2890 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2891 		return (error);
2892 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2893 	if ((error = namei(&nd)) != 0)
2894 		return (error);
2895 	vfslocked = NDHASGIANT(&nd);
2896 	NDFREE(&nd, NDF_ONLY_PNBUF);
2897 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2898 	vrele(nd.ni_vp);
2899 	VFS_UNLOCK_GIANT(vfslocked);
2900 	return (error);
2901 }
2902 
2903 /*
2904  * Set the access and modification times of a file.
2905  */
2906 #ifndef _SYS_SYSPROTO_H_
2907 struct lutimes_args {
2908 	char	*path;
2909 	struct	timeval *tptr;
2910 };
2911 #endif
2912 int
2913 lutimes(td, uap)
2914 	struct thread *td;
2915 	register struct lutimes_args /* {
2916 		char *path;
2917 		struct timeval *tptr;
2918 	} */ *uap;
2919 {
2920 
2921 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2922 	    UIO_USERSPACE));
2923 }
2924 
2925 int
2926 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2927     struct timeval *tptr, enum uio_seg tptrseg)
2928 {
2929 	struct timespec ts[2];
2930 	int error;
2931 	struct nameidata nd;
2932 	int vfslocked;
2933 
2934 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2935 		return (error);
2936 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2937 	if ((error = namei(&nd)) != 0)
2938 		return (error);
2939 	vfslocked = NDHASGIANT(&nd);
2940 	NDFREE(&nd, NDF_ONLY_PNBUF);
2941 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2942 	vrele(nd.ni_vp);
2943 	VFS_UNLOCK_GIANT(vfslocked);
2944 	return (error);
2945 }
2946 
2947 /*
2948  * Set the access and modification times of a file.
2949  */
2950 #ifndef _SYS_SYSPROTO_H_
2951 struct futimes_args {
2952 	int	fd;
2953 	struct	timeval *tptr;
2954 };
2955 #endif
2956 int
2957 futimes(td, uap)
2958 	struct thread *td;
2959 	register struct futimes_args /* {
2960 		int  fd;
2961 		struct timeval *tptr;
2962 	} */ *uap;
2963 {
2964 
2965 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2966 }
2967 
2968 int
2969 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2970     enum uio_seg tptrseg)
2971 {
2972 	struct timespec ts[2];
2973 	struct file *fp;
2974 	int vfslocked;
2975 	int error;
2976 
2977 	AUDIT_ARG(fd, fd);
2978 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2979 		return (error);
2980 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2981 		return (error);
2982 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2983 #ifdef AUDIT
2984 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2985 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2986 	VOP_UNLOCK(fp->f_vnode, 0, td);
2987 #endif
2988 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2989 	VFS_UNLOCK_GIANT(vfslocked);
2990 	fdrop(fp, td);
2991 	return (error);
2992 }
2993 
2994 /*
2995  * Truncate a file given its path name.
2996  */
2997 #ifndef _SYS_SYSPROTO_H_
2998 struct truncate_args {
2999 	char	*path;
3000 	int	pad;
3001 	off_t	length;
3002 };
3003 #endif
3004 int
3005 truncate(td, uap)
3006 	struct thread *td;
3007 	register struct truncate_args /* {
3008 		char *path;
3009 		int pad;
3010 		off_t length;
3011 	} */ *uap;
3012 {
3013 
3014 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3015 }
3016 
3017 int
3018 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3019 {
3020 	struct mount *mp;
3021 	struct vnode *vp;
3022 	struct vattr vattr;
3023 	int error;
3024 	struct nameidata nd;
3025 	int vfslocked;
3026 
3027 	if (length < 0)
3028 		return(EINVAL);
3029 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3030 	if ((error = namei(&nd)) != 0)
3031 		return (error);
3032 	vfslocked = NDHASGIANT(&nd);
3033 	vp = nd.ni_vp;
3034 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3035 		vrele(vp);
3036 		VFS_UNLOCK_GIANT(vfslocked);
3037 		return (error);
3038 	}
3039 	NDFREE(&nd, NDF_ONLY_PNBUF);
3040 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3041 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3042 	if (vp->v_type == VDIR)
3043 		error = EISDIR;
3044 #ifdef MAC
3045 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
3046 	}
3047 #endif
3048 	else if ((error = vn_writechk(vp)) == 0 &&
3049 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3050 		VATTR_NULL(&vattr);
3051 		vattr.va_size = length;
3052 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3053 	}
3054 	vput(vp);
3055 	vn_finished_write(mp);
3056 	VFS_UNLOCK_GIANT(vfslocked);
3057 	return (error);
3058 }
3059 
3060 /*
3061  * Truncate a file given a file descriptor.
3062  */
3063 #ifndef _SYS_SYSPROTO_H_
3064 struct ftruncate_args {
3065 	int	fd;
3066 	int	pad;
3067 	off_t	length;
3068 };
3069 #endif
3070 int
3071 ftruncate(td, uap)
3072 	struct thread *td;
3073 	register struct ftruncate_args /* {
3074 		int fd;
3075 		int pad;
3076 		off_t length;
3077 	} */ *uap;
3078 {
3079 	struct mount *mp;
3080 	struct vattr vattr;
3081 	struct vnode *vp;
3082 	struct file *fp;
3083 	int vfslocked;
3084 	int error;
3085 
3086 	AUDIT_ARG(fd, uap->fd);
3087 	if (uap->length < 0)
3088 		return(EINVAL);
3089 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3090 		return (error);
3091 	if ((fp->f_flag & FWRITE) == 0) {
3092 		fdrop(fp, td);
3093 		return (EINVAL);
3094 	}
3095 	vp = fp->f_vnode;
3096 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3097 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3098 		goto drop;
3099 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3100 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3101 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3102 	if (vp->v_type == VDIR)
3103 		error = EISDIR;
3104 #ifdef MAC
3105 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3106 	    vp))) {
3107 	}
3108 #endif
3109 	else if ((error = vn_writechk(vp)) == 0) {
3110 		VATTR_NULL(&vattr);
3111 		vattr.va_size = uap->length;
3112 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3113 	}
3114 	VOP_UNLOCK(vp, 0, td);
3115 	vn_finished_write(mp);
3116 drop:
3117 	VFS_UNLOCK_GIANT(vfslocked);
3118 	fdrop(fp, td);
3119 	return (error);
3120 }
3121 
3122 #if defined(COMPAT_43)
3123 /*
3124  * Truncate a file given its path name.
3125  */
3126 #ifndef _SYS_SYSPROTO_H_
3127 struct otruncate_args {
3128 	char	*path;
3129 	long	length;
3130 };
3131 #endif
3132 int
3133 otruncate(td, uap)
3134 	struct thread *td;
3135 	register struct otruncate_args /* {
3136 		char *path;
3137 		long length;
3138 	} */ *uap;
3139 {
3140 	struct truncate_args /* {
3141 		char *path;
3142 		int pad;
3143 		off_t length;
3144 	} */ nuap;
3145 
3146 	nuap.path = uap->path;
3147 	nuap.length = uap->length;
3148 	return (truncate(td, &nuap));
3149 }
3150 
3151 /*
3152  * Truncate a file given a file descriptor.
3153  */
3154 #ifndef _SYS_SYSPROTO_H_
3155 struct oftruncate_args {
3156 	int	fd;
3157 	long	length;
3158 };
3159 #endif
3160 int
3161 oftruncate(td, uap)
3162 	struct thread *td;
3163 	register struct oftruncate_args /* {
3164 		int fd;
3165 		long length;
3166 	} */ *uap;
3167 {
3168 	struct ftruncate_args /* {
3169 		int fd;
3170 		int pad;
3171 		off_t length;
3172 	} */ nuap;
3173 
3174 	nuap.fd = uap->fd;
3175 	nuap.length = uap->length;
3176 	return (ftruncate(td, &nuap));
3177 }
3178 #endif /* COMPAT_43 */
3179 
3180 /*
3181  * Sync an open file.
3182  */
3183 #ifndef _SYS_SYSPROTO_H_
3184 struct fsync_args {
3185 	int	fd;
3186 };
3187 #endif
3188 int
3189 fsync(td, uap)
3190 	struct thread *td;
3191 	struct fsync_args /* {
3192 		int fd;
3193 	} */ *uap;
3194 {
3195 	struct vnode *vp;
3196 	struct mount *mp;
3197 	struct file *fp;
3198 	int vfslocked;
3199 	int error;
3200 
3201 	AUDIT_ARG(fd, uap->fd);
3202 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3203 		return (error);
3204 	vp = fp->f_vnode;
3205 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3206 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3207 		goto drop;
3208 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3209 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3210 	if (vp->v_object != NULL) {
3211 		VM_OBJECT_LOCK(vp->v_object);
3212 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3213 		VM_OBJECT_UNLOCK(vp->v_object);
3214 	}
3215 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3216 
3217 	VOP_UNLOCK(vp, 0, td);
3218 	vn_finished_write(mp);
3219 drop:
3220 	VFS_UNLOCK_GIANT(vfslocked);
3221 	fdrop(fp, td);
3222 	return (error);
3223 }
3224 
3225 /*
3226  * Rename files.  Source and destination must either both be directories,
3227  * or both not be directories.  If target is a directory, it must be empty.
3228  */
3229 #ifndef _SYS_SYSPROTO_H_
3230 struct rename_args {
3231 	char	*from;
3232 	char	*to;
3233 };
3234 #endif
3235 int
3236 rename(td, uap)
3237 	struct thread *td;
3238 	register struct rename_args /* {
3239 		char *from;
3240 		char *to;
3241 	} */ *uap;
3242 {
3243 
3244 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3245 }
3246 
3247 int
3248 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3249 {
3250 	struct mount *mp = NULL;
3251 	struct vnode *tvp, *fvp, *tdvp;
3252 	struct nameidata fromnd, tond;
3253 	int tvfslocked;
3254 	int fvfslocked;
3255 	int error;
3256 
3257 	bwillwrite();
3258 #ifdef MAC
3259 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3260 	    AUDITVNODE1, pathseg, from, td);
3261 #else
3262 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3263 	    AUDITVNODE1, pathseg, from, td);
3264 #endif
3265 	if ((error = namei(&fromnd)) != 0)
3266 		return (error);
3267 	fvfslocked = NDHASGIANT(&fromnd);
3268 	tvfslocked = 0;
3269 #ifdef MAC
3270 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3271 	    fromnd.ni_vp, &fromnd.ni_cnd);
3272 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3273 	VOP_UNLOCK(fromnd.ni_vp, 0, td);
3274 #endif
3275 	fvp = fromnd.ni_vp;
3276 	if (error == 0)
3277 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3278 	if (error != 0) {
3279 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3280 		vrele(fromnd.ni_dvp);
3281 		vrele(fvp);
3282 		goto out1;
3283 	}
3284 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3285 	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3286 	if (fromnd.ni_vp->v_type == VDIR)
3287 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3288 	if ((error = namei(&tond)) != 0) {
3289 		/* Translate error code for rename("dir1", "dir2/."). */
3290 		if (error == EISDIR && fvp->v_type == VDIR)
3291 			error = EINVAL;
3292 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3293 		vrele(fromnd.ni_dvp);
3294 		vrele(fvp);
3295 		vn_finished_write(mp);
3296 		goto out1;
3297 	}
3298 	tvfslocked = NDHASGIANT(&tond);
3299 	tdvp = tond.ni_dvp;
3300 	tvp = tond.ni_vp;
3301 	if (tvp != NULL) {
3302 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3303 			error = ENOTDIR;
3304 			goto out;
3305 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3306 			error = EISDIR;
3307 			goto out;
3308 		}
3309 	}
3310 	if (fvp == tdvp)
3311 		error = EINVAL;
3312 	/*
3313 	 * If the source is the same as the destination (that is, if they
3314 	 * are links to the same vnode), then there is nothing to do.
3315 	 */
3316 	if (fvp == tvp)
3317 		error = -1;
3318 #ifdef MAC
3319 	else
3320 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3321 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3322 #endif
3323 out:
3324 	if (!error) {
3325 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3326 		if (fromnd.ni_dvp != tdvp) {
3327 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3328 		}
3329 		if (tvp) {
3330 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3331 		}
3332 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3333 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3334 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3335 		NDFREE(&tond, NDF_ONLY_PNBUF);
3336 	} else {
3337 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3338 		NDFREE(&tond, NDF_ONLY_PNBUF);
3339 		if (tvp)
3340 			vput(tvp);
3341 		if (tdvp == tvp)
3342 			vrele(tdvp);
3343 		else
3344 			vput(tdvp);
3345 		vrele(fromnd.ni_dvp);
3346 		vrele(fvp);
3347 	}
3348 	vrele(tond.ni_startdir);
3349 	vn_finished_write(mp);
3350 out1:
3351 	if (fromnd.ni_startdir)
3352 		vrele(fromnd.ni_startdir);
3353 	VFS_UNLOCK_GIANT(fvfslocked);
3354 	VFS_UNLOCK_GIANT(tvfslocked);
3355 	if (error == -1)
3356 		return (0);
3357 	return (error);
3358 }
3359 
3360 /*
3361  * Make a directory file.
3362  */
3363 #ifndef _SYS_SYSPROTO_H_
3364 struct mkdir_args {
3365 	char	*path;
3366 	int	mode;
3367 };
3368 #endif
3369 int
3370 mkdir(td, uap)
3371 	struct thread *td;
3372 	register struct mkdir_args /* {
3373 		char *path;
3374 		int mode;
3375 	} */ *uap;
3376 {
3377 
3378 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3379 }
3380 
3381 int
3382 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3383 {
3384 	struct mount *mp;
3385 	struct vnode *vp;
3386 	struct vattr vattr;
3387 	int error;
3388 	struct nameidata nd;
3389 	int vfslocked;
3390 
3391 	AUDIT_ARG(mode, mode);
3392 restart:
3393 	bwillwrite();
3394 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3395 	    segflg, path, td);
3396 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3397 	if ((error = namei(&nd)) != 0)
3398 		return (error);
3399 	vfslocked = NDHASGIANT(&nd);
3400 	vp = nd.ni_vp;
3401 	if (vp != NULL) {
3402 		NDFREE(&nd, NDF_ONLY_PNBUF);
3403 		/*
3404 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3405 		 * the strange behaviour of leaving the vnode unlocked
3406 		 * if the target is the same vnode as the parent.
3407 		 */
3408 		if (vp == nd.ni_dvp)
3409 			vrele(nd.ni_dvp);
3410 		else
3411 			vput(nd.ni_dvp);
3412 		vrele(vp);
3413 		VFS_UNLOCK_GIANT(vfslocked);
3414 		return (EEXIST);
3415 	}
3416 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3417 		NDFREE(&nd, NDF_ONLY_PNBUF);
3418 		vput(nd.ni_dvp);
3419 		VFS_UNLOCK_GIANT(vfslocked);
3420 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3421 			return (error);
3422 		goto restart;
3423 	}
3424 	VATTR_NULL(&vattr);
3425 	vattr.va_type = VDIR;
3426 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3427 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3428 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3429 #ifdef MAC
3430 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3431 	    &vattr);
3432 	if (error)
3433 		goto out;
3434 #endif
3435 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3436 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3437 #ifdef MAC
3438 out:
3439 #endif
3440 	NDFREE(&nd, NDF_ONLY_PNBUF);
3441 	vput(nd.ni_dvp);
3442 	if (!error)
3443 		vput(nd.ni_vp);
3444 	vn_finished_write(mp);
3445 	VFS_UNLOCK_GIANT(vfslocked);
3446 	return (error);
3447 }
3448 
3449 /*
3450  * Remove a directory file.
3451  */
3452 #ifndef _SYS_SYSPROTO_H_
3453 struct rmdir_args {
3454 	char	*path;
3455 };
3456 #endif
3457 int
3458 rmdir(td, uap)
3459 	struct thread *td;
3460 	struct rmdir_args /* {
3461 		char *path;
3462 	} */ *uap;
3463 {
3464 
3465 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3466 }
3467 
3468 int
3469 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3470 {
3471 	struct mount *mp;
3472 	struct vnode *vp;
3473 	int error;
3474 	struct nameidata nd;
3475 	int vfslocked;
3476 
3477 restart:
3478 	bwillwrite();
3479 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3480 	    pathseg, path, td);
3481 	if ((error = namei(&nd)) != 0)
3482 		return (error);
3483 	vfslocked = NDHASGIANT(&nd);
3484 	vp = nd.ni_vp;
3485 	if (vp->v_type != VDIR) {
3486 		error = ENOTDIR;
3487 		goto out;
3488 	}
3489 	/*
3490 	 * No rmdir "." please.
3491 	 */
3492 	if (nd.ni_dvp == vp) {
3493 		error = EINVAL;
3494 		goto out;
3495 	}
3496 	/*
3497 	 * The root of a mounted filesystem cannot be deleted.
3498 	 */
3499 	if (vp->v_vflag & VV_ROOT) {
3500 		error = EBUSY;
3501 		goto out;
3502 	}
3503 #ifdef MAC
3504 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3505 	    &nd.ni_cnd);
3506 	if (error)
3507 		goto out;
3508 #endif
3509 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3510 		NDFREE(&nd, NDF_ONLY_PNBUF);
3511 		vput(vp);
3512 		if (nd.ni_dvp == vp)
3513 			vrele(nd.ni_dvp);
3514 		else
3515 			vput(nd.ni_dvp);
3516 		VFS_UNLOCK_GIANT(vfslocked);
3517 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3518 			return (error);
3519 		goto restart;
3520 	}
3521 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3522 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3523 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3524 	vn_finished_write(mp);
3525 out:
3526 	NDFREE(&nd, NDF_ONLY_PNBUF);
3527 	vput(vp);
3528 	if (nd.ni_dvp == vp)
3529 		vrele(nd.ni_dvp);
3530 	else
3531 		vput(nd.ni_dvp);
3532 	VFS_UNLOCK_GIANT(vfslocked);
3533 	return (error);
3534 }
3535 
3536 #ifdef COMPAT_43
/*
 * Read a block of directory entries in a filesystem independent format.
 *
 * COMPAT_43 variant: unless the entries are read straight into the user
 * buffer (see below), each entry's d_type/d_namlen bytes are rewritten in
 * a kernel buffer before the data is copied out.
 *
 * Returns EINVAL if count exceeds 64KB or the descriptor is not a
 * directory, EBADF if it was not opened for reading, EIO if an entry with
 * a zero record length is seen, otherwise the error from the MAC check,
 * VOP_READDIR(), uiomove() or the final copyout().  The file offset at
 * the start of the read is copied out to *basep and the number of bytes
 * transferred is stored in td_retval[0].
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(td, uap)
	struct thread *td;
	register struct ogetdirentries_args /* {
		int fd;
		char *buf;
		u_int count;
		long *basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt, vfslocked;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
	/*
	 * Re-entered with Giant released and vp possibly replaced when a
	 * union mount requires reading from another directory.
	 */
unionread:
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	if (vp->v_type != VDIR) {
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		return (EINVAL);
	}
	/* Describe the user buffer as a single-segment read request. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember the starting offset; it is what gets reported in *basep. */
	loff = auio.uio_offset = fp->f_offset;
#ifdef MAC
	error = mac_check_vnode_readdir(td->td_ucred, vp);
	if (error) {
		VOP_UNLOCK(vp, 0, td);
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		return (error);
	}
#endif
	/*
	 * On non-little-endian machines a mount with mnt_maxsymlinklen <= 0
	 * is read directly into the user buffer.  In every other case the
	 * braced block below runs (it is the "else" arm when the #if is
	 * active): read into a kernel buffer, fix up each entry, copy out.
	 */
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/* Same request as auio, but targeting a temporary kernel buffer. */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				/* A zero record length would never advance dp. */
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					error = EIO;
					break;
				}
			}
			/* Copy out only if the whole buffer was walked. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, td);
	if (error) {
		VFS_UNLOCK_GIANT(vfslocked);
		fdrop(fp, td);
		return (error);
	}
	/* Nothing was transferred: check whether a union mount applies. */
	if (uap->count == auio.uio_resid) {
		if (union_dircheckp) {
			error = union_dircheckp(td, &vp, fp);
			/* -1 from the hook means: retry the read using vp. */
			if (error == -1) {
				VFS_UNLOCK_GIANT(vfslocked);
				goto unionread;
			}
			if (error) {
				VFS_UNLOCK_GIANT(vfslocked);
				fdrop(fp, td);
				return (error);
			}
		}
		/*
		 * XXX We could delay dropping the lock above but
		 * union_dircheckp complicates things.
		 */
		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
		if ((vp->v_vflag & VV_ROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			/*
			 * Root of a MNT_UNION mount: switch the open file
			 * over to the covered vnode, rewind, and read again.
			 */
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->f_vnode = vp;
			fp->f_data = vp;
			fp->f_offset = 0;
			vput(tvp);
			VFS_UNLOCK_GIANT(vfslocked);
			goto unionread;
		}
		VOP_UNLOCK(vp, 0, td);
	}
	VFS_UNLOCK_GIANT(vfslocked);
	/* The byte count is stored even if this copyout() fails. */
	error = copyout(&loff, uap->basep, sizeof(long));
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	return (error);
}
3698 #endif /* COMPAT_43 */
3699 
3700 /*
3701  * Read a block of directory entries in a filesystem independent format.
3702  */
3703 #ifndef _SYS_SYSPROTO_H_
3704 struct getdirentries_args {
3705 	int	fd;
3706 	char	*buf;
3707 	u_int	count;
3708 	long	*basep;
3709 };
3710 #endif
3711 int
3712 getdirentries(td, uap)
3713 	struct thread *td;
3714 	register struct getdirentries_args /* {
3715 		int fd;
3716 		char *buf;
3717 		u_int count;
3718 		long *basep;
3719 	} */ *uap;
3720 {
3721 	struct vnode *vp;
3722 	struct file *fp;
3723 	struct uio auio;
3724 	struct iovec aiov;
3725 	int vfslocked;
3726 	long loff;
3727 	int error, eofflag;
3728 
3729 	AUDIT_ARG(fd, uap->fd);
3730 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3731 		return (error);
3732 	if ((fp->f_flag & FREAD) == 0) {
3733 		fdrop(fp, td);
3734 		return (EBADF);
3735 	}
3736 	vp = fp->f_vnode;
3737 unionread:
3738 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3739 	if (vp->v_type != VDIR) {
3740 		error = EINVAL;
3741 		goto fail;
3742 	}
3743 	aiov.iov_base = uap->buf;
3744 	aiov.iov_len = uap->count;
3745 	auio.uio_iov = &aiov;
3746 	auio.uio_iovcnt = 1;
3747 	auio.uio_rw = UIO_READ;
3748 	auio.uio_segflg = UIO_USERSPACE;
3749 	auio.uio_td = td;
3750 	auio.uio_resid = uap->count;
3751 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3752 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3753 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3754 	loff = auio.uio_offset = fp->f_offset;
3755 #ifdef MAC
3756 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3757 	if (error == 0)
3758 #endif
3759 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3760 		    NULL);
3761 	fp->f_offset = auio.uio_offset;
3762 	VOP_UNLOCK(vp, 0, td);
3763 	if (error)
3764 		goto fail;
3765 	if (uap->count == auio.uio_resid) {
3766 		if (union_dircheckp) {
3767 			error = union_dircheckp(td, &vp, fp);
3768 			if (error == -1) {
3769 				VFS_UNLOCK_GIANT(vfslocked);
3770 				goto unionread;
3771 			}
3772 			if (error)
3773 				goto fail;
3774 		}
3775 		/*
3776 		 * XXX We could delay dropping the lock above but
3777 		 * union_dircheckp complicates things.
3778 		 */
3779 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3780 		if ((vp->v_vflag & VV_ROOT) &&
3781 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3782 			struct vnode *tvp = vp;
3783 			vp = vp->v_mount->mnt_vnodecovered;
3784 			VREF(vp);
3785 			fp->f_vnode = vp;
3786 			fp->f_data = vp;
3787 			fp->f_offset = 0;
3788 			vput(tvp);
3789 			VFS_UNLOCK_GIANT(vfslocked);
3790 			goto unionread;
3791 		}
3792 		VOP_UNLOCK(vp, 0, td);
3793 	}
3794 	if (uap->basep != NULL) {
3795 		error = copyout(&loff, uap->basep, sizeof(long));
3796 	}
3797 	td->td_retval[0] = uap->count - auio.uio_resid;
3798 fail:
3799 	VFS_UNLOCK_GIANT(vfslocked);
3800 	fdrop(fp, td);
3801 	return (error);
3802 }
3803 #ifndef _SYS_SYSPROTO_H_
3804 struct getdents_args {
3805 	int fd;
3806 	char *buf;
3807 	size_t count;
3808 };
3809 #endif
3810 int
3811 getdents(td, uap)
3812 	struct thread *td;
3813 	register struct getdents_args /* {
3814 		int fd;
3815 		char *buf;
3816 		u_int count;
3817 	} */ *uap;
3818 {
3819 	struct getdirentries_args ap;
3820 	ap.fd = uap->fd;
3821 	ap.buf = uap->buf;
3822 	ap.count = uap->count;
3823 	ap.basep = NULL;
3824 	return (getdirentries(td, &ap));
3825 }
3826 
3827 /*
3828  * Set the mode mask for creation of filesystem nodes.
3829  *
3830  * MP SAFE
3831  */
3832 #ifndef _SYS_SYSPROTO_H_
3833 struct umask_args {
3834 	int	newmask;
3835 };
3836 #endif
3837 int
3838 umask(td, uap)
3839 	struct thread *td;
3840 	struct umask_args /* {
3841 		int newmask;
3842 	} */ *uap;
3843 {
3844 	register struct filedesc *fdp;
3845 
3846 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3847 	fdp = td->td_proc->p_fd;
3848 	td->td_retval[0] = fdp->fd_cmask;
3849 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3850 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3851 	return (0);
3852 }
3853 
3854 /*
3855  * Void all references to file by ripping underlying filesystem
3856  * away from vnode.
3857  */
3858 #ifndef _SYS_SYSPROTO_H_
3859 struct revoke_args {
3860 	char	*path;
3861 };
3862 #endif
3863 int
3864 revoke(td, uap)
3865 	struct thread *td;
3866 	register struct revoke_args /* {
3867 		char *path;
3868 	} */ *uap;
3869 {
3870 	struct vnode *vp;
3871 	struct vattr vattr;
3872 	int error;
3873 	struct nameidata nd;
3874 	int vfslocked;
3875 
3876 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3877 	    UIO_USERSPACE, uap->path, td);
3878 	if ((error = namei(&nd)) != 0)
3879 		return (error);
3880 	vfslocked = NDHASGIANT(&nd);
3881 	vp = nd.ni_vp;
3882 	NDFREE(&nd, NDF_ONLY_PNBUF);
3883 	if (vp->v_type != VCHR) {
3884 		error = EINVAL;
3885 		goto out;
3886 	}
3887 #ifdef MAC
3888 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3889 	if (error)
3890 		goto out;
3891 #endif
3892 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3893 	if (error)
3894 		goto out;
3895 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3896 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
3897 		if (error)
3898 			goto out;
3899 	}
3900 	if (vcount(vp) > 1)
3901 		VOP_REVOKE(vp, REVOKEALL);
3902 out:
3903 	vput(vp);
3904 	VFS_UNLOCK_GIANT(vfslocked);
3905 	return (error);
3906 }
3907 
3908 /*
3909  * Convert a user file descriptor to a kernel file entry.
3910  * A reference on the file entry is held upon returning.
3911  */
3912 int
3913 getvnode(fdp, fd, fpp)
3914 	struct filedesc *fdp;
3915 	int fd;
3916 	struct file **fpp;
3917 {
3918 	int error;
3919 	struct file *fp;
3920 
3921 	fp = NULL;
3922 	if (fdp == NULL)
3923 		error = EBADF;
3924 	else {
3925 		FILEDESC_LOCK(fdp);
3926 		if ((u_int)fd >= fdp->fd_nfiles ||
3927 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3928 			error = EBADF;
3929 		else if (fp->f_vnode == NULL) {
3930 			fp = NULL;
3931 			error = EINVAL;
3932 		} else {
3933 			fhold(fp);
3934 			error = 0;
3935 		}
3936 		FILEDESC_UNLOCK(fdp);
3937 	}
3938 	*fpp = fp;
3939 	return (error);
3940 }
3941 
3942 /*
3943  * Get (NFS) file handle
3944  */
3945 #ifndef _SYS_SYSPROTO_H_
3946 struct lgetfh_args {
3947 	char	*fname;
3948 	fhandle_t *fhp;
3949 };
3950 #endif
3951 int
3952 lgetfh(td, uap)
3953 	struct thread *td;
3954 	register struct lgetfh_args *uap;
3955 {
3956 	struct nameidata nd;
3957 	fhandle_t fh;
3958 	register struct vnode *vp;
3959 	int vfslocked;
3960 	int error;
3961 
3962 	error = suser(td);
3963 	if (error)
3964 		return (error);
3965 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3966 	    UIO_USERSPACE, uap->fname, td);
3967 	error = namei(&nd);
3968 	if (error)
3969 		return (error);
3970 	vfslocked = NDHASGIANT(&nd);
3971 	NDFREE(&nd, NDF_ONLY_PNBUF);
3972 	vp = nd.ni_vp;
3973 	bzero(&fh, sizeof(fh));
3974 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3975 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3976 	vput(vp);
3977 	VFS_UNLOCK_GIANT(vfslocked);
3978 	if (error)
3979 		return (error);
3980 	error = copyout(&fh, uap->fhp, sizeof (fh));
3981 	return (error);
3982 }
3983 
3984 #ifndef _SYS_SYSPROTO_H_
3985 struct getfh_args {
3986 	char	*fname;
3987 	fhandle_t *fhp;
3988 };
3989 #endif
3990 int
3991 getfh(td, uap)
3992 	struct thread *td;
3993 	register struct getfh_args *uap;
3994 {
3995 	struct nameidata nd;
3996 	fhandle_t fh;
3997 	register struct vnode *vp;
3998 	int vfslocked;
3999 	int error;
4000 
4001 	error = suser(td);
4002 	if (error)
4003 		return (error);
4004 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4005 	    UIO_USERSPACE, uap->fname, td);
4006 	error = namei(&nd);
4007 	if (error)
4008 		return (error);
4009 	vfslocked = NDHASGIANT(&nd);
4010 	NDFREE(&nd, NDF_ONLY_PNBUF);
4011 	vp = nd.ni_vp;
4012 	bzero(&fh, sizeof(fh));
4013 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4014 	error = VFS_VPTOFH(vp, &fh.fh_fid);
4015 	vput(vp);
4016 	VFS_UNLOCK_GIANT(vfslocked);
4017 	if (error)
4018 		return (error);
4019 	error = copyout(&fh, uap->fhp, sizeof (fh));
4020 	return (error);
4021 }
4022 
/*
 * syscall for the rpc.lockd to use to translate a NFS file handle into
 * an open descriptor.
 *
 * warning: do not remove the suser() call or this becomes one giant
 * security hole.
 *
 * The handle is copied in from uap->u_fhp, resolved to a mount with
 * vfs_getvfs() and to a vnode with VFS_FHTOVP(), and then opened with a
 * hand-inlined copy of the vn_open() checks.  On success the new
 * descriptor index is returned in td->td_retval[0].
 *
 * Returns 0 on success; EINVAL if the flags request neither read nor
 * write or include O_CREAT; ESTALE if no mount matches the handle's
 * fsid; EMLINK for a symlink; EOPNOTSUPP for a socket; EISDIR for a
 * write/truncate open of a directory; otherwise the error from the
 * failing call.
 *
 * MP SAFE
 */
#ifndef _SYS_SYSPROTO_H_
struct fhopen_args {
	const struct fhandle *u_fhp;
	int flags;
};
#endif
int
fhopen(td, uap)
	struct thread *td;
	struct fhopen_args /* {
		const struct fhandle *u_fhp;
		int flags;
	} */ *uap;
{
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	int fmode, mode, error, type;
	struct file *nfp;
	int vfslocked;
	int indx;

	error = suser(td);
	if (error)
		return (error);
	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	if (error)
		return(error);
	/* find the mount point */
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL)
		return (ESTALE);
	/* Every exit below this point releases mp with vfs_rel(). */
	vfslocked = VFS_LOCK_GIANT(mp);
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
	if (error)
		goto out;
	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	/* Translate the open flags into the access mode to be checked. */
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (fmode & O_APPEND)
		mode |= VAPPEND;
#ifdef MAC
	error = mac_check_vnode_open(td->td_ucred, vp, mode);
	if (error)
		goto bad;
#endif
	if (mode) {
		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		/*
		 * The vnode lock is dropped around vn_start_write() and
		 * retaken afterwards; on failure the vnode is already
		 * unlocked, hence vrele() + "out" rather than "bad".
		 */
		VOP_UNLOCK(vp, 0, td);				/* XXX */
		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
			vrele(vp);
			goto out;
		}
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
#ifdef MAC
		/*
		 * We don't yet have fp->f_cred, so use td->td_ucred, which
		 * should be right.
		 */
		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
		if (error == 0) {
#endif
			/* Truncate by setting the size attribute to zero. */
			VATTR_NULL(vap);
			vap->va_size = 0;
			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
#ifdef MAC
		}
#endif
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
	if (error)
		goto bad;

	if (fmode & FWRITE)
		vp->v_writecount++;

	/*
	 * end of vn_open code
	 */

	/*
	 * NOTE(review): if falloc() fails the vnode has already been through
	 * VOP_OPEN() but is only vput() on the "bad" path -- confirm whether
	 * a matching close is needed here.
	 */
	if ((error = falloc(td, &nfp, &indx)) != 0) {
		if (fmode & FWRITE)
			vp->v_writecount--;
		goto bad;
	}
	/* An extra reference on `nfp' has been held for us by falloc(). */
	fp = nfp;

	/* Wire the vnode into the new file entry. */
	nfp->f_vnode = vp;
	nfp->f_data = vp;
	nfp->f_flag = fmode & FMASK;
	nfp->f_ops = &vnops;
	nfp->f_type = DTYPE_VNODE;
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file flock-style lock: write for O_EXLOCK, else read. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode lock is not held across VOP_ADVLOCK(). */
		VOP_UNLOCK(vp, 0, td);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0) {
			/*
			 * The lock request failed.  Normally close the
			 * descriptor but handle the case where someone might
			 * have dup()d or close()d it when we weren't looking.
			 */
			fdclose(fdp, fp, indx, td);

			/*
			 * release our private reference
			 */
			fdrop(fp, td);
			goto out;
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		fp->f_flag |= FHASLOCK;
	}

	/* Success: unlock the vnode and drop the extra falloc() reference. */
	VOP_UNLOCK(vp, 0, td);
	fdrop(fp, td);
	vfs_rel(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	td->td_retval[0] = indx;
	return (0);

bad:
	/* Error with the vnode still locked and referenced. */
	vput(vp);
out:
	/* Error with no vnode left to release. */
	vfs_rel(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
4217 
4218 /*
4219  * Stat an (NFS) file handle.
4220  *
4221  * MP SAFE
4222  */
4223 #ifndef _SYS_SYSPROTO_H_
4224 struct fhstat_args {
4225 	struct fhandle *u_fhp;
4226 	struct stat *sb;
4227 };
4228 #endif
4229 int
4230 fhstat(td, uap)
4231 	struct thread *td;
4232 	register struct fhstat_args /* {
4233 		struct fhandle *u_fhp;
4234 		struct stat *sb;
4235 	} */ *uap;
4236 {
4237 	struct stat sb;
4238 	fhandle_t fh;
4239 	struct mount *mp;
4240 	struct vnode *vp;
4241 	int vfslocked;
4242 	int error;
4243 
4244 	error = suser(td);
4245 	if (error)
4246 		return (error);
4247 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4248 	if (error)
4249 		return (error);
4250 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4251 		return (ESTALE);
4252 	vfslocked = VFS_LOCK_GIANT(mp);
4253 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4254 		vfs_rel(mp);
4255 		VFS_UNLOCK_GIANT(vfslocked);
4256 		return (error);
4257 	}
4258 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4259 	vput(vp);
4260 	vfs_rel(mp);
4261 	VFS_UNLOCK_GIANT(vfslocked);
4262 	if (error)
4263 		return (error);
4264 	error = copyout(&sb, uap->sb, sizeof(sb));
4265 	return (error);
4266 }
4267 
4268 /*
4269  * Implement fstatfs() for (NFS) file handles.
4270  *
4271  * MP SAFE
4272  */
4273 #ifndef _SYS_SYSPROTO_H_
4274 struct fhstatfs_args {
4275 	struct fhandle *u_fhp;
4276 	struct statfs *buf;
4277 };
4278 #endif
4279 int
4280 fhstatfs(td, uap)
4281 	struct thread *td;
4282 	struct fhstatfs_args /* {
4283 		struct fhandle *u_fhp;
4284 		struct statfs *buf;
4285 	} */ *uap;
4286 {
4287 	struct statfs sf;
4288 	fhandle_t fh;
4289 	int error;
4290 
4291 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4292 	if (error)
4293 		return (error);
4294 	error = kern_fhstatfs(td, fh, &sf);
4295 	if (error)
4296 		return (error);
4297 	return (copyout(&sf, uap->buf, sizeof(sf)));
4298 }
4299 
4300 int
4301 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4302 {
4303 	struct statfs *sp;
4304 	struct mount *mp;
4305 	struct vnode *vp;
4306 	int vfslocked;
4307 	int error;
4308 
4309 	error = suser(td);
4310 	if (error)
4311 		return (error);
4312 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4313 		return (ESTALE);
4314 	vfslocked = VFS_LOCK_GIANT(mp);
4315 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4316 	if (error) {
4317 		VFS_UNLOCK_GIANT(vfslocked);
4318 		vfs_rel(mp);
4319 		return (error);
4320 	}
4321 	vput(vp);
4322 	error = prison_canseemount(td->td_ucred, mp);
4323 	if (error)
4324 		goto out;
4325 #ifdef MAC
4326 	error = mac_check_mount_stat(td->td_ucred, mp);
4327 	if (error)
4328 		goto out;
4329 #endif
4330 	/*
4331 	 * Set these in case the underlying filesystem fails to do so.
4332 	 */
4333 	sp = &mp->mnt_stat;
4334 	sp->f_version = STATFS_VERSION;
4335 	sp->f_namemax = NAME_MAX;
4336 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4337 	error = VFS_STATFS(mp, sp, td);
4338 	if (error == 0)
4339 		*buf = *sp;
4340 out:
4341 	vfs_rel(mp);
4342 	VFS_UNLOCK_GIANT(vfslocked);
4343 	return (error);
4344 }
4345 
4346 /*
4347  * Syscall to push extended attribute configuration information into the
4348  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4349  * a command (int cmd), and attribute name and misc data.  For now, the
4350  * attribute name is left in userspace for consumption by the VFS_op.
4351  * It will probably be changed to be copied into sysspace by the
4352  * syscall in the future, once issues with various consumers of the
4353  * attribute code have raised their hands.
4354  *
4355  * Currently this is used only by UFS Extended Attributes.
4356  */
4357 int
4358 extattrctl(td, uap)
4359 	struct thread *td;
4360 	struct extattrctl_args /* {
4361 		const char *path;
4362 		int cmd;
4363 		const char *filename;
4364 		int attrnamespace;
4365 		const char *attrname;
4366 	} */ *uap;
4367 {
4368 	struct vnode *filename_vp;
4369 	struct nameidata nd;
4370 	struct mount *mp, *mp_writable;
4371 	char attrname[EXTATTR_MAXNAMELEN];
4372 	int vfslocked, fnvfslocked, error;
4373 
4374 	AUDIT_ARG(cmd, uap->cmd);
4375 	AUDIT_ARG(value, uap->attrnamespace);
4376 	/*
4377 	 * uap->attrname is not always defined.  We check again later when we
4378 	 * invoke the VFS call so as to pass in NULL there if needed.
4379 	 */
4380 	if (uap->attrname != NULL) {
4381 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4382 		    NULL);
4383 		if (error)
4384 			return (error);
4385 	}
4386 	AUDIT_ARG(text, attrname);
4387 
4388 	vfslocked = fnvfslocked = 0;
4389 	/*
4390 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4391 	 * which VFS_EXTATTRCTL() will later release.
4392 	 */
4393 	filename_vp = NULL;
4394 	if (uap->filename != NULL) {
4395 		NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF |
4396 		    AUDITVNODE2, UIO_USERSPACE, uap->filename, td);
4397 		error = namei(&nd);
4398 		if (error)
4399 			return (error);
4400 		fnvfslocked = NDHASGIANT(&nd);
4401 		filename_vp = nd.ni_vp;
4402 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4403 	}
4404 
4405 	/* uap->path is always defined. */
4406 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4407 	    uap->path, td);
4408 	error = namei(&nd);
4409 	if (error) {
4410 		if (filename_vp != NULL)
4411 			vput(filename_vp);
4412 		goto out;
4413 	}
4414 	vfslocked = NDHASGIANT(&nd);
4415 	mp = nd.ni_vp->v_mount;
4416 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4417 	NDFREE(&nd, 0);
4418 	if (error) {
4419 		if (filename_vp != NULL)
4420 			vput(filename_vp);
4421 		goto out;
4422 	}
4423 
4424 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
4425 	    uap->attrname != NULL ? attrname : NULL, td);
4426 
4427 	vn_finished_write(mp_writable);
4428 	/*
4429 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4430 	 * filename_vp, so vrele it if it is defined.
4431 	 */
4432 	if (filename_vp != NULL)
4433 		vrele(filename_vp);
4434 out:
4435 	VFS_UNLOCK_GIANT(fnvfslocked);
4436 	VFS_UNLOCK_GIANT(vfslocked);
4437 	return (error);
4438 }
4439 
4440 /*-
4441  * Set a named extended attribute on a file or directory
4442  *
4443  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4444  *            kernelspace string pointer "attrname", userspace buffer
4445  *            pointer "data", buffer length "nbytes", thread "td".
4446  * Returns: 0 on success, an error number otherwise
4447  * Locks: none
4448  * References: vp must be a valid reference for the duration of the call
4449  */
4450 static int
4451 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4452     void *data, size_t nbytes, struct thread *td)
4453 {
4454 	struct mount *mp;
4455 	struct uio auio;
4456 	struct iovec aiov;
4457 	ssize_t cnt;
4458 	int error;
4459 
4460 	VFS_ASSERT_GIANT(vp->v_mount);
4461 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4462 	if (error)
4463 		return (error);
4464 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4465 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4466 
4467 	aiov.iov_base = data;
4468 	aiov.iov_len = nbytes;
4469 	auio.uio_iov = &aiov;
4470 	auio.uio_iovcnt = 1;
4471 	auio.uio_offset = 0;
4472 	if (nbytes > INT_MAX) {
4473 		error = EINVAL;
4474 		goto done;
4475 	}
4476 	auio.uio_resid = nbytes;
4477 	auio.uio_rw = UIO_WRITE;
4478 	auio.uio_segflg = UIO_USERSPACE;
4479 	auio.uio_td = td;
4480 	cnt = nbytes;
4481 
4482 #ifdef MAC
4483 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4484 	    attrname, &auio);
4485 	if (error)
4486 		goto done;
4487 #endif
4488 
4489 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4490 	    td->td_ucred, td);
4491 	cnt -= auio.uio_resid;
4492 	td->td_retval[0] = cnt;
4493 
4494 done:
4495 	VOP_UNLOCK(vp, 0, td);
4496 	vn_finished_write(mp);
4497 	return (error);
4498 }
4499 
4500 int
4501 extattr_set_fd(td, uap)
4502 	struct thread *td;
4503 	struct extattr_set_fd_args /* {
4504 		int fd;
4505 		int attrnamespace;
4506 		const char *attrname;
4507 		void *data;
4508 		size_t nbytes;
4509 	} */ *uap;
4510 {
4511 	struct file *fp;
4512 	char attrname[EXTATTR_MAXNAMELEN];
4513 	int vfslocked, error;
4514 
4515 	AUDIT_ARG(fd, uap->fd);
4516 	AUDIT_ARG(value, uap->attrnamespace);
4517 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4518 	if (error)
4519 		return (error);
4520 	AUDIT_ARG(text, attrname);
4521 
4522 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4523 	if (error)
4524 		return (error);
4525 
4526 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4527 	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
4528 	    attrname, uap->data, uap->nbytes, td);
4529 	fdrop(fp, td);
4530 	VFS_UNLOCK_GIANT(vfslocked);
4531 
4532 	return (error);
4533 }
4534 
4535 int
4536 extattr_set_file(td, uap)
4537 	struct thread *td;
4538 	struct extattr_set_file_args /* {
4539 		const char *path;
4540 		int attrnamespace;
4541 		const char *attrname;
4542 		void *data;
4543 		size_t nbytes;
4544 	} */ *uap;
4545 {
4546 	struct nameidata nd;
4547 	char attrname[EXTATTR_MAXNAMELEN];
4548 	int vfslocked, error;
4549 
4550 	AUDIT_ARG(value, uap->attrnamespace);
4551 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4552 	if (error)
4553 		return (error);
4554 	AUDIT_ARG(text, attrname);
4555 
4556 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4557 	    uap->path, td);
4558 	error = namei(&nd);
4559 	if (error)
4560 		return (error);
4561 	NDFREE(&nd, NDF_ONLY_PNBUF);
4562 
4563 	vfslocked = NDHASGIANT(&nd);
4564 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4565 	    uap->data, uap->nbytes, td);
4566 
4567 	vrele(nd.ni_vp);
4568 	VFS_UNLOCK_GIANT(vfslocked);
4569 	return (error);
4570 }
4571 
4572 int
4573 extattr_set_link(td, uap)
4574 	struct thread *td;
4575 	struct extattr_set_link_args /* {
4576 		const char *path;
4577 		int attrnamespace;
4578 		const char *attrname;
4579 		void *data;
4580 		size_t nbytes;
4581 	} */ *uap;
4582 {
4583 	struct nameidata nd;
4584 	char attrname[EXTATTR_MAXNAMELEN];
4585 	int vfslocked, error;
4586 
4587 	AUDIT_ARG(value, uap->attrnamespace);
4588 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4589 	if (error)
4590 		return (error);
4591 	AUDIT_ARG(text, attrname);
4592 
4593 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4594 	    uap->path, td);
4595 	error = namei(&nd);
4596 	if (error)
4597 		return (error);
4598 	NDFREE(&nd, NDF_ONLY_PNBUF);
4599 
4600 	vfslocked = NDHASGIANT(&nd);
4601 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4602 	    uap->data, uap->nbytes, td);
4603 
4604 	vrele(nd.ni_vp);
4605 	VFS_UNLOCK_GIANT(vfslocked);
4606 	return (error);
4607 }
4608 
4609 /*-
4610  * Get a named extended attribute on a file or directory
4611  *
4612  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4613  *            kernelspace string pointer "attrname", userspace buffer
4614  *            pointer "data", buffer length "nbytes", thread "td".
4615  * Returns: 0 on success, an error number otherwise
4616  * Locks: none
4617  * References: vp must be a valid reference for the duration of the call
4618  */
4619 static int
4620 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4621     void *data, size_t nbytes, struct thread *td)
4622 {
4623 	struct uio auio, *auiop;
4624 	struct iovec aiov;
4625 	ssize_t cnt;
4626 	size_t size, *sizep;
4627 	int error;
4628 
4629 	VFS_ASSERT_GIANT(vp->v_mount);
4630 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4631 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4632 
4633 	/*
4634 	 * Slightly unusual semantics: if the user provides a NULL data
4635 	 * pointer, they don't want to receive the data, just the
4636 	 * maximum read length.
4637 	 */
4638 	auiop = NULL;
4639 	sizep = NULL;
4640 	cnt = 0;
4641 	if (data != NULL) {
4642 		aiov.iov_base = data;
4643 		aiov.iov_len = nbytes;
4644 		auio.uio_iov = &aiov;
4645 		auio.uio_iovcnt = 1;
4646 		auio.uio_offset = 0;
4647 		if (nbytes > INT_MAX) {
4648 			error = EINVAL;
4649 			goto done;
4650 		}
4651 		auio.uio_resid = nbytes;
4652 		auio.uio_rw = UIO_READ;
4653 		auio.uio_segflg = UIO_USERSPACE;
4654 		auio.uio_td = td;
4655 		auiop = &auio;
4656 		cnt = nbytes;
4657 	} else
4658 		sizep = &size;
4659 
4660 #ifdef MAC
4661 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4662 	    attrname, &auio);
4663 	if (error)
4664 		goto done;
4665 #endif
4666 
4667 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4668 	    td->td_ucred, td);
4669 
4670 	if (auiop != NULL) {
4671 		cnt -= auio.uio_resid;
4672 		td->td_retval[0] = cnt;
4673 	} else
4674 		td->td_retval[0] = size;
4675 
4676 done:
4677 	VOP_UNLOCK(vp, 0, td);
4678 	return (error);
4679 }
4680 
4681 int
4682 extattr_get_fd(td, uap)
4683 	struct thread *td;
4684 	struct extattr_get_fd_args /* {
4685 		int fd;
4686 		int attrnamespace;
4687 		const char *attrname;
4688 		void *data;
4689 		size_t nbytes;
4690 	} */ *uap;
4691 {
4692 	struct file *fp;
4693 	char attrname[EXTATTR_MAXNAMELEN];
4694 	int vfslocked, error;
4695 
4696 	AUDIT_ARG(fd, uap->fd);
4697 	AUDIT_ARG(value, uap->attrnamespace);
4698 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4699 	if (error)
4700 		return (error);
4701 	AUDIT_ARG(text, attrname);
4702 
4703 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4704 	if (error)
4705 		return (error);
4706 
4707 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4708 	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4709 	    attrname, uap->data, uap->nbytes, td);
4710 
4711 	fdrop(fp, td);
4712 	VFS_UNLOCK_GIANT(vfslocked);
4713 	return (error);
4714 }
4715 
4716 int
4717 extattr_get_file(td, uap)
4718 	struct thread *td;
4719 	struct extattr_get_file_args /* {
4720 		const char *path;
4721 		int attrnamespace;
4722 		const char *attrname;
4723 		void *data;
4724 		size_t nbytes;
4725 	} */ *uap;
4726 {
4727 	struct nameidata nd;
4728 	char attrname[EXTATTR_MAXNAMELEN];
4729 	int vfslocked, error;
4730 
4731 	AUDIT_ARG(value, uap->attrnamespace);
4732 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4733 	if (error)
4734 		return (error);
4735 	AUDIT_ARG(text, attrname);
4736 
4737 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4738 	    uap->path, td);
4739 	error = namei(&nd);
4740 	if (error)
4741 		return (error);
4742 	NDFREE(&nd, NDF_ONLY_PNBUF);
4743 
4744 	vfslocked = NDHASGIANT(&nd);
4745 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4746 	    uap->data, uap->nbytes, td);
4747 
4748 	vrele(nd.ni_vp);
4749 	VFS_UNLOCK_GIANT(vfslocked);
4750 	return (error);
4751 }
4752 
4753 int
4754 extattr_get_link(td, uap)
4755 	struct thread *td;
4756 	struct extattr_get_link_args /* {
4757 		const char *path;
4758 		int attrnamespace;
4759 		const char *attrname;
4760 		void *data;
4761 		size_t nbytes;
4762 	} */ *uap;
4763 {
4764 	struct nameidata nd;
4765 	char attrname[EXTATTR_MAXNAMELEN];
4766 	int vfslocked, error;
4767 
4768 	AUDIT_ARG(value, uap->attrnamespace);
4769 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4770 	if (error)
4771 		return (error);
4772 	AUDIT_ARG(text, attrname);
4773 
4774 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4775 	    uap->path, td);
4776 	error = namei(&nd);
4777 	if (error)
4778 		return (error);
4779 	NDFREE(&nd, NDF_ONLY_PNBUF);
4780 
4781 	vfslocked = NDHASGIANT(&nd);
4782 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4783 	    uap->data, uap->nbytes, td);
4784 
4785 	vrele(nd.ni_vp);
4786 	VFS_UNLOCK_GIANT(vfslocked);
4787 	return (error);
4788 }
4789 
4790 /*
4791  * extattr_delete_vp(): Delete a named extended attribute on a file or
4792  *                      directory
4793  *
4794  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4795  *            kernelspace string pointer "attrname", proc "p"
4796  * Returns: 0 on success, an error number otherwise
4797  * Locks: none
4798  * References: vp must be a valid reference for the duration of the call
4799  */
4800 static int
4801 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4802     struct thread *td)
4803 {
4804 	struct mount *mp;
4805 	int error;
4806 
4807 	VFS_ASSERT_GIANT(vp->v_mount);
4808 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4809 	if (error)
4810 		return (error);
4811 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4812 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4813 
4814 #ifdef MAC
4815 	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4816 	    attrname);
4817 	if (error)
4818 		goto done;
4819 #endif
4820 
4821 	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4822 	    td);
4823 	if (error == EOPNOTSUPP)
4824 		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4825 		    td->td_ucred, td);
4826 #ifdef MAC
4827 done:
4828 #endif
4829 	VOP_UNLOCK(vp, 0, td);
4830 	vn_finished_write(mp);
4831 	return (error);
4832 }
4833 
4834 int
4835 extattr_delete_fd(td, uap)
4836 	struct thread *td;
4837 	struct extattr_delete_fd_args /* {
4838 		int fd;
4839 		int attrnamespace;
4840 		const char *attrname;
4841 	} */ *uap;
4842 {
4843 	struct file *fp;
4844 	char attrname[EXTATTR_MAXNAMELEN];
4845 	int vfslocked, error;
4846 
4847 	AUDIT_ARG(fd, uap->fd);
4848 	AUDIT_ARG(value, uap->attrnamespace);
4849 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4850 	if (error)
4851 		return (error);
4852 	AUDIT_ARG(text, attrname);
4853 
4854 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4855 	if (error)
4856 		return (error);
4857 
4858 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4859 	error = extattr_delete_vp(fp->f_vnode, uap->attrnamespace,
4860 	    attrname, td);
4861 	fdrop(fp, td);
4862 	VFS_UNLOCK_GIANT(vfslocked);
4863 	return (error);
4864 }
4865 
4866 int
4867 extattr_delete_file(td, uap)
4868 	struct thread *td;
4869 	struct extattr_delete_file_args /* {
4870 		const char *path;
4871 		int attrnamespace;
4872 		const char *attrname;
4873 	} */ *uap;
4874 {
4875 	struct nameidata nd;
4876 	char attrname[EXTATTR_MAXNAMELEN];
4877 	int vfslocked, error;
4878 
4879 	AUDIT_ARG(value, uap->attrnamespace);
4880 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4881 	if (error)
4882 		return(error);
4883 	AUDIT_ARG(text, attrname);
4884 
4885 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4886 	    uap->path, td);
4887 	error = namei(&nd);
4888 	if (error)
4889 		return(error);
4890 	NDFREE(&nd, NDF_ONLY_PNBUF);
4891 
4892 	vfslocked = NDHASGIANT(&nd);
4893 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4894 	vrele(nd.ni_vp);
4895 	VFS_UNLOCK_GIANT(vfslocked);
4896 	return(error);
4897 }
4898 
4899 int
4900 extattr_delete_link(td, uap)
4901 	struct thread *td;
4902 	struct extattr_delete_link_args /* {
4903 		const char *path;
4904 		int attrnamespace;
4905 		const char *attrname;
4906 	} */ *uap;
4907 {
4908 	struct nameidata nd;
4909 	char attrname[EXTATTR_MAXNAMELEN];
4910 	int vfslocked, error;
4911 
4912 	AUDIT_ARG(value, uap->attrnamespace);
4913 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4914 	if (error)
4915 		return(error);
4916 	AUDIT_ARG(text, attrname);
4917 
4918 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4919 	    uap->path, td);
4920 	error = namei(&nd);
4921 	if (error)
4922 		return(error);
4923 	NDFREE(&nd, NDF_ONLY_PNBUF);
4924 
4925 	vfslocked = NDHASGIANT(&nd);
4926 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4927 	vrele(nd.ni_vp);
4928 	VFS_UNLOCK_GIANT(vfslocked);
4929 	return(error);
4930 }
4931 
4932 /*-
4933  * Retrieve a list of extended attributes on a file or directory.
4934  *
4935  * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4936  *            userspace buffer pointer "data", buffer length "nbytes",
4937  *            thread "td".
4938  * Returns: 0 on success, an error number otherwise
4939  * Locks: none
4940  * References: vp must be a valid reference for the duration of the call
4941  */
4942 static int
4943 extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4944     size_t nbytes, struct thread *td)
4945 {
4946 	struct uio auio, *auiop;
4947 	size_t size, *sizep;
4948 	struct iovec aiov;
4949 	ssize_t cnt;
4950 	int error;
4951 
4952 	VFS_ASSERT_GIANT(vp->v_mount);
4953 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4954 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4955 
4956 	auiop = NULL;
4957 	sizep = NULL;
4958 	cnt = 0;
4959 	if (data != NULL) {
4960 		aiov.iov_base = data;
4961 		aiov.iov_len = nbytes;
4962 		auio.uio_iov = &aiov;
4963 		auio.uio_iovcnt = 1;
4964 		auio.uio_offset = 0;
4965 		if (nbytes > INT_MAX) {
4966 			error = EINVAL;
4967 			goto done;
4968 		}
4969 		auio.uio_resid = nbytes;
4970 		auio.uio_rw = UIO_READ;
4971 		auio.uio_segflg = UIO_USERSPACE;
4972 		auio.uio_td = td;
4973 		auiop = &auio;
4974 		cnt = nbytes;
4975 	} else
4976 		sizep = &size;
4977 
4978 #ifdef MAC
4979 	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4980 	if (error)
4981 		goto done;
4982 #endif
4983 
4984 	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4985 	    td->td_ucred, td);
4986 
4987 	if (auiop != NULL) {
4988 		cnt -= auio.uio_resid;
4989 		td->td_retval[0] = cnt;
4990 	} else
4991 		td->td_retval[0] = size;
4992 
4993 done:
4994 	VOP_UNLOCK(vp, 0, td);
4995 	return (error);
4996 }
4997 
4998 
4999 int
5000 extattr_list_fd(td, uap)
5001 	struct thread *td;
5002 	struct extattr_list_fd_args /* {
5003 		int fd;
5004 		int attrnamespace;
5005 		void *data;
5006 		size_t nbytes;
5007 	} */ *uap;
5008 {
5009 	struct file *fp;
5010 	int vfslocked, error;
5011 
5012 	AUDIT_ARG(fd, uap->fd);
5013 	AUDIT_ARG(value, uap->attrnamespace);
5014 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
5015 	if (error)
5016 		return (error);
5017 
5018 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
5019 	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
5020 	    uap->nbytes, td);
5021 
5022 	fdrop(fp, td);
5023 	VFS_UNLOCK_GIANT(vfslocked);
5024 	return (error);
5025 }
5026 
5027 int
5028 extattr_list_file(td, uap)
5029 	struct thread*td;
5030 	struct extattr_list_file_args /* {
5031 		const char *path;
5032 		int attrnamespace;
5033 		void *data;
5034 		size_t nbytes;
5035 	} */ *uap;
5036 {
5037 	struct nameidata nd;
5038 	int vfslocked, error;
5039 
5040 	AUDIT_ARG(value, uap->attrnamespace);
5041 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
5042 	    uap->path, td);
5043 	error = namei(&nd);
5044 	if (error)
5045 		return (error);
5046 	NDFREE(&nd, NDF_ONLY_PNBUF);
5047 
5048 	vfslocked = NDHASGIANT(&nd);
5049 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
5050 	    uap->nbytes, td);
5051 
5052 	vrele(nd.ni_vp);
5053 	VFS_UNLOCK_GIANT(vfslocked);
5054 	return (error);
5055 }
5056 
5057 int
5058 extattr_list_link(td, uap)
5059 	struct thread*td;
5060 	struct extattr_list_link_args /* {
5061 		const char *path;
5062 		int attrnamespace;
5063 		void *data;
5064 		size_t nbytes;
5065 	} */ *uap;
5066 {
5067 	struct nameidata nd;
5068 	int vfslocked, error;
5069 
5070 	AUDIT_ARG(value, uap->attrnamespace);
5071 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
5072 	    uap->path, td);
5073 	error = namei(&nd);
5074 	if (error)
5075 		return (error);
5076 	NDFREE(&nd, NDF_ONLY_PNBUF);
5077 
5078 	vfslocked = NDHASGIANT(&nd);
5079 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
5080 	    uap->nbytes, td);
5081 
5082 	vrele(nd.ni_vp);
5083 	VFS_UNLOCK_GIANT(vfslocked);
5084 	return (error);
5085 }
5086