xref: /freebsd/sys/kern/vfs_extattr.c (revision acd3428b7d3e94cef0e1881c868cb4b131d4ff41)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/sysent.h>
48 #include <sys/malloc.h>
49 #include <sys/mount.h>
50 #include <sys/mutex.h>
51 #include <sys/sysproto.h>
52 #include <sys/namei.h>
53 #include <sys/filedesc.h>
54 #include <sys/kernel.h>
55 #include <sys/fcntl.h>
56 #include <sys/file.h>
57 #include <sys/limits.h>
58 #include <sys/linker.h>
59 #include <sys/stat.h>
60 #include <sys/sx.h>
61 #include <sys/unistd.h>
62 #include <sys/vnode.h>
63 #include <sys/priv.h>
64 #include <sys/proc.h>
65 #include <sys/dirent.h>
66 #include <sys/extattr.h>
67 #include <sys/jail.h>
68 #include <sys/syscallsubr.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/stdarg.h>
72 
73 #include <security/audit/audit.h>
74 #include <security/mac/mac_framework.h>
75 
76 #include <vm/vm.h>
77 #include <vm/vm_object.h>
78 #include <vm/vm_page.h>
79 #include <vm/uma.h>
80 
81 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
82 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
83 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
84 static int setfmode(struct thread *td, struct vnode *, int);
85 static int setfflags(struct thread *td, struct vnode *, int);
86 static int setutimes(struct thread *td, struct vnode *,
87     const struct timespec *, int, int);
88 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
89     struct thread *td);
90 
91 static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
92     size_t nbytes, struct thread *td);
93 
94 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
95 
96 /*
97  * The module initialization routine for POSIX asynchronous I/O will
98  * set this to the version of AIO that it implements.  (Zero means
99  * that it is not implemented.)  This value is used here by pathconf()
100  * and in kern_descrip.c by fpathconf().
101  */
102 int async_io_version;
103 
104 /*
105  * Sync each mounted filesystem.
106  */
107 #ifndef _SYS_SYSPROTO_H_
108 struct sync_args {
109 	int     dummy;
110 };
111 #endif
112 
113 #ifdef DEBUG
114 static int syncprt = 0;
115 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
116 #endif
117 
118 /* ARGSUSED */
119 int
120 sync(td, uap)
121 	struct thread *td;
122 	struct sync_args *uap;
123 {
124 	struct mount *mp, *nmp;
125 	int vfslocked;
126 
127 	mtx_lock(&mountlist_mtx);
128 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
129 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
130 			nmp = TAILQ_NEXT(mp, mnt_list);
131 			continue;
132 		}
133 		vfslocked = VFS_LOCK_GIANT(mp);
134 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
135 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
136 			MNT_ILOCK(mp);
137 			mp->mnt_noasync++;
138 			mp->mnt_kern_flag &= ~MNTK_ASYNC;
139 			MNT_IUNLOCK(mp);
140 			vfs_msync(mp, MNT_NOWAIT);
141 			VFS_SYNC(mp, MNT_NOWAIT, td);
142 			MNT_ILOCK(mp);
143 			mp->mnt_noasync--;
144 			if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
145 			    mp->mnt_noasync == 0)
146 				mp->mnt_kern_flag |= MNTK_ASYNC;
147 			MNT_IUNLOCK(mp);
148 			vn_finished_write(mp);
149 		}
150 		VFS_UNLOCK_GIANT(vfslocked);
151 		mtx_lock(&mountlist_mtx);
152 		nmp = TAILQ_NEXT(mp, mnt_list);
153 		vfs_unbusy(mp, td);
154 	}
155 	mtx_unlock(&mountlist_mtx);
156 	return (0);
157 }
158 
159 /* XXX PRISON: could be per prison flag */
160 static int prison_quotas;
161 #if 0
162 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
163 #endif
164 
165 /*
166  * Change filesystem quotas.
167  *
168  * MP SAFE
169  */
170 #ifndef _SYS_SYSPROTO_H_
171 struct quotactl_args {
172 	char *path;
173 	int cmd;
174 	int uid;
175 	caddr_t arg;
176 };
177 #endif
178 int
179 quotactl(td, uap)
180 	struct thread *td;
181 	register struct quotactl_args /* {
182 		char *path;
183 		int cmd;
184 		int uid;
185 		caddr_t arg;
186 	} */ *uap;
187 {
188 	struct mount *mp, *vmp;
189 	int vfslocked;
190 	int error;
191 	struct nameidata nd;
192 
193 	AUDIT_ARG(cmd, uap->cmd);
194 	AUDIT_ARG(uid, uap->uid);
195 	if (jailed(td->td_ucred) && !prison_quotas)
196 		return (EPERM);
197 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1,
198 	   UIO_USERSPACE, uap->path, td);
199 	if ((error = namei(&nd)) != 0)
200 		return (error);
201 	vfslocked = NDHASGIANT(&nd);
202 	NDFREE(&nd, NDF_ONLY_PNBUF);
203 	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
204 	mp = nd.ni_vp->v_mount;
205 	vrele(nd.ni_vp);
206 	if (error)
207 		goto out;
208 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
209 	vn_finished_write(vmp);
210 out:
211 	VFS_UNLOCK_GIANT(vfslocked);
212 	return (error);
213 }
214 
215 /*
216  * Get filesystem statistics.
217  */
218 #ifndef _SYS_SYSPROTO_H_
219 struct statfs_args {
220 	char *path;
221 	struct statfs *buf;
222 };
223 #endif
224 int
225 statfs(td, uap)
226 	struct thread *td;
227 	register struct statfs_args /* {
228 		char *path;
229 		struct statfs *buf;
230 	} */ *uap;
231 {
232 	struct statfs sf;
233 	int error;
234 
235 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
236 	if (error == 0)
237 		error = copyout(&sf, uap->buf, sizeof(sf));
238 	return (error);
239 }
240 
241 int
242 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
243     struct statfs *buf)
244 {
245 	struct mount *mp;
246 	struct statfs *sp, sb;
247 	int vfslocked;
248 	int error;
249 	struct nameidata nd;
250 
251 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
252 	    pathseg, path, td);
253 	error = namei(&nd);
254 	if (error)
255 		return (error);
256 	vfslocked = NDHASGIANT(&nd);
257 	mp = nd.ni_vp->v_mount;
258 	vfs_ref(mp);
259 	NDFREE(&nd, NDF_ONLY_PNBUF);
260 	vput(nd.ni_vp);
261 #ifdef MAC
262 	error = mac_check_mount_stat(td->td_ucred, mp);
263 	if (error)
264 		goto out;
265 #endif
266 	/*
267 	 * Set these in case the underlying filesystem fails to do so.
268 	 */
269 	sp = &mp->mnt_stat;
270 	sp->f_version = STATFS_VERSION;
271 	sp->f_namemax = NAME_MAX;
272 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
273 	error = VFS_STATFS(mp, sp, td);
274 	if (error)
275 		goto out;
276 	if (priv_check(td, PRIV_VFS_GENERATION)) {
277 		bcopy(sp, &sb, sizeof(sb));
278 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
279 		prison_enforce_statfs(td->td_ucred, mp, &sb);
280 		sp = &sb;
281 	}
282 	*buf = *sp;
283 out:
284 	vfs_rel(mp);
285 	VFS_UNLOCK_GIANT(vfslocked);
286 	if (mtx_owned(&Giant))
287 		printf("statfs(%d): %s: %d\n", vfslocked, path, error);
288 	return (error);
289 }
290 
291 /*
292  * Get filesystem statistics.
293  */
294 #ifndef _SYS_SYSPROTO_H_
295 struct fstatfs_args {
296 	int fd;
297 	struct statfs *buf;
298 };
299 #endif
300 int
301 fstatfs(td, uap)
302 	struct thread *td;
303 	register struct fstatfs_args /* {
304 		int fd;
305 		struct statfs *buf;
306 	} */ *uap;
307 {
308 	struct statfs sf;
309 	int error;
310 
311 	error = kern_fstatfs(td, uap->fd, &sf);
312 	if (error == 0)
313 		error = copyout(&sf, uap->buf, sizeof(sf));
314 	return (error);
315 }
316 
317 int
318 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
319 {
320 	struct file *fp;
321 	struct mount *mp;
322 	struct statfs *sp, sb;
323 	int vfslocked;
324 	struct vnode *vp;
325 	int error;
326 
327 	AUDIT_ARG(fd, fd);
328 	error = getvnode(td->td_proc->p_fd, fd, &fp);
329 	if (error)
330 		return (error);
331 	vp = fp->f_vnode;
332 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
333 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
334 #ifdef AUDIT
335 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
336 #endif
337 	mp = vp->v_mount;
338 	if (mp)
339 		vfs_ref(mp);
340 	VOP_UNLOCK(vp, 0, td);
341 	fdrop(fp, td);
342 	if (vp->v_iflag & VI_DOOMED) {
343 		error = EBADF;
344 		goto out;
345 	}
346 #ifdef MAC
347 	error = mac_check_mount_stat(td->td_ucred, mp);
348 	if (error)
349 		goto out;
350 #endif
351 	/*
352 	 * Set these in case the underlying filesystem fails to do so.
353 	 */
354 	sp = &mp->mnt_stat;
355 	sp->f_version = STATFS_VERSION;
356 	sp->f_namemax = NAME_MAX;
357 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
358 	error = VFS_STATFS(mp, sp, td);
359 	if (error)
360 		goto out;
361 	if (priv_check(td, PRIV_VFS_GENERATION)) {
362 		bcopy(sp, &sb, sizeof(sb));
363 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
364 		prison_enforce_statfs(td->td_ucred, mp, &sb);
365 		sp = &sb;
366 	}
367 	*buf = *sp;
368 out:
369 	if (mp)
370 		vfs_rel(mp);
371 	VFS_UNLOCK_GIANT(vfslocked);
372 	return (error);
373 }
374 
375 /*
376  * Get statistics on all filesystems.
377  */
378 #ifndef _SYS_SYSPROTO_H_
379 struct getfsstat_args {
380 	struct statfs *buf;
381 	long bufsize;
382 	int flags;
383 };
384 #endif
385 int
386 getfsstat(td, uap)
387 	struct thread *td;
388 	register struct getfsstat_args /* {
389 		struct statfs *buf;
390 		long bufsize;
391 		int flags;
392 	} */ *uap;
393 {
394 
395 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
396 	    uap->flags));
397 }
398 
399 /*
400  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
401  * 	The caller is responsible for freeing memory which will be allocated
402  *	in '*buf'.
403  */
404 int
405 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
406     enum uio_seg bufseg, int flags)
407 {
408 	struct mount *mp, *nmp;
409 	struct statfs *sfsp, *sp, sb;
410 	size_t count, maxcount;
411 	int vfslocked;
412 	int error;
413 
414 	maxcount = bufsize / sizeof(struct statfs);
415 	if (bufsize == 0)
416 		sfsp = NULL;
417 	else if (bufseg == UIO_USERSPACE)
418 		sfsp = *buf;
419 	else /* if (bufseg == UIO_SYSSPACE) */ {
420 		count = 0;
421 		mtx_lock(&mountlist_mtx);
422 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
423 			count++;
424 		}
425 		mtx_unlock(&mountlist_mtx);
426 		if (maxcount > count)
427 			maxcount = count;
428 		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
429 		    M_WAITOK);
430 	}
431 	count = 0;
432 	mtx_lock(&mountlist_mtx);
433 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
434 		if (prison_canseemount(td->td_ucred, mp) != 0) {
435 			nmp = TAILQ_NEXT(mp, mnt_list);
436 			continue;
437 		}
438 #ifdef MAC
439 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
440 			nmp = TAILQ_NEXT(mp, mnt_list);
441 			continue;
442 		}
443 #endif
444 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
445 			nmp = TAILQ_NEXT(mp, mnt_list);
446 			continue;
447 		}
448 		vfslocked = VFS_LOCK_GIANT(mp);
449 		if (sfsp && count < maxcount) {
450 			sp = &mp->mnt_stat;
451 			/*
452 			 * Set these in case the underlying filesystem
453 			 * fails to do so.
454 			 */
455 			sp->f_version = STATFS_VERSION;
456 			sp->f_namemax = NAME_MAX;
457 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
458 			/*
459 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
460 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
461 			 * overrides MNT_WAIT.
462 			 */
463 			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
464 			    (flags & MNT_WAIT)) &&
465 			    (error = VFS_STATFS(mp, sp, td))) {
466 				VFS_UNLOCK_GIANT(vfslocked);
467 				mtx_lock(&mountlist_mtx);
468 				nmp = TAILQ_NEXT(mp, mnt_list);
469 				vfs_unbusy(mp, td);
470 				continue;
471 			}
472 			if (priv_check(td, PRIV_VFS_GENERATION)) {
473 				bcopy(sp, &sb, sizeof(sb));
474 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
475 				prison_enforce_statfs(td->td_ucred, mp, &sb);
476 				sp = &sb;
477 			}
478 			if (bufseg == UIO_SYSSPACE)
479 				bcopy(sp, sfsp, sizeof(*sp));
480 			else /* if (bufseg == UIO_USERSPACE) */ {
481 				error = copyout(sp, sfsp, sizeof(*sp));
482 				if (error) {
483 					vfs_unbusy(mp, td);
484 					VFS_UNLOCK_GIANT(vfslocked);
485 					return (error);
486 				}
487 			}
488 			sfsp++;
489 		}
490 		VFS_UNLOCK_GIANT(vfslocked);
491 		count++;
492 		mtx_lock(&mountlist_mtx);
493 		nmp = TAILQ_NEXT(mp, mnt_list);
494 		vfs_unbusy(mp, td);
495 	}
496 	mtx_unlock(&mountlist_mtx);
497 	if (sfsp && count > maxcount)
498 		td->td_retval[0] = maxcount;
499 	else
500 		td->td_retval[0] = count;
501 	return (0);
502 }
503 
504 #ifdef COMPAT_FREEBSD4
505 /*
506  * Get old format filesystem statistics.
507  */
508 static void cvtstatfs(struct statfs *, struct ostatfs *);
509 
510 #ifndef _SYS_SYSPROTO_H_
511 struct freebsd4_statfs_args {
512 	char *path;
513 	struct ostatfs *buf;
514 };
515 #endif
516 int
517 freebsd4_statfs(td, uap)
518 	struct thread *td;
519 	struct freebsd4_statfs_args /* {
520 		char *path;
521 		struct ostatfs *buf;
522 	} */ *uap;
523 {
524 	struct ostatfs osb;
525 	struct statfs sf;
526 	int error;
527 
528 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
529 	if (error)
530 		return (error);
531 	cvtstatfs(&sf, &osb);
532 	return (copyout(&osb, uap->buf, sizeof(osb)));
533 }
534 
535 /*
536  * Get filesystem statistics.
537  */
538 #ifndef _SYS_SYSPROTO_H_
539 struct freebsd4_fstatfs_args {
540 	int fd;
541 	struct ostatfs *buf;
542 };
543 #endif
544 int
545 freebsd4_fstatfs(td, uap)
546 	struct thread *td;
547 	struct freebsd4_fstatfs_args /* {
548 		int fd;
549 		struct ostatfs *buf;
550 	} */ *uap;
551 {
552 	struct ostatfs osb;
553 	struct statfs sf;
554 	int error;
555 
556 	error = kern_fstatfs(td, uap->fd, &sf);
557 	if (error)
558 		return (error);
559 	cvtstatfs(&sf, &osb);
560 	return (copyout(&osb, uap->buf, sizeof(osb)));
561 }
562 
563 /*
564  * Get statistics on all filesystems.
565  */
566 #ifndef _SYS_SYSPROTO_H_
567 struct freebsd4_getfsstat_args {
568 	struct ostatfs *buf;
569 	long bufsize;
570 	int flags;
571 };
572 #endif
573 int
574 freebsd4_getfsstat(td, uap)
575 	struct thread *td;
576 	register struct freebsd4_getfsstat_args /* {
577 		struct ostatfs *buf;
578 		long bufsize;
579 		int flags;
580 	} */ *uap;
581 {
582 	struct statfs *buf, *sp;
583 	struct ostatfs osb;
584 	size_t count, size;
585 	int error;
586 
587 	count = uap->bufsize / sizeof(struct ostatfs);
588 	size = count * sizeof(struct statfs);
589 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
590 	if (size > 0) {
591 		count = td->td_retval[0];
592 		sp = buf;
593 		while (count > 0 && error == 0) {
594 			cvtstatfs(sp, &osb);
595 			error = copyout(&osb, uap->buf, sizeof(osb));
596 			sp++;
597 			uap->buf++;
598 			count--;
599 		}
600 		free(buf, M_TEMP);
601 	}
602 	return (error);
603 }
604 
605 /*
606  * Implement fstatfs() for (NFS) file handles.
607  */
608 #ifndef _SYS_SYSPROTO_H_
609 struct freebsd4_fhstatfs_args {
610 	struct fhandle *u_fhp;
611 	struct ostatfs *buf;
612 };
613 #endif
614 int
615 freebsd4_fhstatfs(td, uap)
616 	struct thread *td;
617 	struct freebsd4_fhstatfs_args /* {
618 		struct fhandle *u_fhp;
619 		struct ostatfs *buf;
620 	} */ *uap;
621 {
622 	struct ostatfs osb;
623 	struct statfs sf;
624 	fhandle_t fh;
625 	int error;
626 
627 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
628 	if (error)
629 		return (error);
630 	error = kern_fhstatfs(td, fh, &sf);
631 	if (error)
632 		return (error);
633 	cvtstatfs(&sf, &osb);
634 	return (copyout(&osb, uap->buf, sizeof(osb)));
635 }
636 
637 /*
638  * Convert a new format statfs structure to an old format statfs structure.
639  */
640 static void
641 cvtstatfs(nsp, osp)
642 	struct statfs *nsp;
643 	struct ostatfs *osp;
644 {
645 
646 	bzero(osp, sizeof(*osp));
647 	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
648 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
649 	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
650 	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
651 	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
652 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
653 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
654 	osp->f_owner = nsp->f_owner;
655 	osp->f_type = nsp->f_type;
656 	osp->f_flags = nsp->f_flags;
657 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
658 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
659 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
660 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
661 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
662 	    MIN(MFSNAMELEN, OMFSNAMELEN));
663 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
664 	    MIN(MNAMELEN, OMNAMELEN));
665 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
666 	    MIN(MNAMELEN, OMNAMELEN));
667 	osp->f_fsid = nsp->f_fsid;
668 }
669 #endif /* COMPAT_FREEBSD4 */
670 
671 /*
672  * Change current working directory to a given file descriptor.
673  */
674 #ifndef _SYS_SYSPROTO_H_
675 struct fchdir_args {
676 	int	fd;
677 };
678 #endif
679 int
680 fchdir(td, uap)
681 	struct thread *td;
682 	struct fchdir_args /* {
683 		int fd;
684 	} */ *uap;
685 {
686 	register struct filedesc *fdp = td->td_proc->p_fd;
687 	struct vnode *vp, *tdp, *vpold;
688 	struct mount *mp;
689 	struct file *fp;
690 	int vfslocked;
691 	int error;
692 
693 	AUDIT_ARG(fd, uap->fd);
694 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
695 		return (error);
696 	vp = fp->f_vnode;
697 	VREF(vp);
698 	fdrop(fp, td);
699 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
700 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
701 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
702 	error = change_dir(vp, td);
703 	while (!error && (mp = vp->v_mountedhere) != NULL) {
704 		int tvfslocked;
705 		if (vfs_busy(mp, 0, 0, td))
706 			continue;
707 		tvfslocked = VFS_LOCK_GIANT(mp);
708 		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
709 		vfs_unbusy(mp, td);
710 		if (error) {
711 			VFS_UNLOCK_GIANT(tvfslocked);
712 			break;
713 		}
714 		vput(vp);
715 		VFS_UNLOCK_GIANT(vfslocked);
716 		vp = tdp;
717 		vfslocked = tvfslocked;
718 	}
719 	if (error) {
720 		vput(vp);
721 		VFS_UNLOCK_GIANT(vfslocked);
722 		return (error);
723 	}
724 	VOP_UNLOCK(vp, 0, td);
725 	VFS_UNLOCK_GIANT(vfslocked);
726 	FILEDESC_LOCK_FAST(fdp);
727 	vpold = fdp->fd_cdir;
728 	fdp->fd_cdir = vp;
729 	FILEDESC_UNLOCK_FAST(fdp);
730 	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
731 	vrele(vpold);
732 	VFS_UNLOCK_GIANT(vfslocked);
733 	return (0);
734 }
735 
736 /*
737  * Change current working directory (``.'').
738  */
739 #ifndef _SYS_SYSPROTO_H_
740 struct chdir_args {
741 	char	*path;
742 };
743 #endif
744 int
745 chdir(td, uap)
746 	struct thread *td;
747 	struct chdir_args /* {
748 		char *path;
749 	} */ *uap;
750 {
751 
752 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
753 }
754 
755 int
756 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
757 {
758 	register struct filedesc *fdp = td->td_proc->p_fd;
759 	int error;
760 	struct nameidata nd;
761 	struct vnode *vp;
762 	int vfslocked;
763 
764 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
765 	    pathseg, path, td);
766 	if ((error = namei(&nd)) != 0)
767 		return (error);
768 	vfslocked = NDHASGIANT(&nd);
769 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
770 		vput(nd.ni_vp);
771 		VFS_UNLOCK_GIANT(vfslocked);
772 		NDFREE(&nd, NDF_ONLY_PNBUF);
773 		return (error);
774 	}
775 	VOP_UNLOCK(nd.ni_vp, 0, td);
776 	VFS_UNLOCK_GIANT(vfslocked);
777 	NDFREE(&nd, NDF_ONLY_PNBUF);
778 	FILEDESC_LOCK_FAST(fdp);
779 	vp = fdp->fd_cdir;
780 	fdp->fd_cdir = nd.ni_vp;
781 	FILEDESC_UNLOCK_FAST(fdp);
782 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
783 	vrele(vp);
784 	VFS_UNLOCK_GIANT(vfslocked);
785 	return (0);
786 }
787 
788 /*
789  * Helper function for raised chroot(2) security function:  Refuse if
790  * any filedescriptors are open directories.
791  */
792 static int
793 chroot_refuse_vdir_fds(fdp)
794 	struct filedesc *fdp;
795 {
796 	struct vnode *vp;
797 	struct file *fp;
798 	int fd;
799 
800 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
801 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
802 		fp = fget_locked(fdp, fd);
803 		if (fp == NULL)
804 			continue;
805 		if (fp->f_type == DTYPE_VNODE) {
806 			vp = fp->f_vnode;
807 			if (vp->v_type == VDIR)
808 				return (EPERM);
809 		}
810 	}
811 	return (0);
812 }
813 
814 /*
815  * This sysctl determines if we will allow a process to chroot(2) if it
816  * has a directory open:
817  *	0: disallowed for all processes.
818  *	1: allowed for processes that were not already chroot(2)'ed.
819  *	2: allowed for all processes.
820  */
821 
822 static int chroot_allow_open_directories = 1;
823 
824 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
825      &chroot_allow_open_directories, 0, "");
826 
827 /*
828  * Change notion of root (``/'') directory.
829  */
830 #ifndef _SYS_SYSPROTO_H_
831 struct chroot_args {
832 	char	*path;
833 };
834 #endif
835 int
836 chroot(td, uap)
837 	struct thread *td;
838 	struct chroot_args /* {
839 		char *path;
840 	} */ *uap;
841 {
842 	int error;
843 	struct nameidata nd;
844 	int vfslocked;
845 
846 	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT,
847 	    SUSER_ALLOWJAIL);
848 	if (error)
849 		return (error);
850 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
851 	    UIO_USERSPACE, uap->path, td);
852 	error = namei(&nd);
853 	if (error)
854 		goto error;
855 	vfslocked = NDHASGIANT(&nd);
856 	if ((error = change_dir(nd.ni_vp, td)) != 0)
857 		goto e_vunlock;
858 #ifdef MAC
859 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
860 		goto e_vunlock;
861 #endif
862 	VOP_UNLOCK(nd.ni_vp, 0, td);
863 	error = change_root(nd.ni_vp, td);
864 	vrele(nd.ni_vp);
865 	VFS_UNLOCK_GIANT(vfslocked);
866 	NDFREE(&nd, NDF_ONLY_PNBUF);
867 	return (error);
868 e_vunlock:
869 	vput(nd.ni_vp);
870 	VFS_UNLOCK_GIANT(vfslocked);
871 error:
872 	NDFREE(&nd, NDF_ONLY_PNBUF);
873 	return (error);
874 }
875 
876 /*
877  * Common routine for chroot and chdir.  Callers must provide a locked vnode
878  * instance.
879  */
880 int
881 change_dir(vp, td)
882 	struct vnode *vp;
883 	struct thread *td;
884 {
885 	int error;
886 
887 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
888 	if (vp->v_type != VDIR)
889 		return (ENOTDIR);
890 #ifdef MAC
891 	error = mac_check_vnode_chdir(td->td_ucred, vp);
892 	if (error)
893 		return (error);
894 #endif
895 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
896 	return (error);
897 }
898 
899 /*
900  * Common routine for kern_chroot() and jail_attach().  The caller is
901  * responsible for invoking priv_check() and mac_check_chroot() to authorize
902  * this operation.
903  */
904 int
905 change_root(vp, td)
906 	struct vnode *vp;
907 	struct thread *td;
908 {
909 	struct filedesc *fdp;
910 	struct vnode *oldvp;
911 	int vfslocked;
912 	int error;
913 
914 	VFS_ASSERT_GIANT(vp->v_mount);
915 	fdp = td->td_proc->p_fd;
916 	FILEDESC_LOCK(fdp);
917 	if (chroot_allow_open_directories == 0 ||
918 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
919 		error = chroot_refuse_vdir_fds(fdp);
920 		if (error) {
921 			FILEDESC_UNLOCK(fdp);
922 			return (error);
923 		}
924 	}
925 	oldvp = fdp->fd_rdir;
926 	fdp->fd_rdir = vp;
927 	VREF(fdp->fd_rdir);
928 	if (!fdp->fd_jdir) {
929 		fdp->fd_jdir = vp;
930 		VREF(fdp->fd_jdir);
931 	}
932 	FILEDESC_UNLOCK(fdp);
933 	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
934 	vrele(oldvp);
935 	VFS_UNLOCK_GIANT(vfslocked);
936 	return (0);
937 }
938 
939 /*
940  * Check permissions, allocate an open file structure,
941  * and call the device open routine if any.
942  *
943  * MP SAFE
944  */
945 #ifndef _SYS_SYSPROTO_H_
946 struct open_args {
947 	char	*path;
948 	int	flags;
949 	int	mode;
950 };
951 #endif
952 int
953 open(td, uap)
954 	struct thread *td;
955 	register struct open_args /* {
956 		char *path;
957 		int flags;
958 		int mode;
959 	} */ *uap;
960 {
961 
962 	return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
963 }
964 
965 int
966 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
967     int mode)
968 {
969 	struct proc *p = td->td_proc;
970 	struct filedesc *fdp = p->p_fd;
971 	struct file *fp;
972 	struct vnode *vp;
973 	struct vattr vat;
974 	struct mount *mp;
975 	int cmode;
976 	struct file *nfp;
977 	int type, indx, error;
978 	struct flock lf;
979 	struct nameidata nd;
980 	int vfslocked;
981 
982 	AUDIT_ARG(fflags, flags);
983 	AUDIT_ARG(mode, mode);
984 	if ((flags & O_ACCMODE) == O_ACCMODE)
985 		return (EINVAL);
986 	flags = FFLAGS(flags);
987 	error = falloc(td, &nfp, &indx);
988 	if (error)
989 		return (error);
990 	/* An extra reference on `nfp' has been held for us by falloc(). */
991 	fp = nfp;
992 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
993 	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
994 	td->td_dupfd = -1;		/* XXX check for fdopen */
995 	error = vn_open(&nd, &flags, cmode, indx);
996 	if (error) {
997 		/*
998 		 * If the vn_open replaced the method vector, something
999 		 * wonderous happened deep below and we just pass it up
1000 		 * pretending we know what we do.
1001 		 */
1002 		if (error == ENXIO && fp->f_ops != &badfileops) {
1003 			fdrop(fp, td);
1004 			td->td_retval[0] = indx;
1005 			return (0);
1006 		}
1007 
1008 		/*
1009 		 * release our own reference
1010 		 */
1011 		fdrop(fp, td);
1012 
1013 		/*
1014 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1015 		 * responsible for dropping the old contents of ofiles[indx]
1016 		 * if it succeeds.
1017 		 */
1018 		if ((error == ENODEV || error == ENXIO) &&
1019 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1020 		    (error =
1021 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1022 			td->td_retval[0] = indx;
1023 			return (0);
1024 		}
1025 		/*
1026 		 * Clean up the descriptor, but only if another thread hadn't
1027 		 * replaced or closed it.
1028 		 */
1029 		fdclose(fdp, fp, indx, td);
1030 
1031 		if (error == ERESTART)
1032 			error = EINTR;
1033 		return (error);
1034 	}
1035 	td->td_dupfd = 0;
1036 	vfslocked = NDHASGIANT(&nd);
1037 	NDFREE(&nd, NDF_ONLY_PNBUF);
1038 	vp = nd.ni_vp;
1039 
1040 	/*
1041 	 * There should be 2 references on the file, one from the descriptor
1042 	 * table, and one for us.
1043 	 *
1044 	 * Handle the case where someone closed the file (via its file
1045 	 * descriptor) while we were blocked.  The end result should look
1046 	 * like opening the file succeeded but it was immediately closed.
1047 	 * We call vn_close() manually because we haven't yet hooked up
1048 	 * the various 'struct file' fields.
1049 	 */
1050 	FILEDESC_LOCK(fdp);
1051 	FILE_LOCK(fp);
1052 	if (fp->f_count == 1) {
1053 		mp = vp->v_mount;
1054 		KASSERT(fdp->fd_ofiles[indx] != fp,
1055 		    ("Open file descriptor lost all refs"));
1056 		FILE_UNLOCK(fp);
1057 		FILEDESC_UNLOCK(fdp);
1058 		VOP_UNLOCK(vp, 0, td);
1059 		vn_close(vp, flags & FMASK, fp->f_cred, td);
1060 		VFS_UNLOCK_GIANT(vfslocked);
1061 		fdrop(fp, td);
1062 		td->td_retval[0] = indx;
1063 		return (0);
1064 	}
1065 	fp->f_vnode = vp;
1066 	if (fp->f_data == NULL)
1067 		fp->f_data = vp;
1068 	fp->f_flag = flags & FMASK;
1069 	if (fp->f_ops == &badfileops)
1070 		fp->f_ops = &vnops;
1071 	fp->f_seqcount = 1;
1072 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1073 	FILE_UNLOCK(fp);
1074 	FILEDESC_UNLOCK(fdp);
1075 
1076 	VOP_UNLOCK(vp, 0, td);
1077 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1078 		lf.l_whence = SEEK_SET;
1079 		lf.l_start = 0;
1080 		lf.l_len = 0;
1081 		if (flags & O_EXLOCK)
1082 			lf.l_type = F_WRLCK;
1083 		else
1084 			lf.l_type = F_RDLCK;
1085 		type = F_FLOCK;
1086 		if ((flags & FNONBLOCK) == 0)
1087 			type |= F_WAIT;
1088 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1089 			    type)) != 0)
1090 			goto bad;
1091 		fp->f_flag |= FHASLOCK;
1092 	}
1093 	if (flags & O_TRUNC) {
1094 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1095 			goto bad;
1096 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1097 		VATTR_NULL(&vat);
1098 		vat.va_size = 0;
1099 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1100 #ifdef MAC
1101 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1102 		if (error == 0)
1103 #endif
1104 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1105 		VOP_UNLOCK(vp, 0, td);
1106 		vn_finished_write(mp);
1107 		if (error)
1108 			goto bad;
1109 	}
1110 	VFS_UNLOCK_GIANT(vfslocked);
1111 	/*
1112 	 * Release our private reference, leaving the one associated with
1113 	 * the descriptor table intact.
1114 	 */
1115 	fdrop(fp, td);
1116 	td->td_retval[0] = indx;
1117 	return (0);
1118 bad:
1119 	VFS_UNLOCK_GIANT(vfslocked);
1120 	fdclose(fdp, fp, indx, td);
1121 	fdrop(fp, td);
1122 	return (error);
1123 }
1124 
1125 #ifdef COMPAT_43
1126 /*
1127  * Create a file.
1128  *
1129  * MP SAFE
1130  */
1131 #ifndef _SYS_SYSPROTO_H_
1132 struct ocreat_args {
1133 	char	*path;
1134 	int	mode;
1135 };
1136 #endif
1137 int
1138 ocreat(td, uap)
1139 	struct thread *td;
1140 	register struct ocreat_args /* {
1141 		char *path;
1142 		int mode;
1143 	} */ *uap;
1144 {
1145 
1146 	return (kern_open(td, uap->path, UIO_USERSPACE,
1147 	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
1148 }
1149 #endif /* COMPAT_43 */
1150 
1151 /*
1152  * Create a special file.
1153  */
1154 #ifndef _SYS_SYSPROTO_H_
1155 struct mknod_args {
1156 	char	*path;
1157 	int	mode;
1158 	int	dev;
1159 };
1160 #endif
1161 int
1162 mknod(td, uap)
1163 	struct thread *td;
1164 	register struct mknod_args /* {
1165 		char *path;
1166 		int mode;
1167 		int dev;
1168 	} */ *uap;
1169 {
1170 
1171 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1172 }
1173 
1174 int
1175 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1176     int dev)
1177 {
1178 	struct vnode *vp;
1179 	struct mount *mp;
1180 	struct vattr vattr;
1181 	int error;
1182 	int whiteout = 0;
1183 	struct nameidata nd;
1184 	int vfslocked;
1185 
1186 	AUDIT_ARG(mode, mode);
1187 	AUDIT_ARG(dev, dev);
1188 	switch (mode & S_IFMT) {
1189 	case S_IFCHR:
1190 	case S_IFBLK:
1191 		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
1192 		break;
1193 	case S_IFMT:
1194 		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
1195 		break;
1196 	case S_IFWHT:
1197 		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
1198 		break;
1199 	default:
1200 		error = EINVAL;
1201 		break;
1202 	}
1203 	if (error)
1204 		return (error);
1205 restart:
1206 	bwillwrite();
1207 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1208 	    pathseg, path, td);
1209 	if ((error = namei(&nd)) != 0)
1210 		return (error);
1211 	vfslocked = NDHASGIANT(&nd);
1212 	vp = nd.ni_vp;
1213 	if (vp != NULL) {
1214 		NDFREE(&nd, NDF_ONLY_PNBUF);
1215 		if (vp == nd.ni_dvp)
1216 			vrele(nd.ni_dvp);
1217 		else
1218 			vput(nd.ni_dvp);
1219 		vrele(vp);
1220 		VFS_UNLOCK_GIANT(vfslocked);
1221 		return (EEXIST);
1222 	} else {
1223 		VATTR_NULL(&vattr);
1224 		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1225 		vattr.va_mode = (mode & ALLPERMS) &
1226 		    ~td->td_proc->p_fd->fd_cmask;
1227 		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1228 		vattr.va_rdev = dev;
1229 		whiteout = 0;
1230 
1231 		switch (mode & S_IFMT) {
1232 		case S_IFMT:	/* used by badsect to flag bad sectors */
1233 			vattr.va_type = VBAD;
1234 			break;
1235 		case S_IFCHR:
1236 			vattr.va_type = VCHR;
1237 			break;
1238 		case S_IFBLK:
1239 			vattr.va_type = VBLK;
1240 			break;
1241 		case S_IFWHT:
1242 			whiteout = 1;
1243 			break;
1244 		default:
1245 			panic("kern_mknod: invalid mode");
1246 		}
1247 	}
1248 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1249 		NDFREE(&nd, NDF_ONLY_PNBUF);
1250 		vput(nd.ni_dvp);
1251 		VFS_UNLOCK_GIANT(vfslocked);
1252 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1253 			return (error);
1254 		goto restart;
1255 	}
1256 #ifdef MAC
1257 	if (error == 0 && !whiteout)
1258 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1259 		    &nd.ni_cnd, &vattr);
1260 #endif
1261 	if (!error) {
1262 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1263 		if (whiteout)
1264 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1265 		else {
1266 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1267 						&nd.ni_cnd, &vattr);
1268 			if (error == 0)
1269 				vput(nd.ni_vp);
1270 		}
1271 	}
1272 	NDFREE(&nd, NDF_ONLY_PNBUF);
1273 	vput(nd.ni_dvp);
1274 	vn_finished_write(mp);
1275 	VFS_UNLOCK_GIANT(vfslocked);
1276 	return (error);
1277 }
1278 
1279 /*
1280  * Create a named pipe.
1281  */
1282 #ifndef _SYS_SYSPROTO_H_
1283 struct mkfifo_args {
1284 	char	*path;
1285 	int	mode;
1286 };
1287 #endif
1288 int
1289 mkfifo(td, uap)
1290 	struct thread *td;
1291 	register struct mkfifo_args /* {
1292 		char *path;
1293 		int mode;
1294 	} */ *uap;
1295 {
1296 
1297 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1298 }
1299 
1300 int
1301 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1302 {
1303 	struct mount *mp;
1304 	struct vattr vattr;
1305 	int error;
1306 	struct nameidata nd;
1307 	int vfslocked;
1308 
1309 	AUDIT_ARG(mode, mode);
1310 restart:
1311 	bwillwrite();
1312 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1313 	    pathseg, path, td);
1314 	if ((error = namei(&nd)) != 0)
1315 		return (error);
1316 	vfslocked = NDHASGIANT(&nd);
1317 	if (nd.ni_vp != NULL) {
1318 		NDFREE(&nd, NDF_ONLY_PNBUF);
1319 		if (nd.ni_vp == nd.ni_dvp)
1320 			vrele(nd.ni_dvp);
1321 		else
1322 			vput(nd.ni_dvp);
1323 		vrele(nd.ni_vp);
1324 		VFS_UNLOCK_GIANT(vfslocked);
1325 		return (EEXIST);
1326 	}
1327 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1328 		NDFREE(&nd, NDF_ONLY_PNBUF);
1329 		vput(nd.ni_dvp);
1330 		VFS_UNLOCK_GIANT(vfslocked);
1331 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1332 			return (error);
1333 		goto restart;
1334 	}
1335 	VATTR_NULL(&vattr);
1336 	vattr.va_type = VFIFO;
1337 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1338 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1339 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1340 #ifdef MAC
1341 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1342 	    &vattr);
1343 	if (error)
1344 		goto out;
1345 #endif
1346 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1347 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1348 	if (error == 0)
1349 		vput(nd.ni_vp);
1350 #ifdef MAC
1351 out:
1352 #endif
1353 	vput(nd.ni_dvp);
1354 	vn_finished_write(mp);
1355 	VFS_UNLOCK_GIANT(vfslocked);
1356 	NDFREE(&nd, NDF_ONLY_PNBUF);
1357 	return (error);
1358 }
1359 
1360 /*
1361  * Make a hard file link.
1362  */
1363 #ifndef _SYS_SYSPROTO_H_
1364 struct link_args {
1365 	char	*path;
1366 	char	*link;
1367 };
1368 #endif
1369 int
1370 link(td, uap)
1371 	struct thread *td;
1372 	register struct link_args /* {
1373 		char *path;
1374 		char *link;
1375 	} */ *uap;
1376 {
1377 	int error;
1378 
1379 	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1380 	return (error);
1381 }
1382 
1383 static int hardlink_check_uid = 0;
1384 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1385     &hardlink_check_uid, 0,
1386     "Unprivileged processes cannot create hard links to files owned by other "
1387     "users");
1388 static int hardlink_check_gid = 0;
1389 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1390     &hardlink_check_gid, 0,
1391     "Unprivileged processes cannot create hard links to files owned by other "
1392     "groups");
1393 
1394 static int
1395 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1396 {
1397 	struct vattr va;
1398 	int error;
1399 
1400 	if (!hardlink_check_uid && !hardlink_check_gid)
1401 		return (0);
1402 
1403 	error = VOP_GETATTR(vp, &va, cred, td);
1404 	if (error != 0)
1405 		return (error);
1406 
1407 	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
1408 		error = priv_check_cred(cred, PRIV_VFS_LINK,
1409 		    SUSER_ALLOWJAIL);
1410 		if (error)
1411 			return (error);
1412 	}
1413 
1414 	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
1415 		error = priv_check_cred(cred, PRIV_VFS_LINK,
1416 		    SUSER_ALLOWJAIL);
1417 		if (error)
1418 			return (error);
1419 	}
1420 
1421 	return (0);
1422 }
1423 
1424 int
1425 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1426 {
1427 	struct vnode *vp;
1428 	struct mount *mp;
1429 	struct nameidata nd;
1430 	int vfslocked;
1431 	int lvfslocked;
1432 	int error;
1433 
1434 	bwillwrite();
1435 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
1436 	if ((error = namei(&nd)) != 0)
1437 		return (error);
1438 	vfslocked = NDHASGIANT(&nd);
1439 	NDFREE(&nd, NDF_ONLY_PNBUF);
1440 	vp = nd.ni_vp;
1441 	if (vp->v_type == VDIR) {
1442 		vrele(vp);
1443 		VFS_UNLOCK_GIANT(vfslocked);
1444 		return (EPERM);		/* POSIX */
1445 	}
1446 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1447 		vrele(vp);
1448 		VFS_UNLOCK_GIANT(vfslocked);
1449 		return (error);
1450 	}
1451 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1452 	    segflg, link, td);
1453 	if ((error = namei(&nd)) == 0) {
1454 		lvfslocked = NDHASGIANT(&nd);
1455 		if (nd.ni_vp != NULL) {
1456 			if (nd.ni_dvp == nd.ni_vp)
1457 				vrele(nd.ni_dvp);
1458 			else
1459 				vput(nd.ni_dvp);
1460 			vrele(nd.ni_vp);
1461 			error = EEXIST;
1462 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1463 		    == 0) {
1464 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1465 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1466 			error = can_hardlink(vp, td, td->td_ucred);
1467 			if (error == 0)
1468 #ifdef MAC
1469 				error = mac_check_vnode_link(td->td_ucred,
1470 				    nd.ni_dvp, vp, &nd.ni_cnd);
1471 			if (error == 0)
1472 #endif
1473 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1474 			VOP_UNLOCK(vp, 0, td);
1475 			vput(nd.ni_dvp);
1476 		}
1477 		NDFREE(&nd, NDF_ONLY_PNBUF);
1478 		VFS_UNLOCK_GIANT(lvfslocked);
1479 	}
1480 	vrele(vp);
1481 	vn_finished_write(mp);
1482 	VFS_UNLOCK_GIANT(vfslocked);
1483 	return (error);
1484 }
1485 
1486 /*
1487  * Make a symbolic link.
1488  */
1489 #ifndef _SYS_SYSPROTO_H_
1490 struct symlink_args {
1491 	char	*path;
1492 	char	*link;
1493 };
1494 #endif
1495 int
1496 symlink(td, uap)
1497 	struct thread *td;
1498 	register struct symlink_args /* {
1499 		char *path;
1500 		char *link;
1501 	} */ *uap;
1502 {
1503 
1504 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1505 }
1506 
1507 int
1508 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1509 {
1510 	struct mount *mp;
1511 	struct vattr vattr;
1512 	char *syspath;
1513 	int error;
1514 	struct nameidata nd;
1515 	int vfslocked;
1516 
1517 	if (segflg == UIO_SYSSPACE) {
1518 		syspath = path;
1519 	} else {
1520 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1521 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1522 			goto out;
1523 	}
1524 	AUDIT_ARG(text, syspath);
1525 restart:
1526 	bwillwrite();
1527 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1528 	    segflg, link, td);
1529 	if ((error = namei(&nd)) != 0)
1530 		goto out;
1531 	vfslocked = NDHASGIANT(&nd);
1532 	if (nd.ni_vp) {
1533 		NDFREE(&nd, NDF_ONLY_PNBUF);
1534 		if (nd.ni_vp == nd.ni_dvp)
1535 			vrele(nd.ni_dvp);
1536 		else
1537 			vput(nd.ni_dvp);
1538 		vrele(nd.ni_vp);
1539 		VFS_UNLOCK_GIANT(vfslocked);
1540 		error = EEXIST;
1541 		goto out;
1542 	}
1543 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1544 		NDFREE(&nd, NDF_ONLY_PNBUF);
1545 		vput(nd.ni_dvp);
1546 		VFS_UNLOCK_GIANT(vfslocked);
1547 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1548 			goto out;
1549 		goto restart;
1550 	}
1551 	VATTR_NULL(&vattr);
1552 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1553 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1554 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1555 #ifdef MAC
1556 	vattr.va_type = VLNK;
1557 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1558 	    &vattr);
1559 	if (error)
1560 		goto out2;
1561 #endif
1562 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1563 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1564 	if (error == 0)
1565 		vput(nd.ni_vp);
1566 #ifdef MAC
1567 out2:
1568 #endif
1569 	NDFREE(&nd, NDF_ONLY_PNBUF);
1570 	vput(nd.ni_dvp);
1571 	vn_finished_write(mp);
1572 	VFS_UNLOCK_GIANT(vfslocked);
1573 out:
1574 	if (segflg != UIO_SYSSPACE)
1575 		uma_zfree(namei_zone, syspath);
1576 	return (error);
1577 }
1578 
1579 /*
1580  * Delete a whiteout from the filesystem.
1581  */
1582 int
1583 undelete(td, uap)
1584 	struct thread *td;
1585 	register struct undelete_args /* {
1586 		char *path;
1587 	} */ *uap;
1588 {
1589 	int error;
1590 	struct mount *mp;
1591 	struct nameidata nd;
1592 	int vfslocked;
1593 
1594 restart:
1595 	bwillwrite();
1596 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1597 	    UIO_USERSPACE, uap->path, td);
1598 	error = namei(&nd);
1599 	if (error)
1600 		return (error);
1601 	vfslocked = NDHASGIANT(&nd);
1602 
1603 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1604 		NDFREE(&nd, NDF_ONLY_PNBUF);
1605 		if (nd.ni_vp == nd.ni_dvp)
1606 			vrele(nd.ni_dvp);
1607 		else
1608 			vput(nd.ni_dvp);
1609 		if (nd.ni_vp)
1610 			vrele(nd.ni_vp);
1611 		VFS_UNLOCK_GIANT(vfslocked);
1612 		return (EEXIST);
1613 	}
1614 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1615 		NDFREE(&nd, NDF_ONLY_PNBUF);
1616 		vput(nd.ni_dvp);
1617 		VFS_UNLOCK_GIANT(vfslocked);
1618 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1619 			return (error);
1620 		goto restart;
1621 	}
1622 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1623 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1624 	NDFREE(&nd, NDF_ONLY_PNBUF);
1625 	vput(nd.ni_dvp);
1626 	vn_finished_write(mp);
1627 	VFS_UNLOCK_GIANT(vfslocked);
1628 	return (error);
1629 }
1630 
1631 /*
1632  * Delete a name from the filesystem.
1633  */
1634 #ifndef _SYS_SYSPROTO_H_
1635 struct unlink_args {
1636 	char	*path;
1637 };
1638 #endif
1639 int
1640 unlink(td, uap)
1641 	struct thread *td;
1642 	struct unlink_args /* {
1643 		char *path;
1644 	} */ *uap;
1645 {
1646 	int error;
1647 
1648 	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1649 	return (error);
1650 }
1651 
1652 int
1653 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1654 {
1655 	struct mount *mp;
1656 	struct vnode *vp;
1657 	int error;
1658 	struct nameidata nd;
1659 	int vfslocked;
1660 
1661 restart:
1662 	bwillwrite();
1663 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1664 	    pathseg, path, td);
1665 	if ((error = namei(&nd)) != 0)
1666 		return (error == EINVAL ? EPERM : error);
1667 	vfslocked = NDHASGIANT(&nd);
1668 	vp = nd.ni_vp;
1669 	if (vp->v_type == VDIR)
1670 		error = EPERM;		/* POSIX */
1671 	else {
1672 		/*
1673 		 * The root of a mounted filesystem cannot be deleted.
1674 		 *
1675 		 * XXX: can this only be a VDIR case?
1676 		 */
1677 		if (vp->v_vflag & VV_ROOT)
1678 			error = EBUSY;
1679 	}
1680 	if (error == 0) {
1681 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1682 			NDFREE(&nd, NDF_ONLY_PNBUF);
1683 			vput(nd.ni_dvp);
1684 			if (vp == nd.ni_dvp)
1685 				vrele(vp);
1686 			else
1687 				vput(vp);
1688 			VFS_UNLOCK_GIANT(vfslocked);
1689 			if ((error = vn_start_write(NULL, &mp,
1690 			    V_XSLEEP | PCATCH)) != 0)
1691 				return (error);
1692 			goto restart;
1693 		}
1694 #ifdef MAC
1695 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1696 		    &nd.ni_cnd);
1697 		if (error)
1698 			goto out;
1699 #endif
1700 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1701 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1702 #ifdef MAC
1703 out:
1704 #endif
1705 		vn_finished_write(mp);
1706 	}
1707 	NDFREE(&nd, NDF_ONLY_PNBUF);
1708 	vput(nd.ni_dvp);
1709 	if (vp == nd.ni_dvp)
1710 		vrele(vp);
1711 	else
1712 		vput(vp);
1713 	VFS_UNLOCK_GIANT(vfslocked);
1714 	return (error);
1715 }
1716 
1717 /*
1718  * Reposition read/write file offset.
1719  */
1720 #ifndef _SYS_SYSPROTO_H_
1721 struct lseek_args {
1722 	int	fd;
1723 	int	pad;
1724 	off_t	offset;
1725 	int	whence;
1726 };
1727 #endif
1728 int
1729 lseek(td, uap)
1730 	struct thread *td;
1731 	register struct lseek_args /* {
1732 		int fd;
1733 		int pad;
1734 		off_t offset;
1735 		int whence;
1736 	} */ *uap;
1737 {
1738 	struct ucred *cred = td->td_ucred;
1739 	struct file *fp;
1740 	struct vnode *vp;
1741 	struct vattr vattr;
1742 	off_t offset;
1743 	int error, noneg;
1744 	int vfslocked;
1745 
1746 	if ((error = fget(td, uap->fd, &fp)) != 0)
1747 		return (error);
1748 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1749 		fdrop(fp, td);
1750 		return (ESPIPE);
1751 	}
1752 	vp = fp->f_vnode;
1753 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1754 	noneg = (vp->v_type != VCHR);
1755 	offset = uap->offset;
1756 	switch (uap->whence) {
1757 	case L_INCR:
1758 		if (noneg &&
1759 		    (fp->f_offset < 0 ||
1760 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1761 			error = EOVERFLOW;
1762 			break;
1763 		}
1764 		offset += fp->f_offset;
1765 		break;
1766 	case L_XTND:
1767 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1768 		error = VOP_GETATTR(vp, &vattr, cred, td);
1769 		VOP_UNLOCK(vp, 0, td);
1770 		if (error)
1771 			break;
1772 		if (noneg &&
1773 		    (vattr.va_size > OFF_MAX ||
1774 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1775 			error = EOVERFLOW;
1776 			break;
1777 		}
1778 		offset += vattr.va_size;
1779 		break;
1780 	case L_SET:
1781 		break;
1782 	default:
1783 		error = EINVAL;
1784 	}
1785 	if (error == 0 && noneg && offset < 0)
1786 		error = EINVAL;
1787 	if (error != 0)
1788 		goto drop;
1789 	fp->f_offset = offset;
1790 	*(off_t *)(td->td_retval) = fp->f_offset;
1791 drop:
1792 	fdrop(fp, td);
1793 	VFS_UNLOCK_GIANT(vfslocked);
1794 	return (error);
1795 }
1796 
1797 #if defined(COMPAT_43)
1798 /*
1799  * Reposition read/write file offset.
1800  */
1801 #ifndef _SYS_SYSPROTO_H_
1802 struct olseek_args {
1803 	int	fd;
1804 	long	offset;
1805 	int	whence;
1806 };
1807 #endif
1808 int
1809 olseek(td, uap)
1810 	struct thread *td;
1811 	register struct olseek_args /* {
1812 		int fd;
1813 		long offset;
1814 		int whence;
1815 	} */ *uap;
1816 {
1817 	struct lseek_args /* {
1818 		int fd;
1819 		int pad;
1820 		off_t offset;
1821 		int whence;
1822 	} */ nuap;
1823 	int error;
1824 
1825 	nuap.fd = uap->fd;
1826 	nuap.offset = uap->offset;
1827 	nuap.whence = uap->whence;
1828 	error = lseek(td, &nuap);
1829 	return (error);
1830 }
1831 #endif /* COMPAT_43 */
1832 
1833 /*
1834  * Check access permissions using passed credentials.
1835  */
1836 static int
1837 vn_access(vp, user_flags, cred, td)
1838 	struct vnode	*vp;
1839 	int		user_flags;
1840 	struct ucred	*cred;
1841 	struct thread	*td;
1842 {
1843 	int error, flags;
1844 
1845 	/* Flags == 0 means only check for existence. */
1846 	error = 0;
1847 	if (user_flags) {
1848 		flags = 0;
1849 		if (user_flags & R_OK)
1850 			flags |= VREAD;
1851 		if (user_flags & W_OK)
1852 			flags |= VWRITE;
1853 		if (user_flags & X_OK)
1854 			flags |= VEXEC;
1855 #ifdef MAC
1856 		error = mac_check_vnode_access(cred, vp, flags);
1857 		if (error)
1858 			return (error);
1859 #endif
1860 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1861 			error = VOP_ACCESS(vp, flags, cred, td);
1862 	}
1863 	return (error);
1864 }
1865 
1866 /*
1867  * Check access permissions using "real" credentials.
1868  */
1869 #ifndef _SYS_SYSPROTO_H_
1870 struct access_args {
1871 	char	*path;
1872 	int	flags;
1873 };
1874 #endif
1875 int
1876 access(td, uap)
1877 	struct thread *td;
1878 	register struct access_args /* {
1879 		char *path;
1880 		int flags;
1881 	} */ *uap;
1882 {
1883 
1884 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1885 }
1886 
1887 int
1888 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1889 {
1890 	struct ucred *cred, *tmpcred;
1891 	register struct vnode *vp;
1892 	struct nameidata nd;
1893 	int vfslocked;
1894 	int error;
1895 
1896 	/*
1897 	 * Create and modify a temporary credential instead of one that
1898 	 * is potentially shared.  This could also mess up socket
1899 	 * buffer accounting which can run in an interrupt context.
1900 	 */
1901 	cred = td->td_ucred;
1902 	tmpcred = crdup(cred);
1903 	tmpcred->cr_uid = cred->cr_ruid;
1904 	tmpcred->cr_groups[0] = cred->cr_rgid;
1905 	td->td_ucred = tmpcred;
1906 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1907 	    pathseg, path, td);
1908 	if ((error = namei(&nd)) != 0)
1909 		goto out1;
1910 	vfslocked = NDHASGIANT(&nd);
1911 	vp = nd.ni_vp;
1912 
1913 	error = vn_access(vp, flags, tmpcred, td);
1914 	NDFREE(&nd, NDF_ONLY_PNBUF);
1915 	vput(vp);
1916 	VFS_UNLOCK_GIANT(vfslocked);
1917 out1:
1918 	td->td_ucred = cred;
1919 	crfree(tmpcred);
1920 	return (error);
1921 }
1922 
1923 /*
1924  * Check access permissions using "effective" credentials.
1925  */
1926 #ifndef _SYS_SYSPROTO_H_
1927 struct eaccess_args {
1928 	char	*path;
1929 	int	flags;
1930 };
1931 #endif
1932 int
1933 eaccess(td, uap)
1934 	struct thread *td;
1935 	register struct eaccess_args /* {
1936 		char *path;
1937 		int flags;
1938 	} */ *uap;
1939 {
1940 
1941 	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1942 }
1943 
1944 int
1945 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1946 {
1947 	struct nameidata nd;
1948 	struct vnode *vp;
1949 	int vfslocked;
1950 	int error;
1951 
1952 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1953 	    pathseg, path, td);
1954 	if ((error = namei(&nd)) != 0)
1955 		return (error);
1956 	vp = nd.ni_vp;
1957 	vfslocked = NDHASGIANT(&nd);
1958 	error = vn_access(vp, flags, td->td_ucred, td);
1959 	NDFREE(&nd, NDF_ONLY_PNBUF);
1960 	vput(vp);
1961 	VFS_UNLOCK_GIANT(vfslocked);
1962 	return (error);
1963 }
1964 
1965 #if defined(COMPAT_43)
1966 /*
1967  * Get file status; this version follows links.
1968  */
1969 #ifndef _SYS_SYSPROTO_H_
1970 struct ostat_args {
1971 	char	*path;
1972 	struct ostat *ub;
1973 };
1974 #endif
1975 int
1976 ostat(td, uap)
1977 	struct thread *td;
1978 	register struct ostat_args /* {
1979 		char *path;
1980 		struct ostat *ub;
1981 	} */ *uap;
1982 {
1983 	struct stat sb;
1984 	struct ostat osb;
1985 	int error;
1986 
1987 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1988 	if (error)
1989 		return (error);
1990 	cvtstat(&sb, &osb);
1991 	error = copyout(&osb, uap->ub, sizeof (osb));
1992 	return (error);
1993 }
1994 
1995 /*
1996  * Get file status; this version does not follow links.
1997  */
1998 #ifndef _SYS_SYSPROTO_H_
1999 struct olstat_args {
2000 	char	*path;
2001 	struct ostat *ub;
2002 };
2003 #endif
2004 int
2005 olstat(td, uap)
2006 	struct thread *td;
2007 	register struct olstat_args /* {
2008 		char *path;
2009 		struct ostat *ub;
2010 	} */ *uap;
2011 {
2012 	struct stat sb;
2013 	struct ostat osb;
2014 	int error;
2015 
2016 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2017 	if (error)
2018 		return (error);
2019 	cvtstat(&sb, &osb);
2020 	error = copyout(&osb, uap->ub, sizeof (osb));
2021 	return (error);
2022 }
2023 
2024 /*
2025  * Convert from an old to a new stat structure.
2026  */
2027 void
2028 cvtstat(st, ost)
2029 	struct stat *st;
2030 	struct ostat *ost;
2031 {
2032 
2033 	ost->st_dev = st->st_dev;
2034 	ost->st_ino = st->st_ino;
2035 	ost->st_mode = st->st_mode;
2036 	ost->st_nlink = st->st_nlink;
2037 	ost->st_uid = st->st_uid;
2038 	ost->st_gid = st->st_gid;
2039 	ost->st_rdev = st->st_rdev;
2040 	if (st->st_size < (quad_t)1 << 32)
2041 		ost->st_size = st->st_size;
2042 	else
2043 		ost->st_size = -2;
2044 	ost->st_atime = st->st_atime;
2045 	ost->st_mtime = st->st_mtime;
2046 	ost->st_ctime = st->st_ctime;
2047 	ost->st_blksize = st->st_blksize;
2048 	ost->st_blocks = st->st_blocks;
2049 	ost->st_flags = st->st_flags;
2050 	ost->st_gen = st->st_gen;
2051 }
2052 #endif /* COMPAT_43 */
2053 
2054 /*
2055  * Get file status; this version follows links.
2056  */
2057 #ifndef _SYS_SYSPROTO_H_
2058 struct stat_args {
2059 	char	*path;
2060 	struct stat *ub;
2061 };
2062 #endif
2063 int
2064 stat(td, uap)
2065 	struct thread *td;
2066 	register struct stat_args /* {
2067 		char *path;
2068 		struct stat *ub;
2069 	} */ *uap;
2070 {
2071 	struct stat sb;
2072 	int error;
2073 
2074 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2075 	if (error == 0)
2076 		error = copyout(&sb, uap->ub, sizeof (sb));
2077 	return (error);
2078 }
2079 
2080 int
2081 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2082 {
2083 	struct nameidata nd;
2084 	struct stat sb;
2085 	int error, vfslocked;
2086 
2087 	NDINIT(&nd, LOOKUP,
2088 	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2089 	    pathseg, path, td);
2090 	if ((error = namei(&nd)) != 0)
2091 		return (error);
2092 	vfslocked = NDHASGIANT(&nd);
2093 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2094 	NDFREE(&nd, NDF_ONLY_PNBUF);
2095 	vput(nd.ni_vp);
2096 	VFS_UNLOCK_GIANT(vfslocked);
2097 	if (mtx_owned(&Giant))
2098 		printf("stat(%d): %s\n", vfslocked, path);
2099 	if (error)
2100 		return (error);
2101 	*sbp = sb;
2102 	return (0);
2103 }
2104 
2105 /*
2106  * Get file status; this version does not follow links.
2107  */
2108 #ifndef _SYS_SYSPROTO_H_
2109 struct lstat_args {
2110 	char	*path;
2111 	struct stat *ub;
2112 };
2113 #endif
2114 int
2115 lstat(td, uap)
2116 	struct thread *td;
2117 	register struct lstat_args /* {
2118 		char *path;
2119 		struct stat *ub;
2120 	} */ *uap;
2121 {
2122 	struct stat sb;
2123 	int error;
2124 
2125 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2126 	if (error == 0)
2127 		error = copyout(&sb, uap->ub, sizeof (sb));
2128 	return (error);
2129 }
2130 
2131 int
2132 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2133 {
2134 	struct vnode *vp;
2135 	struct stat sb;
2136 	struct nameidata nd;
2137 	int error, vfslocked;
2138 
2139 	NDINIT(&nd, LOOKUP,
2140 	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2141 	    pathseg, path, td);
2142 	if ((error = namei(&nd)) != 0)
2143 		return (error);
2144 	vfslocked = NDHASGIANT(&nd);
2145 	vp = nd.ni_vp;
2146 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2147 	NDFREE(&nd, NDF_ONLY_PNBUF);
2148 	vput(vp);
2149 	VFS_UNLOCK_GIANT(vfslocked);
2150 	if (error)
2151 		return (error);
2152 	*sbp = sb;
2153 	return (0);
2154 }
2155 
2156 /*
2157  * Implementation of the NetBSD [l]stat() functions.
2158  */
2159 void
2160 cvtnstat(sb, nsb)
2161 	struct stat *sb;
2162 	struct nstat *nsb;
2163 {
2164 	bzero(nsb, sizeof *nsb);
2165 	nsb->st_dev = sb->st_dev;
2166 	nsb->st_ino = sb->st_ino;
2167 	nsb->st_mode = sb->st_mode;
2168 	nsb->st_nlink = sb->st_nlink;
2169 	nsb->st_uid = sb->st_uid;
2170 	nsb->st_gid = sb->st_gid;
2171 	nsb->st_rdev = sb->st_rdev;
2172 	nsb->st_atimespec = sb->st_atimespec;
2173 	nsb->st_mtimespec = sb->st_mtimespec;
2174 	nsb->st_ctimespec = sb->st_ctimespec;
2175 	nsb->st_size = sb->st_size;
2176 	nsb->st_blocks = sb->st_blocks;
2177 	nsb->st_blksize = sb->st_blksize;
2178 	nsb->st_flags = sb->st_flags;
2179 	nsb->st_gen = sb->st_gen;
2180 	nsb->st_birthtimespec = sb->st_birthtimespec;
2181 }
2182 
2183 #ifndef _SYS_SYSPROTO_H_
2184 struct nstat_args {
2185 	char	*path;
2186 	struct nstat *ub;
2187 };
2188 #endif
2189 int
2190 nstat(td, uap)
2191 	struct thread *td;
2192 	register struct nstat_args /* {
2193 		char *path;
2194 		struct nstat *ub;
2195 	} */ *uap;
2196 {
2197 	struct stat sb;
2198 	struct nstat nsb;
2199 	int error;
2200 
2201 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2202 	if (error)
2203 		return (error);
2204 	cvtnstat(&sb, &nsb);
2205 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2206 	return (error);
2207 }
2208 
2209 /*
2210  * NetBSD lstat.  Get file status; this version does not follow links.
2211  */
2212 #ifndef _SYS_SYSPROTO_H_
2213 struct lstat_args {
2214 	char	*path;
2215 	struct stat *ub;
2216 };
2217 #endif
2218 int
2219 nlstat(td, uap)
2220 	struct thread *td;
2221 	register struct nlstat_args /* {
2222 		char *path;
2223 		struct nstat *ub;
2224 	} */ *uap;
2225 {
2226 	struct stat sb;
2227 	struct nstat nsb;
2228 	int error;
2229 
2230 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2231 	if (error)
2232 		return (error);
2233 	cvtnstat(&sb, &nsb);
2234 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2235 	return (error);
2236 }
2237 
2238 /*
2239  * Get configurable pathname variables.
2240  */
2241 #ifndef _SYS_SYSPROTO_H_
2242 struct pathconf_args {
2243 	char	*path;
2244 	int	name;
2245 };
2246 #endif
2247 int
2248 pathconf(td, uap)
2249 	struct thread *td;
2250 	register struct pathconf_args /* {
2251 		char *path;
2252 		int name;
2253 	} */ *uap;
2254 {
2255 
2256 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2257 }
2258 
2259 int
2260 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2261 {
2262 	struct nameidata nd;
2263 	int error, vfslocked;
2264 
2265 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2266 	    pathseg, path, td);
2267 	if ((error = namei(&nd)) != 0)
2268 		return (error);
2269 	vfslocked = NDHASGIANT(&nd);
2270 	NDFREE(&nd, NDF_ONLY_PNBUF);
2271 
2272 	/* If asynchronous I/O is available, it works for all files. */
2273 	if (name == _PC_ASYNC_IO)
2274 		td->td_retval[0] = async_io_version;
2275 	else
2276 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2277 	vput(nd.ni_vp);
2278 	VFS_UNLOCK_GIANT(vfslocked);
2279 	return (error);
2280 }
2281 
2282 /*
2283  * Return target name of a symbolic link.
2284  */
2285 #ifndef _SYS_SYSPROTO_H_
2286 struct readlink_args {
2287 	char	*path;
2288 	char	*buf;
2289 	int	count;
2290 };
2291 #endif
2292 int
2293 readlink(td, uap)
2294 	struct thread *td;
2295 	register struct readlink_args /* {
2296 		char *path;
2297 		char *buf;
2298 		int count;
2299 	} */ *uap;
2300 {
2301 
2302 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2303 	    UIO_USERSPACE, uap->count));
2304 }
2305 
2306 int
2307 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2308     enum uio_seg bufseg, int count)
2309 {
2310 	register struct vnode *vp;
2311 	struct iovec aiov;
2312 	struct uio auio;
2313 	int error;
2314 	struct nameidata nd;
2315 	int vfslocked;
2316 
2317 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2318 	    pathseg, path, td);
2319 	if ((error = namei(&nd)) != 0)
2320 		return (error);
2321 	NDFREE(&nd, NDF_ONLY_PNBUF);
2322 	vfslocked = NDHASGIANT(&nd);
2323 	vp = nd.ni_vp;
2324 #ifdef MAC
2325 	error = mac_check_vnode_readlink(td->td_ucred, vp);
2326 	if (error) {
2327 		vput(vp);
2328 		VFS_UNLOCK_GIANT(vfslocked);
2329 		return (error);
2330 	}
2331 #endif
2332 	if (vp->v_type != VLNK)
2333 		error = EINVAL;
2334 	else {
2335 		aiov.iov_base = buf;
2336 		aiov.iov_len = count;
2337 		auio.uio_iov = &aiov;
2338 		auio.uio_iovcnt = 1;
2339 		auio.uio_offset = 0;
2340 		auio.uio_rw = UIO_READ;
2341 		auio.uio_segflg = bufseg;
2342 		auio.uio_td = td;
2343 		auio.uio_resid = count;
2344 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2345 	}
2346 	vput(vp);
2347 	VFS_UNLOCK_GIANT(vfslocked);
2348 	td->td_retval[0] = count - auio.uio_resid;
2349 	return (error);
2350 }
2351 
2352 /*
2353  * Common implementation code for chflags() and fchflags().
2354  */
2355 static int
2356 setfflags(td, vp, flags)
2357 	struct thread *td;
2358 	struct vnode *vp;
2359 	int flags;
2360 {
2361 	int error;
2362 	struct mount *mp;
2363 	struct vattr vattr;
2364 
2365 	/*
2366 	 * Prevent non-root users from setting flags on devices.  When
2367 	 * a device is reused, users can retain ownership of the device
2368 	 * if they are allowed to set flags and programs assume that
2369 	 * chown can't fail when done as root.
2370 	 */
2371 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2372 		error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV,
2373 		    SUSER_ALLOWJAIL);
2374 		if (error)
2375 			return (error);
2376 	}
2377 
2378 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2379 		return (error);
2380 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2381 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2382 	VATTR_NULL(&vattr);
2383 	vattr.va_flags = flags;
2384 #ifdef MAC
2385 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2386 	if (error == 0)
2387 #endif
2388 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2389 	VOP_UNLOCK(vp, 0, td);
2390 	vn_finished_write(mp);
2391 	return (error);
2392 }
2393 
2394 /*
2395  * Change flags of a file given a path name.
2396  */
2397 #ifndef _SYS_SYSPROTO_H_
2398 struct chflags_args {
2399 	char	*path;
2400 	int	flags;
2401 };
2402 #endif
2403 int
2404 chflags(td, uap)
2405 	struct thread *td;
2406 	register struct chflags_args /* {
2407 		char *path;
2408 		int flags;
2409 	} */ *uap;
2410 {
2411 	int error;
2412 	struct nameidata nd;
2413 	int vfslocked;
2414 
2415 	AUDIT_ARG(fflags, uap->flags);
2416 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2417 	    uap->path, td);
2418 	if ((error = namei(&nd)) != 0)
2419 		return (error);
2420 	NDFREE(&nd, NDF_ONLY_PNBUF);
2421 	vfslocked = NDHASGIANT(&nd);
2422 	error = setfflags(td, nd.ni_vp, uap->flags);
2423 	vrele(nd.ni_vp);
2424 	VFS_UNLOCK_GIANT(vfslocked);
2425 	return (error);
2426 }
2427 
2428 /*
2429  * Same as chflags() but doesn't follow symlinks.
2430  */
2431 int
2432 lchflags(td, uap)
2433 	struct thread *td;
2434 	register struct lchflags_args /* {
2435 		char *path;
2436 		int flags;
2437 	} */ *uap;
2438 {
2439 	int error;
2440 	struct nameidata nd;
2441 	int vfslocked;
2442 
2443 	AUDIT_ARG(fflags, uap->flags);
2444 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2445 	    uap->path, td);
2446 	if ((error = namei(&nd)) != 0)
2447 		return (error);
2448 	vfslocked = NDHASGIANT(&nd);
2449 	NDFREE(&nd, NDF_ONLY_PNBUF);
2450 	error = setfflags(td, nd.ni_vp, uap->flags);
2451 	vrele(nd.ni_vp);
2452 	VFS_UNLOCK_GIANT(vfslocked);
2453 	return (error);
2454 }
2455 
2456 /*
2457  * Change flags of a file given a file descriptor.
2458  */
2459 #ifndef _SYS_SYSPROTO_H_
2460 struct fchflags_args {
2461 	int	fd;
2462 	int	flags;
2463 };
2464 #endif
2465 int
2466 fchflags(td, uap)
2467 	struct thread *td;
2468 	register struct fchflags_args /* {
2469 		int fd;
2470 		int flags;
2471 	} */ *uap;
2472 {
2473 	struct file *fp;
2474 	int vfslocked;
2475 	int error;
2476 
2477 	AUDIT_ARG(fd, uap->fd);
2478 	AUDIT_ARG(fflags, uap->flags);
2479 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2480 		return (error);
2481 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2482 #ifdef AUDIT
2483 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2484 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2485 	VOP_UNLOCK(fp->f_vnode, 0, td);
2486 #endif
2487 	error = setfflags(td, fp->f_vnode, uap->flags);
2488 	VFS_UNLOCK_GIANT(vfslocked);
2489 	fdrop(fp, td);
2490 	return (error);
2491 }
2492 
2493 /*
2494  * Common implementation code for chmod(), lchmod() and fchmod().
2495  */
2496 static int
2497 setfmode(td, vp, mode)
2498 	struct thread *td;
2499 	struct vnode *vp;
2500 	int mode;
2501 {
2502 	int error;
2503 	struct mount *mp;
2504 	struct vattr vattr;
2505 
2506 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2507 		return (error);
2508 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2509 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2510 	VATTR_NULL(&vattr);
2511 	vattr.va_mode = mode & ALLPERMS;
2512 #ifdef MAC
2513 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2514 	if (error == 0)
2515 #endif
2516 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2517 	VOP_UNLOCK(vp, 0, td);
2518 	vn_finished_write(mp);
2519 	return (error);
2520 }
2521 
2522 /*
2523  * Change mode of a file given path name.
2524  */
2525 #ifndef _SYS_SYSPROTO_H_
2526 struct chmod_args {
2527 	char	*path;
2528 	int	mode;
2529 };
2530 #endif
2531 int
2532 chmod(td, uap)
2533 	struct thread *td;
2534 	register struct chmod_args /* {
2535 		char *path;
2536 		int mode;
2537 	} */ *uap;
2538 {
2539 
2540 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2541 }
2542 
2543 int
2544 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2545 {
2546 	int error;
2547 	struct nameidata nd;
2548 	int vfslocked;
2549 
2550 	AUDIT_ARG(mode, mode);
2551 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2552 	if ((error = namei(&nd)) != 0)
2553 		return (error);
2554 	vfslocked = NDHASGIANT(&nd);
2555 	NDFREE(&nd, NDF_ONLY_PNBUF);
2556 	error = setfmode(td, nd.ni_vp, mode);
2557 	vrele(nd.ni_vp);
2558 	VFS_UNLOCK_GIANT(vfslocked);
2559 	return (error);
2560 }
2561 
2562 /*
2563  * Change mode of a file given path name (don't follow links.)
2564  */
2565 #ifndef _SYS_SYSPROTO_H_
2566 struct lchmod_args {
2567 	char	*path;
2568 	int	mode;
2569 };
2570 #endif
2571 int
2572 lchmod(td, uap)
2573 	struct thread *td;
2574 	register struct lchmod_args /* {
2575 		char *path;
2576 		int mode;
2577 	} */ *uap;
2578 {
2579 	int error;
2580 	struct nameidata nd;
2581 	int vfslocked;
2582 
2583 	AUDIT_ARG(mode, (mode_t)uap->mode);
2584 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2585 	    uap->path, td);
2586 	if ((error = namei(&nd)) != 0)
2587 		return (error);
2588 	vfslocked = NDHASGIANT(&nd);
2589 	NDFREE(&nd, NDF_ONLY_PNBUF);
2590 	error = setfmode(td, nd.ni_vp, uap->mode);
2591 	vrele(nd.ni_vp);
2592 	VFS_UNLOCK_GIANT(vfslocked);
2593 	return (error);
2594 }
2595 
2596 /*
2597  * Change mode of a file given a file descriptor.
2598  */
2599 #ifndef _SYS_SYSPROTO_H_
2600 struct fchmod_args {
2601 	int	fd;
2602 	int	mode;
2603 };
2604 #endif
2605 int
2606 fchmod(td, uap)
2607 	struct thread *td;
2608 	register struct fchmod_args /* {
2609 		int fd;
2610 		int mode;
2611 	} */ *uap;
2612 {
2613 	struct file *fp;
2614 	int vfslocked;
2615 	int error;
2616 
2617 	AUDIT_ARG(fd, uap->fd);
2618 	AUDIT_ARG(mode, uap->mode);
2619 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2620 		return (error);
2621 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2622 #ifdef AUDIT
2623 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2624 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2625 	VOP_UNLOCK(fp->f_vnode, 0, td);
2626 #endif
2627 	error = setfmode(td, fp->f_vnode, uap->mode);
2628 	VFS_UNLOCK_GIANT(vfslocked);
2629 	fdrop(fp, td);
2630 	return (error);
2631 }
2632 
2633 /*
2634  * Common implementation for chown(), lchown(), and fchown()
2635  */
2636 static int
2637 setfown(td, vp, uid, gid)
2638 	struct thread *td;
2639 	struct vnode *vp;
2640 	uid_t uid;
2641 	gid_t gid;
2642 {
2643 	int error;
2644 	struct mount *mp;
2645 	struct vattr vattr;
2646 
2647 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2648 		return (error);
2649 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2650 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2651 	VATTR_NULL(&vattr);
2652 	vattr.va_uid = uid;
2653 	vattr.va_gid = gid;
2654 #ifdef MAC
2655 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2656 	    vattr.va_gid);
2657 	if (error == 0)
2658 #endif
2659 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2660 	VOP_UNLOCK(vp, 0, td);
2661 	vn_finished_write(mp);
2662 	return (error);
2663 }
2664 
2665 /*
2666  * Set ownership given a path name.
2667  */
2668 #ifndef _SYS_SYSPROTO_H_
2669 struct chown_args {
2670 	char	*path;
2671 	int	uid;
2672 	int	gid;
2673 };
2674 #endif
2675 int
2676 chown(td, uap)
2677 	struct thread *td;
2678 	register struct chown_args /* {
2679 		char *path;
2680 		int uid;
2681 		int gid;
2682 	} */ *uap;
2683 {
2684 
2685 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2686 }
2687 
2688 int
2689 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2690     int gid)
2691 {
2692 	int error;
2693 	struct nameidata nd;
2694 	int vfslocked;
2695 
2696 	AUDIT_ARG(owner, uid, gid);
2697 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2698 	if ((error = namei(&nd)) != 0)
2699 		return (error);
2700 	vfslocked = NDHASGIANT(&nd);
2701 	NDFREE(&nd, NDF_ONLY_PNBUF);
2702 	error = setfown(td, nd.ni_vp, uid, gid);
2703 	vrele(nd.ni_vp);
2704 	VFS_UNLOCK_GIANT(vfslocked);
2705 	return (error);
2706 }
2707 
2708 /*
2709  * Set ownership given a path name, do not cross symlinks.
2710  */
2711 #ifndef _SYS_SYSPROTO_H_
2712 struct lchown_args {
2713 	char	*path;
2714 	int	uid;
2715 	int	gid;
2716 };
2717 #endif
2718 int
2719 lchown(td, uap)
2720 	struct thread *td;
2721 	register struct lchown_args /* {
2722 		char *path;
2723 		int uid;
2724 		int gid;
2725 	} */ *uap;
2726 {
2727 
2728 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2729 }
2730 
2731 int
2732 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2733     int gid)
2734 {
2735 	int error;
2736 	struct nameidata nd;
2737 	int vfslocked;
2738 
2739 	AUDIT_ARG(owner, uid, gid);
2740 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2741 	if ((error = namei(&nd)) != 0)
2742 		return (error);
2743 	vfslocked = NDHASGIANT(&nd);
2744 	NDFREE(&nd, NDF_ONLY_PNBUF);
2745 	error = setfown(td, nd.ni_vp, uid, gid);
2746 	vrele(nd.ni_vp);
2747 	VFS_UNLOCK_GIANT(vfslocked);
2748 	return (error);
2749 }
2750 
2751 /*
2752  * Set ownership given a file descriptor.
2753  */
2754 #ifndef _SYS_SYSPROTO_H_
2755 struct fchown_args {
2756 	int	fd;
2757 	int	uid;
2758 	int	gid;
2759 };
2760 #endif
2761 int
2762 fchown(td, uap)
2763 	struct thread *td;
2764 	register struct fchown_args /* {
2765 		int fd;
2766 		int uid;
2767 		int gid;
2768 	} */ *uap;
2769 {
2770 	struct file *fp;
2771 	int vfslocked;
2772 	int error;
2773 
2774 	AUDIT_ARG(fd, uap->fd);
2775 	AUDIT_ARG(owner, uap->uid, uap->gid);
2776 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2777 		return (error);
2778 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2779 #ifdef AUDIT
2780 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2781 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2782 	VOP_UNLOCK(fp->f_vnode, 0, td);
2783 #endif
2784 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2785 	VFS_UNLOCK_GIANT(vfslocked);
2786 	fdrop(fp, td);
2787 	return (error);
2788 }
2789 
2790 /*
2791  * Common implementation code for utimes(), lutimes(), and futimes().
2792  */
2793 static int
2794 getutimes(usrtvp, tvpseg, tsp)
2795 	const struct timeval *usrtvp;
2796 	enum uio_seg tvpseg;
2797 	struct timespec *tsp;
2798 {
2799 	struct timeval tv[2];
2800 	const struct timeval *tvp;
2801 	int error;
2802 
2803 	if (usrtvp == NULL) {
2804 		microtime(&tv[0]);
2805 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2806 		tsp[1] = tsp[0];
2807 	} else {
2808 		if (tvpseg == UIO_SYSSPACE) {
2809 			tvp = usrtvp;
2810 		} else {
2811 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2812 				return (error);
2813 			tvp = tv;
2814 		}
2815 
2816 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2817 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2818 			return (EINVAL);
2819 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2820 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2821 	}
2822 	return (0);
2823 }
2824 
2825 /*
2826  * Common implementation code for utimes(), lutimes(), and futimes().
2827  */
2828 static int
2829 setutimes(td, vp, ts, numtimes, nullflag)
2830 	struct thread *td;
2831 	struct vnode *vp;
2832 	const struct timespec *ts;
2833 	int numtimes;
2834 	int nullflag;
2835 {
2836 	int error, setbirthtime;
2837 	struct mount *mp;
2838 	struct vattr vattr;
2839 
2840 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2841 		return (error);
2842 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2843 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2844 	setbirthtime = 0;
2845 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2846 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2847 		setbirthtime = 1;
2848 	VATTR_NULL(&vattr);
2849 	vattr.va_atime = ts[0];
2850 	vattr.va_mtime = ts[1];
2851 	if (setbirthtime)
2852 		vattr.va_birthtime = ts[1];
2853 	if (numtimes > 2)
2854 		vattr.va_birthtime = ts[2];
2855 	if (nullflag)
2856 		vattr.va_vaflags |= VA_UTIMES_NULL;
2857 #ifdef MAC
2858 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2859 	    vattr.va_mtime);
2860 #endif
2861 	if (error == 0)
2862 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2863 	VOP_UNLOCK(vp, 0, td);
2864 	vn_finished_write(mp);
2865 	return (error);
2866 }
2867 
2868 /*
2869  * Set the access and modification times of a file.
2870  */
2871 #ifndef _SYS_SYSPROTO_H_
2872 struct utimes_args {
2873 	char	*path;
2874 	struct	timeval *tptr;
2875 };
2876 #endif
2877 int
2878 utimes(td, uap)
2879 	struct thread *td;
2880 	register struct utimes_args /* {
2881 		char *path;
2882 		struct timeval *tptr;
2883 	} */ *uap;
2884 {
2885 
2886 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2887 	    UIO_USERSPACE));
2888 }
2889 
2890 int
2891 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2892     struct timeval *tptr, enum uio_seg tptrseg)
2893 {
2894 	struct timespec ts[2];
2895 	int error;
2896 	struct nameidata nd;
2897 	int vfslocked;
2898 
2899 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2900 		return (error);
2901 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2902 	if ((error = namei(&nd)) != 0)
2903 		return (error);
2904 	vfslocked = NDHASGIANT(&nd);
2905 	NDFREE(&nd, NDF_ONLY_PNBUF);
2906 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2907 	vrele(nd.ni_vp);
2908 	VFS_UNLOCK_GIANT(vfslocked);
2909 	return (error);
2910 }
2911 
2912 /*
2913  * Set the access and modification times of a file.
2914  */
2915 #ifndef _SYS_SYSPROTO_H_
2916 struct lutimes_args {
2917 	char	*path;
2918 	struct	timeval *tptr;
2919 };
2920 #endif
2921 int
2922 lutimes(td, uap)
2923 	struct thread *td;
2924 	register struct lutimes_args /* {
2925 		char *path;
2926 		struct timeval *tptr;
2927 	} */ *uap;
2928 {
2929 
2930 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2931 	    UIO_USERSPACE));
2932 }
2933 
2934 int
2935 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2936     struct timeval *tptr, enum uio_seg tptrseg)
2937 {
2938 	struct timespec ts[2];
2939 	int error;
2940 	struct nameidata nd;
2941 	int vfslocked;
2942 
2943 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2944 		return (error);
2945 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2946 	if ((error = namei(&nd)) != 0)
2947 		return (error);
2948 	vfslocked = NDHASGIANT(&nd);
2949 	NDFREE(&nd, NDF_ONLY_PNBUF);
2950 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2951 	vrele(nd.ni_vp);
2952 	VFS_UNLOCK_GIANT(vfslocked);
2953 	return (error);
2954 }
2955 
2956 /*
2957  * Set the access and modification times of a file.
2958  */
2959 #ifndef _SYS_SYSPROTO_H_
2960 struct futimes_args {
2961 	int	fd;
2962 	struct	timeval *tptr;
2963 };
2964 #endif
2965 int
2966 futimes(td, uap)
2967 	struct thread *td;
2968 	register struct futimes_args /* {
2969 		int  fd;
2970 		struct timeval *tptr;
2971 	} */ *uap;
2972 {
2973 
2974 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2975 }
2976 
2977 int
2978 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2979     enum uio_seg tptrseg)
2980 {
2981 	struct timespec ts[2];
2982 	struct file *fp;
2983 	int vfslocked;
2984 	int error;
2985 
2986 	AUDIT_ARG(fd, fd);
2987 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2988 		return (error);
2989 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2990 		return (error);
2991 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2992 #ifdef AUDIT
2993 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2994 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2995 	VOP_UNLOCK(fp->f_vnode, 0, td);
2996 #endif
2997 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2998 	VFS_UNLOCK_GIANT(vfslocked);
2999 	fdrop(fp, td);
3000 	return (error);
3001 }
3002 
3003 /*
3004  * Truncate a file given its path name.
3005  */
3006 #ifndef _SYS_SYSPROTO_H_
3007 struct truncate_args {
3008 	char	*path;
3009 	int	pad;
3010 	off_t	length;
3011 };
3012 #endif
3013 int
3014 truncate(td, uap)
3015 	struct thread *td;
3016 	register struct truncate_args /* {
3017 		char *path;
3018 		int pad;
3019 		off_t length;
3020 	} */ *uap;
3021 {
3022 
3023 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3024 }
3025 
3026 int
3027 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3028 {
3029 	struct mount *mp;
3030 	struct vnode *vp;
3031 	struct vattr vattr;
3032 	int error;
3033 	struct nameidata nd;
3034 	int vfslocked;
3035 
3036 	if (length < 0)
3037 		return(EINVAL);
3038 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3039 	if ((error = namei(&nd)) != 0)
3040 		return (error);
3041 	vfslocked = NDHASGIANT(&nd);
3042 	vp = nd.ni_vp;
3043 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3044 		vrele(vp);
3045 		VFS_UNLOCK_GIANT(vfslocked);
3046 		return (error);
3047 	}
3048 	NDFREE(&nd, NDF_ONLY_PNBUF);
3049 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3050 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3051 	if (vp->v_type == VDIR)
3052 		error = EISDIR;
3053 #ifdef MAC
3054 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
3055 	}
3056 #endif
3057 	else if ((error = vn_writechk(vp)) == 0 &&
3058 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3059 		VATTR_NULL(&vattr);
3060 		vattr.va_size = length;
3061 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3062 	}
3063 	vput(vp);
3064 	vn_finished_write(mp);
3065 	VFS_UNLOCK_GIANT(vfslocked);
3066 	return (error);
3067 }
3068 
3069 /*
3070  * Truncate a file given a file descriptor.
3071  */
3072 #ifndef _SYS_SYSPROTO_H_
3073 struct ftruncate_args {
3074 	int	fd;
3075 	int	pad;
3076 	off_t	length;
3077 };
3078 #endif
3079 int
3080 ftruncate(td, uap)
3081 	struct thread *td;
3082 	register struct ftruncate_args /* {
3083 		int fd;
3084 		int pad;
3085 		off_t length;
3086 	} */ *uap;
3087 {
3088 	struct mount *mp;
3089 	struct vattr vattr;
3090 	struct vnode *vp;
3091 	struct file *fp;
3092 	int vfslocked;
3093 	int error;
3094 
3095 	AUDIT_ARG(fd, uap->fd);
3096 	if (uap->length < 0)
3097 		return(EINVAL);
3098 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3099 		return (error);
3100 	if ((fp->f_flag & FWRITE) == 0) {
3101 		fdrop(fp, td);
3102 		return (EINVAL);
3103 	}
3104 	vp = fp->f_vnode;
3105 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3106 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3107 		goto drop;
3108 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3109 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3110 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3111 	if (vp->v_type == VDIR)
3112 		error = EISDIR;
3113 #ifdef MAC
3114 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3115 	    vp))) {
3116 	}
3117 #endif
3118 	else if ((error = vn_writechk(vp)) == 0) {
3119 		VATTR_NULL(&vattr);
3120 		vattr.va_size = uap->length;
3121 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3122 	}
3123 	VOP_UNLOCK(vp, 0, td);
3124 	vn_finished_write(mp);
3125 drop:
3126 	VFS_UNLOCK_GIANT(vfslocked);
3127 	fdrop(fp, td);
3128 	return (error);
3129 }
3130 
3131 #if defined(COMPAT_43)
3132 /*
3133  * Truncate a file given its path name.
3134  */
3135 #ifndef _SYS_SYSPROTO_H_
3136 struct otruncate_args {
3137 	char	*path;
3138 	long	length;
3139 };
3140 #endif
3141 int
3142 otruncate(td, uap)
3143 	struct thread *td;
3144 	register struct otruncate_args /* {
3145 		char *path;
3146 		long length;
3147 	} */ *uap;
3148 {
3149 	struct truncate_args /* {
3150 		char *path;
3151 		int pad;
3152 		off_t length;
3153 	} */ nuap;
3154 
3155 	nuap.path = uap->path;
3156 	nuap.length = uap->length;
3157 	return (truncate(td, &nuap));
3158 }
3159 
3160 /*
3161  * Truncate a file given a file descriptor.
3162  */
3163 #ifndef _SYS_SYSPROTO_H_
3164 struct oftruncate_args {
3165 	int	fd;
3166 	long	length;
3167 };
3168 #endif
3169 int
3170 oftruncate(td, uap)
3171 	struct thread *td;
3172 	register struct oftruncate_args /* {
3173 		int fd;
3174 		long length;
3175 	} */ *uap;
3176 {
3177 	struct ftruncate_args /* {
3178 		int fd;
3179 		int pad;
3180 		off_t length;
3181 	} */ nuap;
3182 
3183 	nuap.fd = uap->fd;
3184 	nuap.length = uap->length;
3185 	return (ftruncate(td, &nuap));
3186 }
3187 #endif /* COMPAT_43 */
3188 
3189 /*
3190  * Sync an open file.
3191  */
3192 #ifndef _SYS_SYSPROTO_H_
3193 struct fsync_args {
3194 	int	fd;
3195 };
3196 #endif
3197 int
3198 fsync(td, uap)
3199 	struct thread *td;
3200 	struct fsync_args /* {
3201 		int fd;
3202 	} */ *uap;
3203 {
3204 	struct vnode *vp;
3205 	struct mount *mp;
3206 	struct file *fp;
3207 	int vfslocked;
3208 	int error;
3209 
3210 	AUDIT_ARG(fd, uap->fd);
3211 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3212 		return (error);
3213 	vp = fp->f_vnode;
3214 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3215 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3216 		goto drop;
3217 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3218 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3219 	if (vp->v_object != NULL) {
3220 		VM_OBJECT_LOCK(vp->v_object);
3221 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3222 		VM_OBJECT_UNLOCK(vp->v_object);
3223 	}
3224 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3225 
3226 	VOP_UNLOCK(vp, 0, td);
3227 	vn_finished_write(mp);
3228 drop:
3229 	VFS_UNLOCK_GIANT(vfslocked);
3230 	fdrop(fp, td);
3231 	return (error);
3232 }
3233 
3234 /*
3235  * Rename files.  Source and destination must either both be directories,
3236  * or both not be directories.  If target is a directory, it must be empty.
3237  */
3238 #ifndef _SYS_SYSPROTO_H_
3239 struct rename_args {
3240 	char	*from;
3241 	char	*to;
3242 };
3243 #endif
3244 int
3245 rename(td, uap)
3246 	struct thread *td;
3247 	register struct rename_args /* {
3248 		char *from;
3249 		char *to;
3250 	} */ *uap;
3251 {
3252 
3253 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3254 }
3255 
3256 int
3257 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3258 {
3259 	struct mount *mp = NULL;
3260 	struct vnode *tvp, *fvp, *tdvp;
3261 	struct nameidata fromnd, tond;
3262 	int tvfslocked;
3263 	int fvfslocked;
3264 	int error;
3265 
3266 	bwillwrite();
3267 #ifdef MAC
3268 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3269 	    AUDITVNODE1, pathseg, from, td);
3270 #else
3271 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3272 	    AUDITVNODE1, pathseg, from, td);
3273 #endif
3274 	if ((error = namei(&fromnd)) != 0)
3275 		return (error);
3276 	fvfslocked = NDHASGIANT(&fromnd);
3277 	tvfslocked = 0;
3278 #ifdef MAC
3279 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3280 	    fromnd.ni_vp, &fromnd.ni_cnd);
3281 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3282 	if (fromnd.ni_dvp != fromnd.ni_vp)
3283 		VOP_UNLOCK(fromnd.ni_vp, 0, td);
3284 #endif
3285 	fvp = fromnd.ni_vp;
3286 	if (error == 0)
3287 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3288 	if (error != 0) {
3289 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3290 		vrele(fromnd.ni_dvp);
3291 		vrele(fvp);
3292 		goto out1;
3293 	}
3294 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3295 	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3296 	if (fromnd.ni_vp->v_type == VDIR)
3297 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3298 	if ((error = namei(&tond)) != 0) {
3299 		/* Translate error code for rename("dir1", "dir2/."). */
3300 		if (error == EISDIR && fvp->v_type == VDIR)
3301 			error = EINVAL;
3302 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3303 		vrele(fromnd.ni_dvp);
3304 		vrele(fvp);
3305 		vn_finished_write(mp);
3306 		goto out1;
3307 	}
3308 	tvfslocked = NDHASGIANT(&tond);
3309 	tdvp = tond.ni_dvp;
3310 	tvp = tond.ni_vp;
3311 	if (tvp != NULL) {
3312 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3313 			error = ENOTDIR;
3314 			goto out;
3315 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3316 			error = EISDIR;
3317 			goto out;
3318 		}
3319 	}
3320 	if (fvp == tdvp)
3321 		error = EINVAL;
3322 	/*
3323 	 * If the source is the same as the destination (that is, if they
3324 	 * are links to the same vnode), then there is nothing to do.
3325 	 */
3326 	if (fvp == tvp)
3327 		error = -1;
3328 #ifdef MAC
3329 	else
3330 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3331 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3332 #endif
3333 out:
3334 	if (!error) {
3335 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3336 		if (fromnd.ni_dvp != tdvp) {
3337 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3338 		}
3339 		if (tvp) {
3340 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3341 		}
3342 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3343 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3344 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3345 		NDFREE(&tond, NDF_ONLY_PNBUF);
3346 	} else {
3347 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3348 		NDFREE(&tond, NDF_ONLY_PNBUF);
3349 		if (tvp)
3350 			vput(tvp);
3351 		if (tdvp == tvp)
3352 			vrele(tdvp);
3353 		else
3354 			vput(tdvp);
3355 		vrele(fromnd.ni_dvp);
3356 		vrele(fvp);
3357 	}
3358 	vrele(tond.ni_startdir);
3359 	vn_finished_write(mp);
3360 out1:
3361 	if (fromnd.ni_startdir)
3362 		vrele(fromnd.ni_startdir);
3363 	VFS_UNLOCK_GIANT(fvfslocked);
3364 	VFS_UNLOCK_GIANT(tvfslocked);
3365 	if (error == -1)
3366 		return (0);
3367 	return (error);
3368 }
3369 
3370 /*
3371  * Make a directory file.
3372  */
3373 #ifndef _SYS_SYSPROTO_H_
3374 struct mkdir_args {
3375 	char	*path;
3376 	int	mode;
3377 };
3378 #endif
3379 int
3380 mkdir(td, uap)
3381 	struct thread *td;
3382 	register struct mkdir_args /* {
3383 		char *path;
3384 		int mode;
3385 	} */ *uap;
3386 {
3387 
3388 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3389 }
3390 
3391 int
3392 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3393 {
3394 	struct mount *mp;
3395 	struct vnode *vp;
3396 	struct vattr vattr;
3397 	int error;
3398 	struct nameidata nd;
3399 	int vfslocked;
3400 
3401 	AUDIT_ARG(mode, mode);
3402 restart:
3403 	bwillwrite();
3404 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3405 	    segflg, path, td);
3406 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3407 	if ((error = namei(&nd)) != 0)
3408 		return (error);
3409 	vfslocked = NDHASGIANT(&nd);
3410 	vp = nd.ni_vp;
3411 	if (vp != NULL) {
3412 		NDFREE(&nd, NDF_ONLY_PNBUF);
3413 		/*
3414 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3415 		 * the strange behaviour of leaving the vnode unlocked
3416 		 * if the target is the same vnode as the parent.
3417 		 */
3418 		if (vp == nd.ni_dvp)
3419 			vrele(nd.ni_dvp);
3420 		else
3421 			vput(nd.ni_dvp);
3422 		vrele(vp);
3423 		VFS_UNLOCK_GIANT(vfslocked);
3424 		return (EEXIST);
3425 	}
3426 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3427 		NDFREE(&nd, NDF_ONLY_PNBUF);
3428 		vput(nd.ni_dvp);
3429 		VFS_UNLOCK_GIANT(vfslocked);
3430 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3431 			return (error);
3432 		goto restart;
3433 	}
3434 	VATTR_NULL(&vattr);
3435 	vattr.va_type = VDIR;
3436 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3437 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3438 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3439 #ifdef MAC
3440 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3441 	    &vattr);
3442 	if (error)
3443 		goto out;
3444 #endif
3445 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3446 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3447 #ifdef MAC
3448 out:
3449 #endif
3450 	NDFREE(&nd, NDF_ONLY_PNBUF);
3451 	vput(nd.ni_dvp);
3452 	if (!error)
3453 		vput(nd.ni_vp);
3454 	vn_finished_write(mp);
3455 	VFS_UNLOCK_GIANT(vfslocked);
3456 	return (error);
3457 }
3458 
3459 /*
3460  * Remove a directory file.
3461  */
3462 #ifndef _SYS_SYSPROTO_H_
3463 struct rmdir_args {
3464 	char	*path;
3465 };
3466 #endif
3467 int
3468 rmdir(td, uap)
3469 	struct thread *td;
3470 	struct rmdir_args /* {
3471 		char *path;
3472 	} */ *uap;
3473 {
3474 
3475 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3476 }
3477 
3478 int
3479 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3480 {
3481 	struct mount *mp;
3482 	struct vnode *vp;
3483 	int error;
3484 	struct nameidata nd;
3485 	int vfslocked;
3486 
3487 restart:
3488 	bwillwrite();
3489 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3490 	    pathseg, path, td);
3491 	if ((error = namei(&nd)) != 0)
3492 		return (error);
3493 	vfslocked = NDHASGIANT(&nd);
3494 	vp = nd.ni_vp;
3495 	if (vp->v_type != VDIR) {
3496 		error = ENOTDIR;
3497 		goto out;
3498 	}
3499 	/*
3500 	 * No rmdir "." please.
3501 	 */
3502 	if (nd.ni_dvp == vp) {
3503 		error = EINVAL;
3504 		goto out;
3505 	}
3506 	/*
3507 	 * The root of a mounted filesystem cannot be deleted.
3508 	 */
3509 	if (vp->v_vflag & VV_ROOT) {
3510 		error = EBUSY;
3511 		goto out;
3512 	}
3513 #ifdef MAC
3514 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3515 	    &nd.ni_cnd);
3516 	if (error)
3517 		goto out;
3518 #endif
3519 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3520 		NDFREE(&nd, NDF_ONLY_PNBUF);
3521 		vput(vp);
3522 		if (nd.ni_dvp == vp)
3523 			vrele(nd.ni_dvp);
3524 		else
3525 			vput(nd.ni_dvp);
3526 		VFS_UNLOCK_GIANT(vfslocked);
3527 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3528 			return (error);
3529 		goto restart;
3530 	}
3531 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3532 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3533 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3534 	vn_finished_write(mp);
3535 out:
3536 	NDFREE(&nd, NDF_ONLY_PNBUF);
3537 	vput(vp);
3538 	if (nd.ni_dvp == vp)
3539 		vrele(nd.ni_dvp);
3540 	else
3541 		vput(nd.ni_dvp);
3542 	VFS_UNLOCK_GIANT(vfslocked);
3543 	return (error);
3544 }
3545 
3546 #ifdef COMPAT_43
3547 /*
3548  * Read a block of directory entries in a filesystem independent format.
3549  */
3550 #ifndef _SYS_SYSPROTO_H_
3551 struct ogetdirentries_args {
3552 	int	fd;
3553 	char	*buf;
3554 	u_int	count;
3555 	long	*basep;
3556 };
3557 #endif
3558 int
3559 ogetdirentries(td, uap)
3560 	struct thread *td;
3561 	register struct ogetdirentries_args /* {
3562 		int fd;
3563 		char *buf;
3564 		u_int count;
3565 		long *basep;
3566 	} */ *uap;
3567 {
3568 	struct vnode *vp;
3569 	struct file *fp;
3570 	struct uio auio, kuio;
3571 	struct iovec aiov, kiov;
3572 	struct dirent *dp, *edp;
3573 	caddr_t dirbuf;
3574 	int error, eofflag, readcnt, vfslocked;
3575 	long loff;
3576 
3577 	/* XXX arbitrary sanity limit on `count'. */
3578 	if (uap->count > 64 * 1024)
3579 		return (EINVAL);
3580 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3581 		return (error);
3582 	if ((fp->f_flag & FREAD) == 0) {
3583 		fdrop(fp, td);
3584 		return (EBADF);
3585 	}
3586 	vp = fp->f_vnode;
3587 unionread:
3588 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3589 	if (vp->v_type != VDIR) {
3590 		VFS_UNLOCK_GIANT(vfslocked);
3591 		fdrop(fp, td);
3592 		return (EINVAL);
3593 	}
3594 	aiov.iov_base = uap->buf;
3595 	aiov.iov_len = uap->count;
3596 	auio.uio_iov = &aiov;
3597 	auio.uio_iovcnt = 1;
3598 	auio.uio_rw = UIO_READ;
3599 	auio.uio_segflg = UIO_USERSPACE;
3600 	auio.uio_td = td;
3601 	auio.uio_resid = uap->count;
3602 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3603 	loff = auio.uio_offset = fp->f_offset;
3604 #ifdef MAC
3605 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3606 	if (error) {
3607 		VOP_UNLOCK(vp, 0, td);
3608 		VFS_UNLOCK_GIANT(vfslocked);
3609 		fdrop(fp, td);
3610 		return (error);
3611 	}
3612 #endif
3613 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3614 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3615 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3616 			    NULL, NULL);
3617 			fp->f_offset = auio.uio_offset;
3618 		} else
3619 #	endif
3620 	{
3621 		kuio = auio;
3622 		kuio.uio_iov = &kiov;
3623 		kuio.uio_segflg = UIO_SYSSPACE;
3624 		kiov.iov_len = uap->count;
3625 		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3626 		kiov.iov_base = dirbuf;
3627 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3628 			    NULL, NULL);
3629 		fp->f_offset = kuio.uio_offset;
3630 		if (error == 0) {
3631 			readcnt = uap->count - kuio.uio_resid;
3632 			edp = (struct dirent *)&dirbuf[readcnt];
3633 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3634 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3635 					/*
3636 					 * The expected low byte of
3637 					 * dp->d_namlen is our dp->d_type.
3638 					 * The high MBZ byte of dp->d_namlen
3639 					 * is our dp->d_namlen.
3640 					 */
3641 					dp->d_type = dp->d_namlen;
3642 					dp->d_namlen = 0;
3643 #				else
3644 					/*
3645 					 * The dp->d_type is the high byte
3646 					 * of the expected dp->d_namlen,
3647 					 * so must be zero'ed.
3648 					 */
3649 					dp->d_type = 0;
3650 #				endif
3651 				if (dp->d_reclen > 0) {
3652 					dp = (struct dirent *)
3653 					    ((char *)dp + dp->d_reclen);
3654 				} else {
3655 					error = EIO;
3656 					break;
3657 				}
3658 			}
3659 			if (dp >= edp)
3660 				error = uiomove(dirbuf, readcnt, &auio);
3661 		}
3662 		FREE(dirbuf, M_TEMP);
3663 	}
3664 	VOP_UNLOCK(vp, 0, td);
3665 	if (error) {
3666 		VFS_UNLOCK_GIANT(vfslocked);
3667 		fdrop(fp, td);
3668 		return (error);
3669 	}
3670 	if (uap->count == auio.uio_resid) {
3671 		if (union_dircheckp) {
3672 			error = union_dircheckp(td, &vp, fp);
3673 			if (error == -1) {
3674 				VFS_UNLOCK_GIANT(vfslocked);
3675 				goto unionread;
3676 			}
3677 			if (error) {
3678 				VFS_UNLOCK_GIANT(vfslocked);
3679 				fdrop(fp, td);
3680 				return (error);
3681 			}
3682 		}
3683 		/*
3684 		 * XXX We could delay dropping the lock above but
3685 		 * union_dircheckp complicates things.
3686 		 */
3687 		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
3688 		if ((vp->v_vflag & VV_ROOT) &&
3689 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3690 			struct vnode *tvp = vp;
3691 			vp = vp->v_mount->mnt_vnodecovered;
3692 			VREF(vp);
3693 			fp->f_vnode = vp;
3694 			fp->f_data = vp;
3695 			fp->f_offset = 0;
3696 			vput(tvp);
3697 			VFS_UNLOCK_GIANT(vfslocked);
3698 			goto unionread;
3699 		}
3700 		VOP_UNLOCK(vp, 0, td);
3701 	}
3702 	VFS_UNLOCK_GIANT(vfslocked);
3703 	error = copyout(&loff, uap->basep, sizeof(long));
3704 	fdrop(fp, td);
3705 	td->td_retval[0] = uap->count - auio.uio_resid;
3706 	return (error);
3707 }
3708 #endif /* COMPAT_43 */
3709 
3710 /*
3711  * Read a block of directory entries in a filesystem independent format.
3712  */
3713 #ifndef _SYS_SYSPROTO_H_
3714 struct getdirentries_args {
3715 	int	fd;
3716 	char	*buf;
3717 	u_int	count;
3718 	long	*basep;
3719 };
3720 #endif
3721 int
3722 getdirentries(td, uap)
3723 	struct thread *td;
3724 	register struct getdirentries_args /* {
3725 		int fd;
3726 		char *buf;
3727 		u_int count;
3728 		long *basep;
3729 	} */ *uap;
3730 {
3731 	struct vnode *vp;
3732 	struct file *fp;
3733 	struct uio auio;
3734 	struct iovec aiov;
3735 	int vfslocked;
3736 	long loff;
3737 	int error, eofflag;
3738 
3739 	AUDIT_ARG(fd, uap->fd);
3740 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3741 		return (error);
3742 	if ((fp->f_flag & FREAD) == 0) {
3743 		fdrop(fp, td);
3744 		return (EBADF);
3745 	}
3746 	vp = fp->f_vnode;
3747 unionread:
3748 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3749 	if (vp->v_type != VDIR) {
3750 		error = EINVAL;
3751 		goto fail;
3752 	}
3753 	aiov.iov_base = uap->buf;
3754 	aiov.iov_len = uap->count;
3755 	auio.uio_iov = &aiov;
3756 	auio.uio_iovcnt = 1;
3757 	auio.uio_rw = UIO_READ;
3758 	auio.uio_segflg = UIO_USERSPACE;
3759 	auio.uio_td = td;
3760 	auio.uio_resid = uap->count;
3761 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3762 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3763 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3764 	loff = auio.uio_offset = fp->f_offset;
3765 #ifdef MAC
3766 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3767 	if (error == 0)
3768 #endif
3769 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3770 		    NULL);
3771 	fp->f_offset = auio.uio_offset;
3772 	VOP_UNLOCK(vp, 0, td);
3773 	if (error)
3774 		goto fail;
3775 	if (uap->count == auio.uio_resid) {
3776 		if (union_dircheckp) {
3777 			error = union_dircheckp(td, &vp, fp);
3778 			if (error == -1) {
3779 				VFS_UNLOCK_GIANT(vfslocked);
3780 				goto unionread;
3781 			}
3782 			if (error)
3783 				goto fail;
3784 		}
3785 		/*
3786 		 * XXX We could delay dropping the lock above but
3787 		 * union_dircheckp complicates things.
3788 		 */
3789 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3790 		if ((vp->v_vflag & VV_ROOT) &&
3791 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3792 			struct vnode *tvp = vp;
3793 			vp = vp->v_mount->mnt_vnodecovered;
3794 			VREF(vp);
3795 			fp->f_vnode = vp;
3796 			fp->f_data = vp;
3797 			fp->f_offset = 0;
3798 			vput(tvp);
3799 			VFS_UNLOCK_GIANT(vfslocked);
3800 			goto unionread;
3801 		}
3802 		VOP_UNLOCK(vp, 0, td);
3803 	}
3804 	if (uap->basep != NULL) {
3805 		error = copyout(&loff, uap->basep, sizeof(long));
3806 	}
3807 	td->td_retval[0] = uap->count - auio.uio_resid;
3808 fail:
3809 	VFS_UNLOCK_GIANT(vfslocked);
3810 	fdrop(fp, td);
3811 	return (error);
3812 }
3813 #ifndef _SYS_SYSPROTO_H_
3814 struct getdents_args {
3815 	int fd;
3816 	char *buf;
3817 	size_t count;
3818 };
3819 #endif
3820 int
3821 getdents(td, uap)
3822 	struct thread *td;
3823 	register struct getdents_args /* {
3824 		int fd;
3825 		char *buf;
3826 		u_int count;
3827 	} */ *uap;
3828 {
3829 	struct getdirentries_args ap;
3830 	ap.fd = uap->fd;
3831 	ap.buf = uap->buf;
3832 	ap.count = uap->count;
3833 	ap.basep = NULL;
3834 	return (getdirentries(td, &ap));
3835 }
3836 
3837 /*
3838  * Set the mode mask for creation of filesystem nodes.
3839  *
3840  * MP SAFE
3841  */
3842 #ifndef _SYS_SYSPROTO_H_
3843 struct umask_args {
3844 	int	newmask;
3845 };
3846 #endif
3847 int
3848 umask(td, uap)
3849 	struct thread *td;
3850 	struct umask_args /* {
3851 		int newmask;
3852 	} */ *uap;
3853 {
3854 	register struct filedesc *fdp;
3855 
3856 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3857 	fdp = td->td_proc->p_fd;
3858 	td->td_retval[0] = fdp->fd_cmask;
3859 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3860 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3861 	return (0);
3862 }
3863 
3864 /*
3865  * Void all references to file by ripping underlying filesystem
3866  * away from vnode.
3867  */
3868 #ifndef _SYS_SYSPROTO_H_
3869 struct revoke_args {
3870 	char	*path;
3871 };
3872 #endif
3873 int
3874 revoke(td, uap)
3875 	struct thread *td;
3876 	register struct revoke_args /* {
3877 		char *path;
3878 	} */ *uap;
3879 {
3880 	struct vnode *vp;
3881 	struct vattr vattr;
3882 	int error;
3883 	struct nameidata nd;
3884 	int vfslocked;
3885 
3886 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3887 	    UIO_USERSPACE, uap->path, td);
3888 	if ((error = namei(&nd)) != 0)
3889 		return (error);
3890 	vfslocked = NDHASGIANT(&nd);
3891 	vp = nd.ni_vp;
3892 	NDFREE(&nd, NDF_ONLY_PNBUF);
3893 	if (vp->v_type != VCHR) {
3894 		error = EINVAL;
3895 		goto out;
3896 	}
3897 #ifdef MAC
3898 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3899 	if (error)
3900 		goto out;
3901 #endif
3902 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3903 	if (error)
3904 		goto out;
3905 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3906 		error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN,
3907 		    SUSER_ALLOWJAIL);
3908 		if (error)
3909 			goto out;
3910 	}
3911 	if (vcount(vp) > 1)
3912 		VOP_REVOKE(vp, REVOKEALL);
3913 out:
3914 	vput(vp);
3915 	VFS_UNLOCK_GIANT(vfslocked);
3916 	return (error);
3917 }
3918 
3919 /*
3920  * Convert a user file descriptor to a kernel file entry.
3921  * A reference on the file entry is held upon returning.
3922  */
3923 int
3924 getvnode(fdp, fd, fpp)
3925 	struct filedesc *fdp;
3926 	int fd;
3927 	struct file **fpp;
3928 {
3929 	int error;
3930 	struct file *fp;
3931 
3932 	fp = NULL;
3933 	if (fdp == NULL)
3934 		error = EBADF;
3935 	else {
3936 		FILEDESC_LOCK(fdp);
3937 		if ((u_int)fd >= fdp->fd_nfiles ||
3938 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3939 			error = EBADF;
3940 		else if (fp->f_vnode == NULL) {
3941 			fp = NULL;
3942 			error = EINVAL;
3943 		} else {
3944 			fhold(fp);
3945 			error = 0;
3946 		}
3947 		FILEDESC_UNLOCK(fdp);
3948 	}
3949 	*fpp = fp;
3950 	return (error);
3951 }
3952 
3953 /*
3954  * Get (NFS) file handle
3955  */
3956 #ifndef _SYS_SYSPROTO_H_
3957 struct lgetfh_args {
3958 	char	*fname;
3959 	fhandle_t *fhp;
3960 };
3961 #endif
3962 int
3963 lgetfh(td, uap)
3964 	struct thread *td;
3965 	register struct lgetfh_args *uap;
3966 {
3967 	struct nameidata nd;
3968 	fhandle_t fh;
3969 	register struct vnode *vp;
3970 	int vfslocked;
3971 	int error;
3972 
3973 	error = priv_check(td, PRIV_VFS_GETFH);
3974 	if (error)
3975 		return (error);
3976 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3977 	    UIO_USERSPACE, uap->fname, td);
3978 	error = namei(&nd);
3979 	if (error)
3980 		return (error);
3981 	vfslocked = NDHASGIANT(&nd);
3982 	NDFREE(&nd, NDF_ONLY_PNBUF);
3983 	vp = nd.ni_vp;
3984 	bzero(&fh, sizeof(fh));
3985 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3986 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3987 	vput(vp);
3988 	VFS_UNLOCK_GIANT(vfslocked);
3989 	if (error)
3990 		return (error);
3991 	error = copyout(&fh, uap->fhp, sizeof (fh));
3992 	return (error);
3993 }
3994 
3995 #ifndef _SYS_SYSPROTO_H_
3996 struct getfh_args {
3997 	char	*fname;
3998 	fhandle_t *fhp;
3999 };
4000 #endif
4001 int
4002 getfh(td, uap)
4003 	struct thread *td;
4004 	register struct getfh_args *uap;
4005 {
4006 	struct nameidata nd;
4007 	fhandle_t fh;
4008 	register struct vnode *vp;
4009 	int vfslocked;
4010 	int error;
4011 
4012 	error = priv_check(td, PRIV_VFS_GETFH);
4013 	if (error)
4014 		return (error);
4015 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4016 	    UIO_USERSPACE, uap->fname, td);
4017 	error = namei(&nd);
4018 	if (error)
4019 		return (error);
4020 	vfslocked = NDHASGIANT(&nd);
4021 	NDFREE(&nd, NDF_ONLY_PNBUF);
4022 	vp = nd.ni_vp;
4023 	bzero(&fh, sizeof(fh));
4024 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4025 	error = VFS_VPTOFH(vp, &fh.fh_fid);
4026 	vput(vp);
4027 	VFS_UNLOCK_GIANT(vfslocked);
4028 	if (error)
4029 		return (error);
4030 	error = copyout(&fh, uap->fhp, sizeof (fh));
4031 	return (error);
4032 }
4033 
4034 /*
4035  * syscall for the rpc.lockd to use to translate a NFS file handle into an
4036  * open descriptor.
4037  *
4038  * warning: do not remove the priv_check() call or this becomes one giant
4039  * security hole.
4040  *
4041  * MP SAFE
4042  */
4043 #ifndef _SYS_SYSPROTO_H_
4044 struct fhopen_args {
4045 	const struct fhandle *u_fhp;
4046 	int flags;
4047 };
4048 #endif
4049 int
4050 fhopen(td, uap)
4051 	struct thread *td;
4052 	struct fhopen_args /* {
4053 		const struct fhandle *u_fhp;
4054 		int flags;
4055 	} */ *uap;
4056 {
4057 	struct proc *p = td->td_proc;
4058 	struct mount *mp;
4059 	struct vnode *vp;
4060 	struct fhandle fhp;
4061 	struct vattr vat;
4062 	struct vattr *vap = &vat;
4063 	struct flock lf;
4064 	struct file *fp;
4065 	register struct filedesc *fdp = p->p_fd;
4066 	int fmode, mode, error, type;
4067 	struct file *nfp;
4068 	int vfslocked;
4069 	int indx;
4070 
4071 	error = priv_check(td, PRIV_VFS_FHOPEN);
4072 	if (error)
4073 		return (error);
4074 	fmode = FFLAGS(uap->flags);
4075 	/* why not allow a non-read/write open for our lockd? */
4076 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4077 		return (EINVAL);
4078 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4079 	if (error)
4080 		return(error);
4081 	/* find the mount point */
4082 	mp = vfs_getvfs(&fhp.fh_fsid);
4083 	if (mp == NULL)
4084 		return (ESTALE);
4085 	vfslocked = VFS_LOCK_GIANT(mp);
4086 	/* now give me my vnode, it gets returned to me locked */
4087 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4088 	if (error)
4089 		goto out;
4090 	/*
4091 	 * from now on we have to make sure not
4092 	 * to forget about the vnode
4093 	 * any error that causes an abort must vput(vp)
4094 	 * just set error = err and 'goto bad;'.
4095 	 */
4096 
4097 	/*
4098 	 * from vn_open
4099 	 */
4100 	if (vp->v_type == VLNK) {
4101 		error = EMLINK;
4102 		goto bad;
4103 	}
4104 	if (vp->v_type == VSOCK) {
4105 		error = EOPNOTSUPP;
4106 		goto bad;
4107 	}
4108 	mode = 0;
4109 	if (fmode & (FWRITE | O_TRUNC)) {
4110 		if (vp->v_type == VDIR) {
4111 			error = EISDIR;
4112 			goto bad;
4113 		}
4114 		error = vn_writechk(vp);
4115 		if (error)
4116 			goto bad;
4117 		mode |= VWRITE;
4118 	}
4119 	if (fmode & FREAD)
4120 		mode |= VREAD;
4121 	if (fmode & O_APPEND)
4122 		mode |= VAPPEND;
4123 #ifdef MAC
4124 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4125 	if (error)
4126 		goto bad;
4127 #endif
4128 	if (mode) {
4129 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4130 		if (error)
4131 			goto bad;
4132 	}
4133 	if (fmode & O_TRUNC) {
4134 		VOP_UNLOCK(vp, 0, td);				/* XXX */
4135 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4136 			vrele(vp);
4137 			goto out;
4138 		}
4139 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4140 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4141 #ifdef MAC
4142 		/*
4143 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4144 		 * should be right.
4145 		 */
4146 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4147 		if (error == 0) {
4148 #endif
4149 			VATTR_NULL(vap);
4150 			vap->va_size = 0;
4151 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4152 #ifdef MAC
4153 		}
4154 #endif
4155 		vn_finished_write(mp);
4156 		if (error)
4157 			goto bad;
4158 	}
4159 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
4160 	if (error)
4161 		goto bad;
4162 
4163 	if (fmode & FWRITE)
4164 		vp->v_writecount++;
4165 
4166 	/*
4167 	 * end of vn_open code
4168 	 */
4169 
4170 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4171 		if (fmode & FWRITE)
4172 			vp->v_writecount--;
4173 		goto bad;
4174 	}
4175 	/* An extra reference on `nfp' has been held for us by falloc(). */
4176 	fp = nfp;
4177 
4178 	nfp->f_vnode = vp;
4179 	nfp->f_data = vp;
4180 	nfp->f_flag = fmode & FMASK;
4181 	nfp->f_ops = &vnops;
4182 	nfp->f_type = DTYPE_VNODE;
4183 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4184 		lf.l_whence = SEEK_SET;
4185 		lf.l_start = 0;
4186 		lf.l_len = 0;
4187 		if (fmode & O_EXLOCK)
4188 			lf.l_type = F_WRLCK;
4189 		else
4190 			lf.l_type = F_RDLCK;
4191 		type = F_FLOCK;
4192 		if ((fmode & FNONBLOCK) == 0)
4193 			type |= F_WAIT;
4194 		VOP_UNLOCK(vp, 0, td);
4195 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4196 			    type)) != 0) {
4197 			/*
4198 			 * The lock request failed.  Normally close the
4199 			 * descriptor but handle the case where someone might
4200 			 * have dup()d or close()d it when we weren't looking.
4201 			 */
4202 			fdclose(fdp, fp, indx, td);
4203 
4204 			/*
4205 			 * release our private reference
4206 			 */
4207 			fdrop(fp, td);
4208 			goto out;
4209 		}
4210 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4211 		fp->f_flag |= FHASLOCK;
4212 	}
4213 
4214 	VOP_UNLOCK(vp, 0, td);
4215 	fdrop(fp, td);
4216 	vfs_rel(mp);
4217 	VFS_UNLOCK_GIANT(vfslocked);
4218 	td->td_retval[0] = indx;
4219 	return (0);
4220 
4221 bad:
4222 	vput(vp);
4223 out:
4224 	vfs_rel(mp);
4225 	VFS_UNLOCK_GIANT(vfslocked);
4226 	return (error);
4227 }
4228 
4229 /*
4230  * Stat an (NFS) file handle.
4231  *
4232  * MP SAFE
4233  */
4234 #ifndef _SYS_SYSPROTO_H_
4235 struct fhstat_args {
4236 	struct fhandle *u_fhp;
4237 	struct stat *sb;
4238 };
4239 #endif
4240 int
4241 fhstat(td, uap)
4242 	struct thread *td;
4243 	register struct fhstat_args /* {
4244 		struct fhandle *u_fhp;
4245 		struct stat *sb;
4246 	} */ *uap;
4247 {
4248 	struct stat sb;
4249 	fhandle_t fh;
4250 	struct mount *mp;
4251 	struct vnode *vp;
4252 	int vfslocked;
4253 	int error;
4254 
4255 	error = priv_check(td, PRIV_VFS_FHSTAT);
4256 	if (error)
4257 		return (error);
4258 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4259 	if (error)
4260 		return (error);
4261 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4262 		return (ESTALE);
4263 	vfslocked = VFS_LOCK_GIANT(mp);
4264 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4265 		vfs_rel(mp);
4266 		VFS_UNLOCK_GIANT(vfslocked);
4267 		return (error);
4268 	}
4269 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4270 	vput(vp);
4271 	vfs_rel(mp);
4272 	VFS_UNLOCK_GIANT(vfslocked);
4273 	if (error)
4274 		return (error);
4275 	error = copyout(&sb, uap->sb, sizeof(sb));
4276 	return (error);
4277 }
4278 
4279 /*
4280  * Implement fstatfs() for (NFS) file handles.
4281  *
4282  * MP SAFE
4283  */
4284 #ifndef _SYS_SYSPROTO_H_
4285 struct fhstatfs_args {
4286 	struct fhandle *u_fhp;
4287 	struct statfs *buf;
4288 };
4289 #endif
4290 int
4291 fhstatfs(td, uap)
4292 	struct thread *td;
4293 	struct fhstatfs_args /* {
4294 		struct fhandle *u_fhp;
4295 		struct statfs *buf;
4296 	} */ *uap;
4297 {
4298 	struct statfs sf;
4299 	fhandle_t fh;
4300 	int error;
4301 
4302 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4303 	if (error)
4304 		return (error);
4305 	error = kern_fhstatfs(td, fh, &sf);
4306 	if (error)
4307 		return (error);
4308 	return (copyout(&sf, uap->buf, sizeof(sf)));
4309 }
4310 
4311 int
4312 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4313 {
4314 	struct statfs *sp;
4315 	struct mount *mp;
4316 	struct vnode *vp;
4317 	int vfslocked;
4318 	int error;
4319 
4320 	error = priv_check(td, PRIV_VFS_FHSTATFS);
4321 	if (error)
4322 		return (error);
4323 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4324 		return (ESTALE);
4325 	vfslocked = VFS_LOCK_GIANT(mp);
4326 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4327 	if (error) {
4328 		VFS_UNLOCK_GIANT(vfslocked);
4329 		vfs_rel(mp);
4330 		return (error);
4331 	}
4332 	vput(vp);
4333 	error = prison_canseemount(td->td_ucred, mp);
4334 	if (error)
4335 		goto out;
4336 #ifdef MAC
4337 	error = mac_check_mount_stat(td->td_ucred, mp);
4338 	if (error)
4339 		goto out;
4340 #endif
4341 	/*
4342 	 * Set these in case the underlying filesystem fails to do so.
4343 	 */
4344 	sp = &mp->mnt_stat;
4345 	sp->f_version = STATFS_VERSION;
4346 	sp->f_namemax = NAME_MAX;
4347 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4348 	error = VFS_STATFS(mp, sp, td);
4349 	if (error == 0)
4350 		*buf = *sp;
4351 out:
4352 	vfs_rel(mp);
4353 	VFS_UNLOCK_GIANT(vfslocked);
4354 	return (error);
4355 }
4356 
4357 /*
4358  * Syscall to push extended attribute configuration information into the
4359  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4360  * a command (int cmd), and attribute name and misc data.  For now, the
4361  * attribute name is left in userspace for consumption by the VFS_op.
4362  * It will probably be changed to be copied into sysspace by the
4363  * syscall in the future, once issues with various consumers of the
4364  * attribute code have raised their hands.
4365  *
4366  * Currently this is used only by UFS Extended Attributes.
4367  */
4368 int
4369 extattrctl(td, uap)
4370 	struct thread *td;
4371 	struct extattrctl_args /* {
4372 		const char *path;
4373 		int cmd;
4374 		const char *filename;
4375 		int attrnamespace;
4376 		const char *attrname;
4377 	} */ *uap;
4378 {
4379 	struct vnode *filename_vp;
4380 	struct nameidata nd;
4381 	struct mount *mp, *mp_writable;
4382 	char attrname[EXTATTR_MAXNAMELEN];
4383 	int vfslocked, fnvfslocked, error;
4384 
4385 	AUDIT_ARG(cmd, uap->cmd);
4386 	AUDIT_ARG(value, uap->attrnamespace);
4387 	/*
4388 	 * uap->attrname is not always defined.  We check again later when we
4389 	 * invoke the VFS call so as to pass in NULL there if needed.
4390 	 */
4391 	if (uap->attrname != NULL) {
4392 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4393 		    NULL);
4394 		if (error)
4395 			return (error);
4396 	}
4397 	AUDIT_ARG(text, attrname);
4398 
4399 	vfslocked = fnvfslocked = 0;
4400 	/*
4401 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4402 	 * which VFS_EXTATTRCTL() will later release.
4403 	 */
4404 	filename_vp = NULL;
4405 	if (uap->filename != NULL) {
4406 		NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF |
4407 		    AUDITVNODE2, UIO_USERSPACE, uap->filename, td);
4408 		error = namei(&nd);
4409 		if (error)
4410 			return (error);
4411 		fnvfslocked = NDHASGIANT(&nd);
4412 		filename_vp = nd.ni_vp;
4413 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4414 	}
4415 
4416 	/* uap->path is always defined. */
4417 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4418 	    uap->path, td);
4419 	error = namei(&nd);
4420 	if (error) {
4421 		if (filename_vp != NULL)
4422 			vput(filename_vp);
4423 		goto out;
4424 	}
4425 	vfslocked = NDHASGIANT(&nd);
4426 	mp = nd.ni_vp->v_mount;
4427 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4428 	NDFREE(&nd, 0);
4429 	if (error) {
4430 		if (filename_vp != NULL)
4431 			vput(filename_vp);
4432 		goto out;
4433 	}
4434 
4435 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
4436 	    uap->attrname != NULL ? attrname : NULL, td);
4437 
4438 	vn_finished_write(mp_writable);
4439 	/*
4440 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4441 	 * filename_vp, so vrele it if it is defined.
4442 	 */
4443 	if (filename_vp != NULL)
4444 		vrele(filename_vp);
4445 out:
4446 	VFS_UNLOCK_GIANT(fnvfslocked);
4447 	VFS_UNLOCK_GIANT(vfslocked);
4448 	return (error);
4449 }
4450 
4451 /*-
4452  * Set a named extended attribute on a file or directory
4453  *
4454  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4455  *            kernelspace string pointer "attrname", userspace buffer
4456  *            pointer "data", buffer length "nbytes", thread "td".
4457  * Returns: 0 on success, an error number otherwise
4458  * Locks: none
4459  * References: vp must be a valid reference for the duration of the call
4460  */
4461 static int
4462 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4463     void *data, size_t nbytes, struct thread *td)
4464 {
4465 	struct mount *mp;
4466 	struct uio auio;
4467 	struct iovec aiov;
4468 	ssize_t cnt;
4469 	int error;
4470 
4471 	VFS_ASSERT_GIANT(vp->v_mount);
4472 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4473 	if (error)
4474 		return (error);
4475 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4476 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4477 
4478 	aiov.iov_base = data;
4479 	aiov.iov_len = nbytes;
4480 	auio.uio_iov = &aiov;
4481 	auio.uio_iovcnt = 1;
4482 	auio.uio_offset = 0;
4483 	if (nbytes > INT_MAX) {
4484 		error = EINVAL;
4485 		goto done;
4486 	}
4487 	auio.uio_resid = nbytes;
4488 	auio.uio_rw = UIO_WRITE;
4489 	auio.uio_segflg = UIO_USERSPACE;
4490 	auio.uio_td = td;
4491 	cnt = nbytes;
4492 
4493 #ifdef MAC
4494 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4495 	    attrname, &auio);
4496 	if (error)
4497 		goto done;
4498 #endif
4499 
4500 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4501 	    td->td_ucred, td);
4502 	cnt -= auio.uio_resid;
4503 	td->td_retval[0] = cnt;
4504 
4505 done:
4506 	VOP_UNLOCK(vp, 0, td);
4507 	vn_finished_write(mp);
4508 	return (error);
4509 }
4510 
4511 int
4512 extattr_set_fd(td, uap)
4513 	struct thread *td;
4514 	struct extattr_set_fd_args /* {
4515 		int fd;
4516 		int attrnamespace;
4517 		const char *attrname;
4518 		void *data;
4519 		size_t nbytes;
4520 	} */ *uap;
4521 {
4522 	struct file *fp;
4523 	char attrname[EXTATTR_MAXNAMELEN];
4524 	int vfslocked, error;
4525 
4526 	AUDIT_ARG(fd, uap->fd);
4527 	AUDIT_ARG(value, uap->attrnamespace);
4528 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4529 	if (error)
4530 		return (error);
4531 	AUDIT_ARG(text, attrname);
4532 
4533 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4534 	if (error)
4535 		return (error);
4536 
4537 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4538 	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
4539 	    attrname, uap->data, uap->nbytes, td);
4540 	fdrop(fp, td);
4541 	VFS_UNLOCK_GIANT(vfslocked);
4542 
4543 	return (error);
4544 }
4545 
4546 int
4547 extattr_set_file(td, uap)
4548 	struct thread *td;
4549 	struct extattr_set_file_args /* {
4550 		const char *path;
4551 		int attrnamespace;
4552 		const char *attrname;
4553 		void *data;
4554 		size_t nbytes;
4555 	} */ *uap;
4556 {
4557 	struct nameidata nd;
4558 	char attrname[EXTATTR_MAXNAMELEN];
4559 	int vfslocked, error;
4560 
4561 	AUDIT_ARG(value, uap->attrnamespace);
4562 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4563 	if (error)
4564 		return (error);
4565 	AUDIT_ARG(text, attrname);
4566 
4567 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4568 	    uap->path, td);
4569 	error = namei(&nd);
4570 	if (error)
4571 		return (error);
4572 	NDFREE(&nd, NDF_ONLY_PNBUF);
4573 
4574 	vfslocked = NDHASGIANT(&nd);
4575 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4576 	    uap->data, uap->nbytes, td);
4577 
4578 	vrele(nd.ni_vp);
4579 	VFS_UNLOCK_GIANT(vfslocked);
4580 	return (error);
4581 }
4582 
4583 int
4584 extattr_set_link(td, uap)
4585 	struct thread *td;
4586 	struct extattr_set_link_args /* {
4587 		const char *path;
4588 		int attrnamespace;
4589 		const char *attrname;
4590 		void *data;
4591 		size_t nbytes;
4592 	} */ *uap;
4593 {
4594 	struct nameidata nd;
4595 	char attrname[EXTATTR_MAXNAMELEN];
4596 	int vfslocked, error;
4597 
4598 	AUDIT_ARG(value, uap->attrnamespace);
4599 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4600 	if (error)
4601 		return (error);
4602 	AUDIT_ARG(text, attrname);
4603 
4604 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4605 	    uap->path, td);
4606 	error = namei(&nd);
4607 	if (error)
4608 		return (error);
4609 	NDFREE(&nd, NDF_ONLY_PNBUF);
4610 
4611 	vfslocked = NDHASGIANT(&nd);
4612 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4613 	    uap->data, uap->nbytes, td);
4614 
4615 	vrele(nd.ni_vp);
4616 	VFS_UNLOCK_GIANT(vfslocked);
4617 	return (error);
4618 }
4619 
4620 /*-
4621  * Get a named extended attribute on a file or directory
4622  *
4623  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4624  *            kernelspace string pointer "attrname", userspace buffer
4625  *            pointer "data", buffer length "nbytes", thread "td".
4626  * Returns: 0 on success, an error number otherwise
4627  * Locks: none
4628  * References: vp must be a valid reference for the duration of the call
4629  */
4630 static int
4631 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4632     void *data, size_t nbytes, struct thread *td)
4633 {
4634 	struct uio auio, *auiop;
4635 	struct iovec aiov;
4636 	ssize_t cnt;
4637 	size_t size, *sizep;
4638 	int error;
4639 
4640 	VFS_ASSERT_GIANT(vp->v_mount);
4641 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4642 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4643 
4644 	/*
4645 	 * Slightly unusual semantics: if the user provides a NULL data
4646 	 * pointer, they don't want to receive the data, just the
4647 	 * maximum read length.
4648 	 */
4649 	auiop = NULL;
4650 	sizep = NULL;
4651 	cnt = 0;
4652 	if (data != NULL) {
4653 		aiov.iov_base = data;
4654 		aiov.iov_len = nbytes;
4655 		auio.uio_iov = &aiov;
4656 		auio.uio_iovcnt = 1;
4657 		auio.uio_offset = 0;
4658 		if (nbytes > INT_MAX) {
4659 			error = EINVAL;
4660 			goto done;
4661 		}
4662 		auio.uio_resid = nbytes;
4663 		auio.uio_rw = UIO_READ;
4664 		auio.uio_segflg = UIO_USERSPACE;
4665 		auio.uio_td = td;
4666 		auiop = &auio;
4667 		cnt = nbytes;
4668 	} else
4669 		sizep = &size;
4670 
4671 #ifdef MAC
4672 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4673 	    attrname, &auio);
4674 	if (error)
4675 		goto done;
4676 #endif
4677 
4678 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4679 	    td->td_ucred, td);
4680 
4681 	if (auiop != NULL) {
4682 		cnt -= auio.uio_resid;
4683 		td->td_retval[0] = cnt;
4684 	} else
4685 		td->td_retval[0] = size;
4686 
4687 done:
4688 	VOP_UNLOCK(vp, 0, td);
4689 	return (error);
4690 }
4691 
4692 int
4693 extattr_get_fd(td, uap)
4694 	struct thread *td;
4695 	struct extattr_get_fd_args /* {
4696 		int fd;
4697 		int attrnamespace;
4698 		const char *attrname;
4699 		void *data;
4700 		size_t nbytes;
4701 	} */ *uap;
4702 {
4703 	struct file *fp;
4704 	char attrname[EXTATTR_MAXNAMELEN];
4705 	int vfslocked, error;
4706 
4707 	AUDIT_ARG(fd, uap->fd);
4708 	AUDIT_ARG(value, uap->attrnamespace);
4709 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4710 	if (error)
4711 		return (error);
4712 	AUDIT_ARG(text, attrname);
4713 
4714 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4715 	if (error)
4716 		return (error);
4717 
4718 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4719 	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4720 	    attrname, uap->data, uap->nbytes, td);
4721 
4722 	fdrop(fp, td);
4723 	VFS_UNLOCK_GIANT(vfslocked);
4724 	return (error);
4725 }
4726 
4727 int
4728 extattr_get_file(td, uap)
4729 	struct thread *td;
4730 	struct extattr_get_file_args /* {
4731 		const char *path;
4732 		int attrnamespace;
4733 		const char *attrname;
4734 		void *data;
4735 		size_t nbytes;
4736 	} */ *uap;
4737 {
4738 	struct nameidata nd;
4739 	char attrname[EXTATTR_MAXNAMELEN];
4740 	int vfslocked, error;
4741 
4742 	AUDIT_ARG(value, uap->attrnamespace);
4743 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4744 	if (error)
4745 		return (error);
4746 	AUDIT_ARG(text, attrname);
4747 
4748 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4749 	    uap->path, td);
4750 	error = namei(&nd);
4751 	if (error)
4752 		return (error);
4753 	NDFREE(&nd, NDF_ONLY_PNBUF);
4754 
4755 	vfslocked = NDHASGIANT(&nd);
4756 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4757 	    uap->data, uap->nbytes, td);
4758 
4759 	vrele(nd.ni_vp);
4760 	VFS_UNLOCK_GIANT(vfslocked);
4761 	return (error);
4762 }
4763 
4764 int
4765 extattr_get_link(td, uap)
4766 	struct thread *td;
4767 	struct extattr_get_link_args /* {
4768 		const char *path;
4769 		int attrnamespace;
4770 		const char *attrname;
4771 		void *data;
4772 		size_t nbytes;
4773 	} */ *uap;
4774 {
4775 	struct nameidata nd;
4776 	char attrname[EXTATTR_MAXNAMELEN];
4777 	int vfslocked, error;
4778 
4779 	AUDIT_ARG(value, uap->attrnamespace);
4780 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4781 	if (error)
4782 		return (error);
4783 	AUDIT_ARG(text, attrname);
4784 
4785 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4786 	    uap->path, td);
4787 	error = namei(&nd);
4788 	if (error)
4789 		return (error);
4790 	NDFREE(&nd, NDF_ONLY_PNBUF);
4791 
4792 	vfslocked = NDHASGIANT(&nd);
4793 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4794 	    uap->data, uap->nbytes, td);
4795 
4796 	vrele(nd.ni_vp);
4797 	VFS_UNLOCK_GIANT(vfslocked);
4798 	return (error);
4799 }
4800 
4801 /*
4802  * extattr_delete_vp(): Delete a named extended attribute on a file or
4803  *                      directory
4804  *
4805  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4806  *            kernelspace string pointer "attrname", proc "p"
4807  * Returns: 0 on success, an error number otherwise
4808  * Locks: none
4809  * References: vp must be a valid reference for the duration of the call
4810  */
4811 static int
4812 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4813     struct thread *td)
4814 {
4815 	struct mount *mp;
4816 	int error;
4817 
4818 	VFS_ASSERT_GIANT(vp->v_mount);
4819 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4820 	if (error)
4821 		return (error);
4822 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4823 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4824 
4825 #ifdef MAC
4826 	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4827 	    attrname);
4828 	if (error)
4829 		goto done;
4830 #endif
4831 
4832 	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4833 	    td);
4834 	if (error == EOPNOTSUPP)
4835 		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4836 		    td->td_ucred, td);
4837 #ifdef MAC
4838 done:
4839 #endif
4840 	VOP_UNLOCK(vp, 0, td);
4841 	vn_finished_write(mp);
4842 	return (error);
4843 }
4844 
4845 int
4846 extattr_delete_fd(td, uap)
4847 	struct thread *td;
4848 	struct extattr_delete_fd_args /* {
4849 		int fd;
4850 		int attrnamespace;
4851 		const char *attrname;
4852 	} */ *uap;
4853 {
4854 	struct file *fp;
4855 	char attrname[EXTATTR_MAXNAMELEN];
4856 	int vfslocked, error;
4857 
4858 	AUDIT_ARG(fd, uap->fd);
4859 	AUDIT_ARG(value, uap->attrnamespace);
4860 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4861 	if (error)
4862 		return (error);
4863 	AUDIT_ARG(text, attrname);
4864 
4865 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4866 	if (error)
4867 		return (error);
4868 
4869 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4870 	error = extattr_delete_vp(fp->f_vnode, uap->attrnamespace,
4871 	    attrname, td);
4872 	fdrop(fp, td);
4873 	VFS_UNLOCK_GIANT(vfslocked);
4874 	return (error);
4875 }
4876 
4877 int
4878 extattr_delete_file(td, uap)
4879 	struct thread *td;
4880 	struct extattr_delete_file_args /* {
4881 		const char *path;
4882 		int attrnamespace;
4883 		const char *attrname;
4884 	} */ *uap;
4885 {
4886 	struct nameidata nd;
4887 	char attrname[EXTATTR_MAXNAMELEN];
4888 	int vfslocked, error;
4889 
4890 	AUDIT_ARG(value, uap->attrnamespace);
4891 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4892 	if (error)
4893 		return(error);
4894 	AUDIT_ARG(text, attrname);
4895 
4896 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4897 	    uap->path, td);
4898 	error = namei(&nd);
4899 	if (error)
4900 		return(error);
4901 	NDFREE(&nd, NDF_ONLY_PNBUF);
4902 
4903 	vfslocked = NDHASGIANT(&nd);
4904 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4905 	vrele(nd.ni_vp);
4906 	VFS_UNLOCK_GIANT(vfslocked);
4907 	return(error);
4908 }
4909 
4910 int
4911 extattr_delete_link(td, uap)
4912 	struct thread *td;
4913 	struct extattr_delete_link_args /* {
4914 		const char *path;
4915 		int attrnamespace;
4916 		const char *attrname;
4917 	} */ *uap;
4918 {
4919 	struct nameidata nd;
4920 	char attrname[EXTATTR_MAXNAMELEN];
4921 	int vfslocked, error;
4922 
4923 	AUDIT_ARG(value, uap->attrnamespace);
4924 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4925 	if (error)
4926 		return(error);
4927 	AUDIT_ARG(text, attrname);
4928 
4929 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4930 	    uap->path, td);
4931 	error = namei(&nd);
4932 	if (error)
4933 		return(error);
4934 	NDFREE(&nd, NDF_ONLY_PNBUF);
4935 
4936 	vfslocked = NDHASGIANT(&nd);
4937 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4938 	vrele(nd.ni_vp);
4939 	VFS_UNLOCK_GIANT(vfslocked);
4940 	return(error);
4941 }
4942 
4943 /*-
4944  * Retrieve a list of extended attributes on a file or directory.
4945  *
4946  * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4947  *            userspace buffer pointer "data", buffer length "nbytes",
4948  *            thread "td".
4949  * Returns: 0 on success, an error number otherwise
4950  * Locks: none
4951  * References: vp must be a valid reference for the duration of the call
4952  */
4953 static int
4954 extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4955     size_t nbytes, struct thread *td)
4956 {
4957 	struct uio auio, *auiop;
4958 	size_t size, *sizep;
4959 	struct iovec aiov;
4960 	ssize_t cnt;
4961 	int error;
4962 
4963 	VFS_ASSERT_GIANT(vp->v_mount);
4964 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4965 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4966 
4967 	auiop = NULL;
4968 	sizep = NULL;
4969 	cnt = 0;
4970 	if (data != NULL) {
4971 		aiov.iov_base = data;
4972 		aiov.iov_len = nbytes;
4973 		auio.uio_iov = &aiov;
4974 		auio.uio_iovcnt = 1;
4975 		auio.uio_offset = 0;
4976 		if (nbytes > INT_MAX) {
4977 			error = EINVAL;
4978 			goto done;
4979 		}
4980 		auio.uio_resid = nbytes;
4981 		auio.uio_rw = UIO_READ;
4982 		auio.uio_segflg = UIO_USERSPACE;
4983 		auio.uio_td = td;
4984 		auiop = &auio;
4985 		cnt = nbytes;
4986 	} else
4987 		sizep = &size;
4988 
4989 #ifdef MAC
4990 	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4991 	if (error)
4992 		goto done;
4993 #endif
4994 
4995 	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4996 	    td->td_ucred, td);
4997 
4998 	if (auiop != NULL) {
4999 		cnt -= auio.uio_resid;
5000 		td->td_retval[0] = cnt;
5001 	} else
5002 		td->td_retval[0] = size;
5003 
5004 done:
5005 	VOP_UNLOCK(vp, 0, td);
5006 	return (error);
5007 }
5008 
5009 
5010 int
5011 extattr_list_fd(td, uap)
5012 	struct thread *td;
5013 	struct extattr_list_fd_args /* {
5014 		int fd;
5015 		int attrnamespace;
5016 		void *data;
5017 		size_t nbytes;
5018 	} */ *uap;
5019 {
5020 	struct file *fp;
5021 	int vfslocked, error;
5022 
5023 	AUDIT_ARG(fd, uap->fd);
5024 	AUDIT_ARG(value, uap->attrnamespace);
5025 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
5026 	if (error)
5027 		return (error);
5028 
5029 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
5030 	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
5031 	    uap->nbytes, td);
5032 
5033 	fdrop(fp, td);
5034 	VFS_UNLOCK_GIANT(vfslocked);
5035 	return (error);
5036 }
5037 
5038 int
5039 extattr_list_file(td, uap)
5040 	struct thread*td;
5041 	struct extattr_list_file_args /* {
5042 		const char *path;
5043 		int attrnamespace;
5044 		void *data;
5045 		size_t nbytes;
5046 	} */ *uap;
5047 {
5048 	struct nameidata nd;
5049 	int vfslocked, error;
5050 
5051 	AUDIT_ARG(value, uap->attrnamespace);
5052 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
5053 	    uap->path, td);
5054 	error = namei(&nd);
5055 	if (error)
5056 		return (error);
5057 	NDFREE(&nd, NDF_ONLY_PNBUF);
5058 
5059 	vfslocked = NDHASGIANT(&nd);
5060 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
5061 	    uap->nbytes, td);
5062 
5063 	vrele(nd.ni_vp);
5064 	VFS_UNLOCK_GIANT(vfslocked);
5065 	return (error);
5066 }
5067 
5068 int
5069 extattr_list_link(td, uap)
5070 	struct thread*td;
5071 	struct extattr_list_link_args /* {
5072 		const char *path;
5073 		int attrnamespace;
5074 		void *data;
5075 		size_t nbytes;
5076 	} */ *uap;
5077 {
5078 	struct nameidata nd;
5079 	int vfslocked, error;
5080 
5081 	AUDIT_ARG(value, uap->attrnamespace);
5082 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
5083 	    uap->path, td);
5084 	error = namei(&nd);
5085 	if (error)
5086 		return (error);
5087 	NDFREE(&nd, NDF_ONLY_PNBUF);
5088 
5089 	vfslocked = NDHASGIANT(&nd);
5090 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
5091 	    uap->nbytes, td);
5092 
5093 	vrele(nd.ni_vp);
5094 	VFS_UNLOCK_GIANT(vfslocked);
5095 	return (error);
5096 }
5097