xref: /freebsd/sys/kern/vfs_extattr.c (revision a4eb85b6acb49cb60c72c2cab0d0d3f00eaa6d46)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/sysent.h>
48 #include <sys/mac.h>
49 #include <sys/malloc.h>
50 #include <sys/mount.h>
51 #include <sys/mutex.h>
52 #include <sys/sysproto.h>
53 #include <sys/namei.h>
54 #include <sys/filedesc.h>
55 #include <sys/kernel.h>
56 #include <sys/fcntl.h>
57 #include <sys/file.h>
58 #include <sys/limits.h>
59 #include <sys/linker.h>
60 #include <sys/stat.h>
61 #include <sys/sx.h>
62 #include <sys/unistd.h>
63 #include <sys/vnode.h>
64 #include <sys/proc.h>
65 #include <sys/dirent.h>
66 #include <sys/extattr.h>
67 #include <sys/jail.h>
68 #include <sys/syscallsubr.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/stdarg.h>
72 
73 #include <security/audit/audit.h>
74 
75 #include <vm/vm.h>
76 #include <vm/vm_object.h>
77 #include <vm/vm_page.h>
78 #include <vm/uma.h>
79 
/*
 * Forward declarations of file-local (static) helpers.
 */
static int chroot_refuse_vdir_fds(struct filedesc *fdp);
static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
static int setfmode(struct thread *td, struct vnode *, int);
static int setfflags(struct thread *td, struct vnode *, int);
static int setutimes(struct thread *td, struct vnode *,
    const struct timespec *, int, int);
static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
    struct thread *td);

static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
    size_t nbytes, struct thread *td);

/*
 * Global hook pointer.  NOTE(review): nothing in the visible part of this
 * file assigns it; presumably installed by the union filesystem -- confirm.
 */
int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
94 
95 /*
96  * The module initialization routine for POSIX asynchronous I/O will
97  * set this to the version of AIO that it implements.  (Zero means
98  * that it is not implemented.)  This value is used here by pathconf()
99  * and in kern_descrip.c by fpathconf().
100  */
101 int async_io_version;
102 
103 /*
104  * Sync each mounted filesystem.
105  */
106 #ifndef _SYS_SYSPROTO_H_
107 struct sync_args {
108 	int     dummy;
109 };
110 #endif
111 
112 #ifdef DEBUG
113 static int syncprt = 0;
114 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
115 #endif
116 
117 /* ARGSUSED */
118 int
119 sync(td, uap)
120 	struct thread *td;
121 	struct sync_args *uap;
122 {
123 	struct mount *mp, *nmp;
124 	int vfslocked;
125 	int asyncflag;
126 
127 	mtx_lock(&mountlist_mtx);
128 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
129 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
130 			nmp = TAILQ_NEXT(mp, mnt_list);
131 			continue;
132 		}
133 		vfslocked = VFS_LOCK_GIANT(mp);
134 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
135 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
136 			asyncflag = mp->mnt_flag & MNT_ASYNC;
137 			mp->mnt_flag &= ~MNT_ASYNC;
138 			vfs_msync(mp, MNT_NOWAIT);
139 			VFS_SYNC(mp, MNT_NOWAIT, td);
140 			mp->mnt_flag |= asyncflag;
141 			vn_finished_write(mp);
142 		}
143 		VFS_UNLOCK_GIANT(vfslocked);
144 		mtx_lock(&mountlist_mtx);
145 		nmp = TAILQ_NEXT(mp, mnt_list);
146 		vfs_unbusy(mp, td);
147 	}
148 	mtx_unlock(&mountlist_mtx);
149 	return (0);
150 }
151 
152 /* XXX PRISON: could be per prison flag */
153 static int prison_quotas;
154 #if 0
155 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
156 #endif
157 
158 /*
159  * Change filesystem quotas.
160  *
161  * MP SAFE
162  */
163 #ifndef _SYS_SYSPROTO_H_
164 struct quotactl_args {
165 	char *path;
166 	int cmd;
167 	int uid;
168 	caddr_t arg;
169 };
170 #endif
171 int
172 quotactl(td, uap)
173 	struct thread *td;
174 	register struct quotactl_args /* {
175 		char *path;
176 		int cmd;
177 		int uid;
178 		caddr_t arg;
179 	} */ *uap;
180 {
181 	struct mount *mp, *vmp;
182 	int vfslocked;
183 	int error;
184 	struct nameidata nd;
185 
186 	AUDIT_ARG(cmd, uap->cmd);
187 	AUDIT_ARG(uid, uap->uid);
188 	if (jailed(td->td_ucred) && !prison_quotas)
189 		return (EPERM);
190 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1,
191 	   UIO_USERSPACE, uap->path, td);
192 	if ((error = namei(&nd)) != 0)
193 		return (error);
194 	vfslocked = NDHASGIANT(&nd);
195 	NDFREE(&nd, NDF_ONLY_PNBUF);
196 	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
197 	mp = nd.ni_vp->v_mount;
198 	vrele(nd.ni_vp);
199 	if (error)
200 		goto out;
201 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
202 	vn_finished_write(vmp);
203 out:
204 	VFS_UNLOCK_GIANT(vfslocked);
205 	return (error);
206 }
207 
208 /*
209  * Get filesystem statistics.
210  */
211 #ifndef _SYS_SYSPROTO_H_
212 struct statfs_args {
213 	char *path;
214 	struct statfs *buf;
215 };
216 #endif
217 int
218 statfs(td, uap)
219 	struct thread *td;
220 	register struct statfs_args /* {
221 		char *path;
222 		struct statfs *buf;
223 	} */ *uap;
224 {
225 	struct statfs sf;
226 	int error;
227 
228 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
229 	if (error == 0)
230 		error = copyout(&sf, uap->buf, sizeof(sf));
231 	return (error);
232 }
233 
234 int
235 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
236     struct statfs *buf)
237 {
238 	struct mount *mp;
239 	struct statfs *sp, sb;
240 	int vfslocked;
241 	int error;
242 	struct nameidata nd;
243 
244 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
245 	    pathseg, path, td);
246 	error = namei(&nd);
247 	if (error)
248 		return (error);
249 	vfslocked = NDHASGIANT(&nd);
250 	mp = nd.ni_vp->v_mount;
251 	vfs_ref(mp);
252 	NDFREE(&nd, NDF_ONLY_PNBUF);
253 	vput(nd.ni_vp);
254 #ifdef MAC
255 	error = mac_check_mount_stat(td->td_ucred, mp);
256 	if (error) {
257 		vfs_rel(mp);
258 		goto out;
259 	}
260 #endif
261 	/*
262 	 * Set these in case the underlying filesystem fails to do so.
263 	 */
264 	sp = &mp->mnt_stat;
265 	sp->f_version = STATFS_VERSION;
266 	sp->f_namemax = NAME_MAX;
267 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
268 	error = VFS_STATFS(mp, sp, td);
269 	vfs_rel(mp);
270 	if (error)
271 		goto out;
272 	if (suser(td)) {
273 		bcopy(sp, &sb, sizeof(sb));
274 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
275 		prison_enforce_statfs(td->td_ucred, mp, &sb);
276 		sp = &sb;
277 	}
278 	*buf = *sp;
279 out:
280 	VFS_UNLOCK_GIANT(vfslocked);
281 	if (mtx_owned(&Giant))
282 		printf("statfs(%d): %s: %d\n", vfslocked, path, error);
283 	return (error);
284 }
285 
286 /*
287  * Get filesystem statistics.
288  */
289 #ifndef _SYS_SYSPROTO_H_
290 struct fstatfs_args {
291 	int fd;
292 	struct statfs *buf;
293 };
294 #endif
295 int
296 fstatfs(td, uap)
297 	struct thread *td;
298 	register struct fstatfs_args /* {
299 		int fd;
300 		struct statfs *buf;
301 	} */ *uap;
302 {
303 	struct statfs sf;
304 	int error;
305 
306 	error = kern_fstatfs(td, uap->fd, &sf);
307 	if (error == 0)
308 		error = copyout(&sf, uap->buf, sizeof(sf));
309 	return (error);
310 }
311 
312 int
313 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
314 {
315 	struct file *fp;
316 	struct mount *mp;
317 	struct statfs *sp, sb;
318 	int vfslocked;
319 	struct vnode *vp;
320 	int error;
321 
322 	AUDIT_ARG(fd, fd);
323 	error = getvnode(td->td_proc->p_fd, fd, &fp);
324 	if (error)
325 		return (error);
326 	vp = fp->f_vnode;
327 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
328 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
329 #ifdef AUDIT
330 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
331 #endif
332 	mp = vp->v_mount;
333 	if (mp)
334 		vfs_ref(mp);
335 	VOP_UNLOCK(vp, 0, td);
336 	fdrop(fp, td);
337 	if (vp->v_iflag & VI_DOOMED) {
338 		if (mp)
339 			vfs_rel(mp);
340 		error = EBADF;
341 		goto out;
342 	}
343 #ifdef MAC
344 	error = mac_check_mount_stat(td->td_ucred, mp);
345 	if (error) {
346 		vfs_rel(mp);
347 		goto out;
348 	}
349 #endif
350 	/*
351 	 * Set these in case the underlying filesystem fails to do so.
352 	 */
353 	sp = &mp->mnt_stat;
354 	sp->f_version = STATFS_VERSION;
355 	sp->f_namemax = NAME_MAX;
356 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
357 	error = VFS_STATFS(mp, sp, td);
358 	vfs_rel(mp);
359 	if (error)
360 		goto out;
361 	if (suser(td)) {
362 		bcopy(sp, &sb, sizeof(sb));
363 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
364 		prison_enforce_statfs(td->td_ucred, mp, &sb);
365 		sp = &sb;
366 	}
367 	*buf = *sp;
368 out:
369 	VFS_UNLOCK_GIANT(vfslocked);
370 	return (error);
371 }
372 
373 /*
374  * Get statistics on all filesystems.
375  */
376 #ifndef _SYS_SYSPROTO_H_
377 struct getfsstat_args {
378 	struct statfs *buf;
379 	long bufsize;
380 	int flags;
381 };
382 #endif
383 int
384 getfsstat(td, uap)
385 	struct thread *td;
386 	register struct getfsstat_args /* {
387 		struct statfs *buf;
388 		long bufsize;
389 		int flags;
390 	} */ *uap;
391 {
392 
393 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
394 	    uap->flags));
395 }
396 
397 /*
398  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
399  * 	The caller is responsible for freeing memory which will be allocated
400  *	in '*buf'.
401  */
402 int
403 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
404     enum uio_seg bufseg, int flags)
405 {
406 	struct mount *mp, *nmp;
407 	struct statfs *sfsp, *sp, sb;
408 	size_t count, maxcount;
409 	int vfslocked;
410 	int error;
411 
412 	maxcount = bufsize / sizeof(struct statfs);
413 	if (bufsize == 0)
414 		sfsp = NULL;
415 	else if (bufseg == UIO_USERSPACE)
416 		sfsp = *buf;
417 	else /* if (bufseg == UIO_SYSSPACE) */ {
418 		count = 0;
419 		mtx_lock(&mountlist_mtx);
420 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
421 			count++;
422 		}
423 		mtx_unlock(&mountlist_mtx);
424 		if (maxcount > count)
425 			maxcount = count;
426 		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
427 		    M_WAITOK);
428 	}
429 	count = 0;
430 	mtx_lock(&mountlist_mtx);
431 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
432 		if (prison_canseemount(td->td_ucred, mp) != 0) {
433 			nmp = TAILQ_NEXT(mp, mnt_list);
434 			continue;
435 		}
436 #ifdef MAC
437 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
438 			nmp = TAILQ_NEXT(mp, mnt_list);
439 			continue;
440 		}
441 #endif
442 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
443 			nmp = TAILQ_NEXT(mp, mnt_list);
444 			continue;
445 		}
446 		vfslocked = VFS_LOCK_GIANT(mp);
447 		if (sfsp && count < maxcount) {
448 			sp = &mp->mnt_stat;
449 			/*
450 			 * Set these in case the underlying filesystem
451 			 * fails to do so.
452 			 */
453 			sp->f_version = STATFS_VERSION;
454 			sp->f_namemax = NAME_MAX;
455 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
456 			/*
457 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
458 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
459 			 * overrides MNT_WAIT.
460 			 */
461 			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
462 			    (flags & MNT_WAIT)) &&
463 			    (error = VFS_STATFS(mp, sp, td))) {
464 				VFS_UNLOCK_GIANT(vfslocked);
465 				mtx_lock(&mountlist_mtx);
466 				nmp = TAILQ_NEXT(mp, mnt_list);
467 				vfs_unbusy(mp, td);
468 				continue;
469 			}
470 			if (suser(td)) {
471 				bcopy(sp, &sb, sizeof(sb));
472 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
473 				prison_enforce_statfs(td->td_ucred, mp, &sb);
474 				sp = &sb;
475 			}
476 			if (bufseg == UIO_SYSSPACE)
477 				bcopy(sp, sfsp, sizeof(*sp));
478 			else /* if (bufseg == UIO_USERSPACE) */ {
479 				error = copyout(sp, sfsp, sizeof(*sp));
480 				if (error) {
481 					vfs_unbusy(mp, td);
482 					VFS_UNLOCK_GIANT(vfslocked);
483 					return (error);
484 				}
485 			}
486 			sfsp++;
487 		}
488 		VFS_UNLOCK_GIANT(vfslocked);
489 		count++;
490 		mtx_lock(&mountlist_mtx);
491 		nmp = TAILQ_NEXT(mp, mnt_list);
492 		vfs_unbusy(mp, td);
493 	}
494 	mtx_unlock(&mountlist_mtx);
495 	if (sfsp && count > maxcount)
496 		td->td_retval[0] = maxcount;
497 	else
498 		td->td_retval[0] = count;
499 	return (0);
500 }
501 
502 #ifdef COMPAT_FREEBSD4
503 /*
504  * Get old format filesystem statistics.
505  */
506 static void cvtstatfs(struct statfs *, struct ostatfs *);
507 
508 #ifndef _SYS_SYSPROTO_H_
509 struct freebsd4_statfs_args {
510 	char *path;
511 	struct ostatfs *buf;
512 };
513 #endif
514 int
515 freebsd4_statfs(td, uap)
516 	struct thread *td;
517 	struct freebsd4_statfs_args /* {
518 		char *path;
519 		struct ostatfs *buf;
520 	} */ *uap;
521 {
522 	struct ostatfs osb;
523 	struct statfs sf;
524 	int error;
525 
526 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
527 	if (error)
528 		return (error);
529 	cvtstatfs(&sf, &osb);
530 	return (copyout(&osb, uap->buf, sizeof(osb)));
531 }
532 
533 /*
534  * Get filesystem statistics.
535  */
536 #ifndef _SYS_SYSPROTO_H_
537 struct freebsd4_fstatfs_args {
538 	int fd;
539 	struct ostatfs *buf;
540 };
541 #endif
542 int
543 freebsd4_fstatfs(td, uap)
544 	struct thread *td;
545 	struct freebsd4_fstatfs_args /* {
546 		int fd;
547 		struct ostatfs *buf;
548 	} */ *uap;
549 {
550 	struct ostatfs osb;
551 	struct statfs sf;
552 	int error;
553 
554 	error = kern_fstatfs(td, uap->fd, &sf);
555 	if (error)
556 		return (error);
557 	cvtstatfs(&sf, &osb);
558 	return (copyout(&osb, uap->buf, sizeof(osb)));
559 }
560 
561 /*
562  * Get statistics on all filesystems.
563  */
564 #ifndef _SYS_SYSPROTO_H_
565 struct freebsd4_getfsstat_args {
566 	struct ostatfs *buf;
567 	long bufsize;
568 	int flags;
569 };
570 #endif
571 int
572 freebsd4_getfsstat(td, uap)
573 	struct thread *td;
574 	register struct freebsd4_getfsstat_args /* {
575 		struct ostatfs *buf;
576 		long bufsize;
577 		int flags;
578 	} */ *uap;
579 {
580 	struct statfs *buf, *sp;
581 	struct ostatfs osb;
582 	size_t count, size;
583 	int error;
584 
585 	count = uap->bufsize / sizeof(struct ostatfs);
586 	size = count * sizeof(struct statfs);
587 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
588 	if (size > 0) {
589 		count = td->td_retval[0];
590 		sp = buf;
591 		while (count > 0 && error == 0) {
592 			cvtstatfs(sp, &osb);
593 			error = copyout(&osb, uap->buf, sizeof(osb));
594 			sp++;
595 			uap->buf++;
596 			count--;
597 		}
598 		free(buf, M_TEMP);
599 	}
600 	return (error);
601 }
602 
603 /*
604  * Implement fstatfs() for (NFS) file handles.
605  */
606 #ifndef _SYS_SYSPROTO_H_
607 struct freebsd4_fhstatfs_args {
608 	struct fhandle *u_fhp;
609 	struct ostatfs *buf;
610 };
611 #endif
612 int
613 freebsd4_fhstatfs(td, uap)
614 	struct thread *td;
615 	struct freebsd4_fhstatfs_args /* {
616 		struct fhandle *u_fhp;
617 		struct ostatfs *buf;
618 	} */ *uap;
619 {
620 	struct ostatfs osb;
621 	struct statfs sf;
622 	fhandle_t fh;
623 	int error;
624 
625 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
626 	if (error)
627 		return (error);
628 	error = kern_fhstatfs(td, fh, &sf);
629 	if (error)
630 		return (error);
631 	cvtstatfs(&sf, &osb);
632 	return (copyout(&osb, uap->buf, sizeof(osb)));
633 }
634 
635 /*
636  * Convert a new format statfs structure to an old format statfs structure.
637  */
638 static void
639 cvtstatfs(nsp, osp)
640 	struct statfs *nsp;
641 	struct ostatfs *osp;
642 {
643 
644 	bzero(osp, sizeof(*osp));
645 	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
646 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
647 	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
648 	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
649 	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
650 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
651 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
652 	osp->f_owner = nsp->f_owner;
653 	osp->f_type = nsp->f_type;
654 	osp->f_flags = nsp->f_flags;
655 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
656 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
657 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
658 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
659 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
660 	    MIN(MFSNAMELEN, OMFSNAMELEN));
661 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
662 	    MIN(MNAMELEN, OMNAMELEN));
663 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
664 	    MIN(MNAMELEN, OMNAMELEN));
665 	osp->f_fsid = nsp->f_fsid;
666 }
667 #endif /* COMPAT_FREEBSD4 */
668 
669 /*
670  * Change current working directory to a given file descriptor.
671  */
672 #ifndef _SYS_SYSPROTO_H_
673 struct fchdir_args {
674 	int	fd;
675 };
676 #endif
677 int
678 fchdir(td, uap)
679 	struct thread *td;
680 	struct fchdir_args /* {
681 		int fd;
682 	} */ *uap;
683 {
684 	register struct filedesc *fdp = td->td_proc->p_fd;
685 	struct vnode *vp, *tdp, *vpold;
686 	struct mount *mp;
687 	struct file *fp;
688 	int vfslocked;
689 	int error;
690 
691 	AUDIT_ARG(fd, uap->fd);
692 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
693 		return (error);
694 	vp = fp->f_vnode;
695 	VREF(vp);
696 	fdrop(fp, td);
697 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
698 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
699 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
700 	error = change_dir(vp, td);
701 	while (!error && (mp = vp->v_mountedhere) != NULL) {
702 		int tvfslocked;
703 		if (vfs_busy(mp, 0, 0, td))
704 			continue;
705 		tvfslocked = VFS_LOCK_GIANT(mp);
706 		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
707 		vfs_unbusy(mp, td);
708 		if (error) {
709 			VFS_UNLOCK_GIANT(tvfslocked);
710 			break;
711 		}
712 		vput(vp);
713 		VFS_UNLOCK_GIANT(vfslocked);
714 		vp = tdp;
715 		vfslocked = tvfslocked;
716 	}
717 	if (error) {
718 		vput(vp);
719 		VFS_UNLOCK_GIANT(vfslocked);
720 		return (error);
721 	}
722 	VOP_UNLOCK(vp, 0, td);
723 	VFS_UNLOCK_GIANT(vfslocked);
724 	FILEDESC_LOCK_FAST(fdp);
725 	vpold = fdp->fd_cdir;
726 	fdp->fd_cdir = vp;
727 	FILEDESC_UNLOCK_FAST(fdp);
728 	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
729 	vrele(vpold);
730 	VFS_UNLOCK_GIANT(vfslocked);
731 	return (0);
732 }
733 
734 /*
735  * Change current working directory (``.'').
736  */
737 #ifndef _SYS_SYSPROTO_H_
738 struct chdir_args {
739 	char	*path;
740 };
741 #endif
742 int
743 chdir(td, uap)
744 	struct thread *td;
745 	struct chdir_args /* {
746 		char *path;
747 	} */ *uap;
748 {
749 
750 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
751 }
752 
753 int
754 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
755 {
756 	register struct filedesc *fdp = td->td_proc->p_fd;
757 	int error;
758 	struct nameidata nd;
759 	struct vnode *vp;
760 	int vfslocked;
761 
762 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
763 	    pathseg, path, td);
764 	if ((error = namei(&nd)) != 0)
765 		return (error);
766 	vfslocked = NDHASGIANT(&nd);
767 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
768 		vput(nd.ni_vp);
769 		VFS_UNLOCK_GIANT(vfslocked);
770 		NDFREE(&nd, NDF_ONLY_PNBUF);
771 		return (error);
772 	}
773 	VOP_UNLOCK(nd.ni_vp, 0, td);
774 	VFS_UNLOCK_GIANT(vfslocked);
775 	NDFREE(&nd, NDF_ONLY_PNBUF);
776 	FILEDESC_LOCK_FAST(fdp);
777 	vp = fdp->fd_cdir;
778 	fdp->fd_cdir = nd.ni_vp;
779 	FILEDESC_UNLOCK_FAST(fdp);
780 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
781 	vrele(vp);
782 	VFS_UNLOCK_GIANT(vfslocked);
783 	return (0);
784 }
785 
786 /*
787  * Helper function for raised chroot(2) security function:  Refuse if
788  * any filedescriptors are open directories.
789  */
790 static int
791 chroot_refuse_vdir_fds(fdp)
792 	struct filedesc *fdp;
793 {
794 	struct vnode *vp;
795 	struct file *fp;
796 	int fd;
797 
798 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
799 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
800 		fp = fget_locked(fdp, fd);
801 		if (fp == NULL)
802 			continue;
803 		if (fp->f_type == DTYPE_VNODE) {
804 			vp = fp->f_vnode;
805 			if (vp->v_type == VDIR)
806 				return (EPERM);
807 		}
808 	}
809 	return (0);
810 }
811 
812 /*
813  * This sysctl determines if we will allow a process to chroot(2) if it
814  * has a directory open:
815  *	0: disallowed for all processes.
816  *	1: allowed for processes that were not already chroot(2)'ed.
817  *	2: allowed for all processes.
818  */
819 
820 static int chroot_allow_open_directories = 1;
821 
822 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
823      &chroot_allow_open_directories, 0, "");
824 
825 /*
826  * Change notion of root (``/'') directory.
827  */
828 #ifndef _SYS_SYSPROTO_H_
829 struct chroot_args {
830 	char	*path;
831 };
832 #endif
833 int
834 chroot(td, uap)
835 	struct thread *td;
836 	struct chroot_args /* {
837 		char *path;
838 	} */ *uap;
839 {
840 	int error;
841 	struct nameidata nd;
842 	int vfslocked;
843 
844 	error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
845 	if (error)
846 		return (error);
847 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
848 	    UIO_USERSPACE, uap->path, td);
849 	error = namei(&nd);
850 	if (error)
851 		goto error;
852 	vfslocked = NDHASGIANT(&nd);
853 	if ((error = change_dir(nd.ni_vp, td)) != 0)
854 		goto e_vunlock;
855 #ifdef MAC
856 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
857 		goto e_vunlock;
858 #endif
859 	VOP_UNLOCK(nd.ni_vp, 0, td);
860 	error = change_root(nd.ni_vp, td);
861 	vrele(nd.ni_vp);
862 	VFS_UNLOCK_GIANT(vfslocked);
863 	NDFREE(&nd, NDF_ONLY_PNBUF);
864 	return (error);
865 e_vunlock:
866 	vput(nd.ni_vp);
867 	VFS_UNLOCK_GIANT(vfslocked);
868 error:
869 	NDFREE(&nd, NDF_ONLY_PNBUF);
870 	return (error);
871 }
872 
873 /*
874  * Common routine for chroot and chdir.  Callers must provide a locked vnode
875  * instance.
876  */
877 int
878 change_dir(vp, td)
879 	struct vnode *vp;
880 	struct thread *td;
881 {
882 	int error;
883 
884 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
885 	if (vp->v_type != VDIR)
886 		return (ENOTDIR);
887 #ifdef MAC
888 	error = mac_check_vnode_chdir(td->td_ucred, vp);
889 	if (error)
890 		return (error);
891 #endif
892 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
893 	return (error);
894 }
895 
896 /*
897  * Common routine for kern_chroot() and jail_attach().  The caller is
898  * responsible for invoking suser() and mac_check_chroot() to authorize this
899  * operation.
900  */
901 int
902 change_root(vp, td)
903 	struct vnode *vp;
904 	struct thread *td;
905 {
906 	struct filedesc *fdp;
907 	struct vnode *oldvp;
908 	int vfslocked;
909 	int error;
910 
911 	VFS_ASSERT_GIANT(vp->v_mount);
912 	fdp = td->td_proc->p_fd;
913 	FILEDESC_LOCK(fdp);
914 	if (chroot_allow_open_directories == 0 ||
915 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
916 		error = chroot_refuse_vdir_fds(fdp);
917 		if (error) {
918 			FILEDESC_UNLOCK(fdp);
919 			return (error);
920 		}
921 	}
922 	oldvp = fdp->fd_rdir;
923 	fdp->fd_rdir = vp;
924 	VREF(fdp->fd_rdir);
925 	if (!fdp->fd_jdir) {
926 		fdp->fd_jdir = vp;
927 		VREF(fdp->fd_jdir);
928 	}
929 	FILEDESC_UNLOCK(fdp);
930 	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
931 	vrele(oldvp);
932 	VFS_UNLOCK_GIANT(vfslocked);
933 	return (0);
934 }
935 
936 /*
937  * Check permissions, allocate an open file structure,
938  * and call the device open routine if any.
939  *
940  * MP SAFE
941  */
942 #ifndef _SYS_SYSPROTO_H_
943 struct open_args {
944 	char	*path;
945 	int	flags;
946 	int	mode;
947 };
948 #endif
949 int
950 open(td, uap)
951 	struct thread *td;
952 	register struct open_args /* {
953 		char *path;
954 		int flags;
955 		int mode;
956 	} */ *uap;
957 {
958 
959 	return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
960 }
961 
962 int
963 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
964     int mode)
965 {
966 	struct proc *p = td->td_proc;
967 	struct filedesc *fdp = p->p_fd;
968 	struct file *fp;
969 	struct vnode *vp;
970 	struct vattr vat;
971 	struct mount *mp;
972 	int cmode;
973 	struct file *nfp;
974 	int type, indx, error;
975 	struct flock lf;
976 	struct nameidata nd;
977 	int vfslocked;
978 
979 	AUDIT_ARG(fflags, flags);
980 	AUDIT_ARG(mode, mode);
981 	if ((flags & O_ACCMODE) == O_ACCMODE)
982 		return (EINVAL);
983 	flags = FFLAGS(flags);
984 	error = falloc(td, &nfp, &indx);
985 	if (error)
986 		return (error);
987 	/* An extra reference on `nfp' has been held for us by falloc(). */
988 	fp = nfp;
989 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
990 	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
991 	td->td_dupfd = -1;		/* XXX check for fdopen */
992 	error = vn_open(&nd, &flags, cmode, indx);
993 	if (error) {
994 		/*
995 		 * If the vn_open replaced the method vector, something
996 		 * wonderous happened deep below and we just pass it up
997 		 * pretending we know what we do.
998 		 */
999 		if (error == ENXIO && fp->f_ops != &badfileops) {
1000 			fdrop(fp, td);
1001 			td->td_retval[0] = indx;
1002 			return (0);
1003 		}
1004 
1005 		/*
1006 		 * release our own reference
1007 		 */
1008 		fdrop(fp, td);
1009 
1010 		/*
1011 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1012 		 * responsible for dropping the old contents of ofiles[indx]
1013 		 * if it succeeds.
1014 		 */
1015 		if ((error == ENODEV || error == ENXIO) &&
1016 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1017 		    (error =
1018 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1019 			td->td_retval[0] = indx;
1020 			return (0);
1021 		}
1022 		/*
1023 		 * Clean up the descriptor, but only if another thread hadn't
1024 		 * replaced or closed it.
1025 		 */
1026 		fdclose(fdp, fp, indx, td);
1027 
1028 		if (error == ERESTART)
1029 			error = EINTR;
1030 		return (error);
1031 	}
1032 	td->td_dupfd = 0;
1033 	vfslocked = NDHASGIANT(&nd);
1034 	NDFREE(&nd, NDF_ONLY_PNBUF);
1035 	vp = nd.ni_vp;
1036 
1037 	/*
1038 	 * There should be 2 references on the file, one from the descriptor
1039 	 * table, and one for us.
1040 	 *
1041 	 * Handle the case where someone closed the file (via its file
1042 	 * descriptor) while we were blocked.  The end result should look
1043 	 * like opening the file succeeded but it was immediately closed.
1044 	 * We call vn_close() manually because we haven't yet hooked up
1045 	 * the various 'struct file' fields.
1046 	 */
1047 	FILEDESC_LOCK(fdp);
1048 	FILE_LOCK(fp);
1049 	if (fp->f_count == 1) {
1050 		mp = vp->v_mount;
1051 		KASSERT(fdp->fd_ofiles[indx] != fp,
1052 		    ("Open file descriptor lost all refs"));
1053 		FILE_UNLOCK(fp);
1054 		FILEDESC_UNLOCK(fdp);
1055 		VOP_UNLOCK(vp, 0, td);
1056 		vn_close(vp, flags & FMASK, fp->f_cred, td);
1057 		VFS_UNLOCK_GIANT(vfslocked);
1058 		fdrop(fp, td);
1059 		td->td_retval[0] = indx;
1060 		return (0);
1061 	}
1062 	fp->f_vnode = vp;
1063 	if (fp->f_data == NULL)
1064 		fp->f_data = vp;
1065 	fp->f_flag = flags & FMASK;
1066 	if (fp->f_ops == &badfileops)
1067 		fp->f_ops = &vnops;
1068 	fp->f_seqcount = 1;
1069 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1070 	FILE_UNLOCK(fp);
1071 	FILEDESC_UNLOCK(fdp);
1072 
1073 	VOP_UNLOCK(vp, 0, td);
1074 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1075 		lf.l_whence = SEEK_SET;
1076 		lf.l_start = 0;
1077 		lf.l_len = 0;
1078 		if (flags & O_EXLOCK)
1079 			lf.l_type = F_WRLCK;
1080 		else
1081 			lf.l_type = F_RDLCK;
1082 		type = F_FLOCK;
1083 		if ((flags & FNONBLOCK) == 0)
1084 			type |= F_WAIT;
1085 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1086 			    type)) != 0)
1087 			goto bad;
1088 		fp->f_flag |= FHASLOCK;
1089 	}
1090 	if (flags & O_TRUNC) {
1091 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1092 			goto bad;
1093 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1094 		VATTR_NULL(&vat);
1095 		vat.va_size = 0;
1096 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1097 #ifdef MAC
1098 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1099 		if (error == 0)
1100 #endif
1101 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1102 		VOP_UNLOCK(vp, 0, td);
1103 		vn_finished_write(mp);
1104 		if (error)
1105 			goto bad;
1106 	}
1107 	VFS_UNLOCK_GIANT(vfslocked);
1108 	/*
1109 	 * Release our private reference, leaving the one associated with
1110 	 * the descriptor table intact.
1111 	 */
1112 	fdrop(fp, td);
1113 	td->td_retval[0] = indx;
1114 	return (0);
1115 bad:
1116 	VFS_UNLOCK_GIANT(vfslocked);
1117 	fdclose(fdp, fp, indx, td);
1118 	fdrop(fp, td);
1119 	return (error);
1120 }
1121 
1122 #ifdef COMPAT_43
1123 /*
1124  * Create a file.
1125  *
1126  * MP SAFE
1127  */
1128 #ifndef _SYS_SYSPROTO_H_
1129 struct ocreat_args {
1130 	char	*path;
1131 	int	mode;
1132 };
1133 #endif
1134 int
1135 ocreat(td, uap)
1136 	struct thread *td;
1137 	register struct ocreat_args /* {
1138 		char *path;
1139 		int mode;
1140 	} */ *uap;
1141 {
1142 
1143 	return (kern_open(td, uap->path, UIO_USERSPACE,
1144 	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
1145 }
1146 #endif /* COMPAT_43 */
1147 
1148 /*
1149  * Create a special file.
1150  */
1151 #ifndef _SYS_SYSPROTO_H_
1152 struct mknod_args {
1153 	char	*path;
1154 	int	mode;
1155 	int	dev;
1156 };
1157 #endif
1158 int
1159 mknod(td, uap)
1160 	struct thread *td;
1161 	register struct mknod_args /* {
1162 		char *path;
1163 		int mode;
1164 		int dev;
1165 	} */ *uap;
1166 {
1167 
1168 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1169 }
1170 
1171 int
1172 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1173     int dev)
1174 {
1175 	struct vnode *vp;
1176 	struct mount *mp;
1177 	struct vattr vattr;
1178 	int error;
1179 	int whiteout = 0;
1180 	struct nameidata nd;
1181 	int vfslocked;
1182 
1183 	AUDIT_ARG(mode, mode);
1184 	AUDIT_ARG(dev, dev);
1185 	switch (mode & S_IFMT) {
1186 	case S_IFCHR:
1187 	case S_IFBLK:
1188 		error = suser(td);
1189 		break;
1190 	default:
1191 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
1192 		break;
1193 	}
1194 	if (error)
1195 		return (error);
1196 restart:
1197 	bwillwrite();
1198 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1199 	    pathseg, path, td);
1200 	if ((error = namei(&nd)) != 0)
1201 		return (error);
1202 	vfslocked = NDHASGIANT(&nd);
1203 	vp = nd.ni_vp;
1204 	if (vp != NULL) {
1205 		NDFREE(&nd, NDF_ONLY_PNBUF);
1206 		if (vp == nd.ni_dvp)
1207 			vrele(nd.ni_dvp);
1208 		else
1209 			vput(nd.ni_dvp);
1210 		vrele(vp);
1211 		VFS_UNLOCK_GIANT(vfslocked);
1212 		return (EEXIST);
1213 	} else {
1214 		VATTR_NULL(&vattr);
1215 		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1216 		vattr.va_mode = (mode & ALLPERMS) &
1217 		    ~td->td_proc->p_fd->fd_cmask;
1218 		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1219 		vattr.va_rdev = dev;
1220 		whiteout = 0;
1221 
1222 		switch (mode & S_IFMT) {
1223 		case S_IFMT:	/* used by badsect to flag bad sectors */
1224 			vattr.va_type = VBAD;
1225 			break;
1226 		case S_IFCHR:
1227 			vattr.va_type = VCHR;
1228 			break;
1229 		case S_IFBLK:
1230 			vattr.va_type = VBLK;
1231 			break;
1232 		case S_IFWHT:
1233 			whiteout = 1;
1234 			break;
1235 		default:
1236 			error = EINVAL;
1237 			break;
1238 		}
1239 	}
1240 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1241 		NDFREE(&nd, NDF_ONLY_PNBUF);
1242 		vput(nd.ni_dvp);
1243 		VFS_UNLOCK_GIANT(vfslocked);
1244 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1245 			return (error);
1246 		goto restart;
1247 	}
1248 #ifdef MAC
1249 	if (error == 0 && !whiteout)
1250 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1251 		    &nd.ni_cnd, &vattr);
1252 #endif
1253 	if (!error) {
1254 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1255 		if (whiteout)
1256 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1257 		else {
1258 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1259 						&nd.ni_cnd, &vattr);
1260 			if (error == 0)
1261 				vput(nd.ni_vp);
1262 		}
1263 	}
1264 	NDFREE(&nd, NDF_ONLY_PNBUF);
1265 	vput(nd.ni_dvp);
1266 	vn_finished_write(mp);
1267 	VFS_UNLOCK_GIANT(vfslocked);
1268 	return (error);
1269 }
1270 
1271 /*
1272  * Create a named pipe.
1273  */
1274 #ifndef _SYS_SYSPROTO_H_
1275 struct mkfifo_args {
1276 	char	*path;
1277 	int	mode;
1278 };
1279 #endif
1280 int
1281 mkfifo(td, uap)
1282 	struct thread *td;
1283 	register struct mkfifo_args /* {
1284 		char *path;
1285 		int mode;
1286 	} */ *uap;
1287 {
1288 
1289 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1290 }
1291 
1292 int
1293 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1294 {
1295 	struct mount *mp;
1296 	struct vattr vattr;
1297 	int error;
1298 	struct nameidata nd;
1299 	int vfslocked;
1300 
1301 	AUDIT_ARG(mode, mode);
1302 restart:
1303 	bwillwrite();
1304 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1305 	    pathseg, path, td);
1306 	if ((error = namei(&nd)) != 0)
1307 		return (error);
1308 	vfslocked = NDHASGIANT(&nd);
1309 	if (nd.ni_vp != NULL) {
1310 		NDFREE(&nd, NDF_ONLY_PNBUF);
1311 		if (nd.ni_vp == nd.ni_dvp)
1312 			vrele(nd.ni_dvp);
1313 		else
1314 			vput(nd.ni_dvp);
1315 		vrele(nd.ni_vp);
1316 		VFS_UNLOCK_GIANT(vfslocked);
1317 		return (EEXIST);
1318 	}
1319 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1320 		NDFREE(&nd, NDF_ONLY_PNBUF);
1321 		vput(nd.ni_dvp);
1322 		VFS_UNLOCK_GIANT(vfslocked);
1323 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1324 			return (error);
1325 		goto restart;
1326 	}
1327 	VATTR_NULL(&vattr);
1328 	vattr.va_type = VFIFO;
1329 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1330 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1331 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1332 #ifdef MAC
1333 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1334 	    &vattr);
1335 	if (error)
1336 		goto out;
1337 #endif
1338 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1339 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1340 	if (error == 0)
1341 		vput(nd.ni_vp);
1342 #ifdef MAC
1343 out:
1344 #endif
1345 	vput(nd.ni_dvp);
1346 	vn_finished_write(mp);
1347 	VFS_UNLOCK_GIANT(vfslocked);
1348 	NDFREE(&nd, NDF_ONLY_PNBUF);
1349 	return (error);
1350 }
1351 
1352 /*
1353  * Make a hard file link.
1354  */
1355 #ifndef _SYS_SYSPROTO_H_
1356 struct link_args {
1357 	char	*path;
1358 	char	*link;
1359 };
1360 #endif
1361 int
1362 link(td, uap)
1363 	struct thread *td;
1364 	register struct link_args /* {
1365 		char *path;
1366 		char *link;
1367 	} */ *uap;
1368 {
1369 	int error;
1370 
1371 	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1372 	return (error);
1373 }
1374 
1375 SYSCTL_DECL(_security_bsd);
1376 
1377 static int hardlink_check_uid = 0;
1378 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1379     &hardlink_check_uid, 0,
1380     "Unprivileged processes cannot create hard links to files owned by other "
1381     "users");
1382 static int hardlink_check_gid = 0;
1383 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1384     &hardlink_check_gid, 0,
1385     "Unprivileged processes cannot create hard links to files owned by other "
1386     "groups");
1387 
1388 static int
1389 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1390 {
1391 	struct vattr va;
1392 	int error;
1393 
1394 	if (suser_cred(cred, SUSER_ALLOWJAIL) == 0)
1395 		return (0);
1396 
1397 	if (!hardlink_check_uid && !hardlink_check_gid)
1398 		return (0);
1399 
1400 	error = VOP_GETATTR(vp, &va, cred, td);
1401 	if (error != 0)
1402 		return (error);
1403 
1404 	if (hardlink_check_uid) {
1405 		if (cred->cr_uid != va.va_uid)
1406 			return (EPERM);
1407 	}
1408 
1409 	if (hardlink_check_gid) {
1410 		if (!groupmember(va.va_gid, cred))
1411 			return (EPERM);
1412 	}
1413 
1414 	return (0);
1415 }
1416 
1417 int
1418 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1419 {
1420 	struct vnode *vp;
1421 	struct mount *mp;
1422 	struct nameidata nd;
1423 	int vfslocked;
1424 	int lvfslocked;
1425 	int error;
1426 
1427 	bwillwrite();
1428 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
1429 	if ((error = namei(&nd)) != 0)
1430 		return (error);
1431 	vfslocked = NDHASGIANT(&nd);
1432 	NDFREE(&nd, NDF_ONLY_PNBUF);
1433 	vp = nd.ni_vp;
1434 	if (vp->v_type == VDIR) {
1435 		vrele(vp);
1436 		VFS_UNLOCK_GIANT(vfslocked);
1437 		return (EPERM);		/* POSIX */
1438 	}
1439 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1440 		vrele(vp);
1441 		VFS_UNLOCK_GIANT(vfslocked);
1442 		return (error);
1443 	}
1444 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1445 	    segflg, link, td);
1446 	if ((error = namei(&nd)) == 0) {
1447 		lvfslocked = NDHASGIANT(&nd);
1448 		if (nd.ni_vp != NULL) {
1449 			if (nd.ni_dvp == nd.ni_vp)
1450 				vrele(nd.ni_dvp);
1451 			else
1452 				vput(nd.ni_dvp);
1453 			vrele(nd.ni_vp);
1454 			error = EEXIST;
1455 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1456 		    == 0) {
1457 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1458 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1459 			error = can_hardlink(vp, td, td->td_ucred);
1460 			if (error == 0)
1461 #ifdef MAC
1462 				error = mac_check_vnode_link(td->td_ucred,
1463 				    nd.ni_dvp, vp, &nd.ni_cnd);
1464 			if (error == 0)
1465 #endif
1466 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1467 			VOP_UNLOCK(vp, 0, td);
1468 			vput(nd.ni_dvp);
1469 		}
1470 		NDFREE(&nd, NDF_ONLY_PNBUF);
1471 		VFS_UNLOCK_GIANT(lvfslocked);
1472 	}
1473 	vrele(vp);
1474 	vn_finished_write(mp);
1475 	VFS_UNLOCK_GIANT(vfslocked);
1476 	return (error);
1477 }
1478 
1479 /*
1480  * Make a symbolic link.
1481  */
1482 #ifndef _SYS_SYSPROTO_H_
1483 struct symlink_args {
1484 	char	*path;
1485 	char	*link;
1486 };
1487 #endif
1488 int
1489 symlink(td, uap)
1490 	struct thread *td;
1491 	register struct symlink_args /* {
1492 		char *path;
1493 		char *link;
1494 	} */ *uap;
1495 {
1496 
1497 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1498 }
1499 
1500 int
1501 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1502 {
1503 	struct mount *mp;
1504 	struct vattr vattr;
1505 	char *syspath;
1506 	int error;
1507 	struct nameidata nd;
1508 	int vfslocked;
1509 
1510 	if (segflg == UIO_SYSSPACE) {
1511 		syspath = path;
1512 	} else {
1513 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1514 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1515 			goto out;
1516 	}
1517 	AUDIT_ARG(text, syspath);
1518 restart:
1519 	bwillwrite();
1520 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1521 	    segflg, link, td);
1522 	if ((error = namei(&nd)) != 0)
1523 		goto out;
1524 	vfslocked = NDHASGIANT(&nd);
1525 	if (nd.ni_vp) {
1526 		NDFREE(&nd, NDF_ONLY_PNBUF);
1527 		if (nd.ni_vp == nd.ni_dvp)
1528 			vrele(nd.ni_dvp);
1529 		else
1530 			vput(nd.ni_dvp);
1531 		vrele(nd.ni_vp);
1532 		VFS_UNLOCK_GIANT(vfslocked);
1533 		error = EEXIST;
1534 		goto out;
1535 	}
1536 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1537 		NDFREE(&nd, NDF_ONLY_PNBUF);
1538 		vput(nd.ni_dvp);
1539 		VFS_UNLOCK_GIANT(vfslocked);
1540 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1541 			goto out;
1542 		goto restart;
1543 	}
1544 	VATTR_NULL(&vattr);
1545 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1546 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1547 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1548 #ifdef MAC
1549 	vattr.va_type = VLNK;
1550 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1551 	    &vattr);
1552 	if (error)
1553 		goto out2;
1554 #endif
1555 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1556 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1557 	if (error == 0)
1558 		vput(nd.ni_vp);
1559 #ifdef MAC
1560 out2:
1561 #endif
1562 	NDFREE(&nd, NDF_ONLY_PNBUF);
1563 	vput(nd.ni_dvp);
1564 	vn_finished_write(mp);
1565 	VFS_UNLOCK_GIANT(vfslocked);
1566 out:
1567 	if (segflg != UIO_SYSSPACE)
1568 		uma_zfree(namei_zone, syspath);
1569 	return (error);
1570 }
1571 
1572 /*
1573  * Delete a whiteout from the filesystem.
1574  */
1575 int
1576 undelete(td, uap)
1577 	struct thread *td;
1578 	register struct undelete_args /* {
1579 		char *path;
1580 	} */ *uap;
1581 {
1582 	int error;
1583 	struct mount *mp;
1584 	struct nameidata nd;
1585 	int vfslocked;
1586 
1587 restart:
1588 	bwillwrite();
1589 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1590 	    UIO_USERSPACE, uap->path, td);
1591 	error = namei(&nd);
1592 	if (error)
1593 		return (error);
1594 	vfslocked = NDHASGIANT(&nd);
1595 
1596 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1597 		NDFREE(&nd, NDF_ONLY_PNBUF);
1598 		if (nd.ni_vp == nd.ni_dvp)
1599 			vrele(nd.ni_dvp);
1600 		else
1601 			vput(nd.ni_dvp);
1602 		if (nd.ni_vp)
1603 			vrele(nd.ni_vp);
1604 		VFS_UNLOCK_GIANT(vfslocked);
1605 		return (EEXIST);
1606 	}
1607 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1608 		NDFREE(&nd, NDF_ONLY_PNBUF);
1609 		vput(nd.ni_dvp);
1610 		VFS_UNLOCK_GIANT(vfslocked);
1611 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1612 			return (error);
1613 		goto restart;
1614 	}
1615 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1616 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1617 	NDFREE(&nd, NDF_ONLY_PNBUF);
1618 	vput(nd.ni_dvp);
1619 	vn_finished_write(mp);
1620 	VFS_UNLOCK_GIANT(vfslocked);
1621 	return (error);
1622 }
1623 
1624 /*
1625  * Delete a name from the filesystem.
1626  */
1627 #ifndef _SYS_SYSPROTO_H_
1628 struct unlink_args {
1629 	char	*path;
1630 };
1631 #endif
1632 int
1633 unlink(td, uap)
1634 	struct thread *td;
1635 	struct unlink_args /* {
1636 		char *path;
1637 	} */ *uap;
1638 {
1639 	int error;
1640 
1641 	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1642 	return (error);
1643 }
1644 
1645 int
1646 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1647 {
1648 	struct mount *mp;
1649 	struct vnode *vp;
1650 	int error;
1651 	struct nameidata nd;
1652 	int vfslocked;
1653 
1654 restart:
1655 	bwillwrite();
1656 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1657 	    pathseg, path, td);
1658 	if ((error = namei(&nd)) != 0)
1659 		return (error == EINVAL ? EPERM : error);
1660 	vfslocked = NDHASGIANT(&nd);
1661 	vp = nd.ni_vp;
1662 	if (vp->v_type == VDIR)
1663 		error = EPERM;		/* POSIX */
1664 	else {
1665 		/*
1666 		 * The root of a mounted filesystem cannot be deleted.
1667 		 *
1668 		 * XXX: can this only be a VDIR case?
1669 		 */
1670 		if (vp->v_vflag & VV_ROOT)
1671 			error = EBUSY;
1672 	}
1673 	if (error == 0) {
1674 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1675 			NDFREE(&nd, NDF_ONLY_PNBUF);
1676 			vput(nd.ni_dvp);
1677 			if (vp == nd.ni_dvp)
1678 				vrele(vp);
1679 			else
1680 				vput(vp);
1681 			VFS_UNLOCK_GIANT(vfslocked);
1682 			if ((error = vn_start_write(NULL, &mp,
1683 			    V_XSLEEP | PCATCH)) != 0)
1684 				return (error);
1685 			goto restart;
1686 		}
1687 #ifdef MAC
1688 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1689 		    &nd.ni_cnd);
1690 		if (error)
1691 			goto out;
1692 #endif
1693 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1694 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1695 #ifdef MAC
1696 out:
1697 #endif
1698 		vn_finished_write(mp);
1699 	}
1700 	NDFREE(&nd, NDF_ONLY_PNBUF);
1701 	vput(nd.ni_dvp);
1702 	if (vp == nd.ni_dvp)
1703 		vrele(vp);
1704 	else
1705 		vput(vp);
1706 	VFS_UNLOCK_GIANT(vfslocked);
1707 	return (error);
1708 }
1709 
1710 /*
1711  * Reposition read/write file offset.
1712  */
1713 #ifndef _SYS_SYSPROTO_H_
1714 struct lseek_args {
1715 	int	fd;
1716 	int	pad;
1717 	off_t	offset;
1718 	int	whence;
1719 };
1720 #endif
1721 int
1722 lseek(td, uap)
1723 	struct thread *td;
1724 	register struct lseek_args /* {
1725 		int fd;
1726 		int pad;
1727 		off_t offset;
1728 		int whence;
1729 	} */ *uap;
1730 {
1731 	struct ucred *cred = td->td_ucred;
1732 	struct file *fp;
1733 	struct vnode *vp;
1734 	struct vattr vattr;
1735 	off_t offset;
1736 	int error, noneg;
1737 	int vfslocked;
1738 
1739 	if ((error = fget(td, uap->fd, &fp)) != 0)
1740 		return (error);
1741 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1742 		fdrop(fp, td);
1743 		return (ESPIPE);
1744 	}
1745 	vp = fp->f_vnode;
1746 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1747 	noneg = (vp->v_type != VCHR);
1748 	offset = uap->offset;
1749 	switch (uap->whence) {
1750 	case L_INCR:
1751 		if (noneg &&
1752 		    (fp->f_offset < 0 ||
1753 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1754 			error = EOVERFLOW;
1755 			break;
1756 		}
1757 		offset += fp->f_offset;
1758 		break;
1759 	case L_XTND:
1760 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1761 		error = VOP_GETATTR(vp, &vattr, cred, td);
1762 		VOP_UNLOCK(vp, 0, td);
1763 		if (error)
1764 			break;
1765 		if (noneg &&
1766 		    (vattr.va_size > OFF_MAX ||
1767 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1768 			error = EOVERFLOW;
1769 			break;
1770 		}
1771 		offset += vattr.va_size;
1772 		break;
1773 	case L_SET:
1774 		break;
1775 	default:
1776 		error = EINVAL;
1777 	}
1778 	if (error == 0 && noneg && offset < 0)
1779 		error = EINVAL;
1780 	if (error != 0)
1781 		goto drop;
1782 	fp->f_offset = offset;
1783 	*(off_t *)(td->td_retval) = fp->f_offset;
1784 drop:
1785 	fdrop(fp, td);
1786 	VFS_UNLOCK_GIANT(vfslocked);
1787 	return (error);
1788 }
1789 
1790 #if defined(COMPAT_43)
1791 /*
1792  * Reposition read/write file offset.
1793  */
1794 #ifndef _SYS_SYSPROTO_H_
1795 struct olseek_args {
1796 	int	fd;
1797 	long	offset;
1798 	int	whence;
1799 };
1800 #endif
1801 int
1802 olseek(td, uap)
1803 	struct thread *td;
1804 	register struct olseek_args /* {
1805 		int fd;
1806 		long offset;
1807 		int whence;
1808 	} */ *uap;
1809 {
1810 	struct lseek_args /* {
1811 		int fd;
1812 		int pad;
1813 		off_t offset;
1814 		int whence;
1815 	} */ nuap;
1816 	int error;
1817 
1818 	nuap.fd = uap->fd;
1819 	nuap.offset = uap->offset;
1820 	nuap.whence = uap->whence;
1821 	error = lseek(td, &nuap);
1822 	return (error);
1823 }
1824 #endif /* COMPAT_43 */
1825 
1826 /*
1827  * Check access permissions using passed credentials.
1828  */
1829 static int
1830 vn_access(vp, user_flags, cred, td)
1831 	struct vnode	*vp;
1832 	int		user_flags;
1833 	struct ucred	*cred;
1834 	struct thread	*td;
1835 {
1836 	int error, flags;
1837 
1838 	/* Flags == 0 means only check for existence. */
1839 	error = 0;
1840 	if (user_flags) {
1841 		flags = 0;
1842 		if (user_flags & R_OK)
1843 			flags |= VREAD;
1844 		if (user_flags & W_OK)
1845 			flags |= VWRITE;
1846 		if (user_flags & X_OK)
1847 			flags |= VEXEC;
1848 #ifdef MAC
1849 		error = mac_check_vnode_access(cred, vp, flags);
1850 		if (error)
1851 			return (error);
1852 #endif
1853 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1854 			error = VOP_ACCESS(vp, flags, cred, td);
1855 	}
1856 	return (error);
1857 }
1858 
1859 /*
1860  * Check access permissions using "real" credentials.
1861  */
1862 #ifndef _SYS_SYSPROTO_H_
1863 struct access_args {
1864 	char	*path;
1865 	int	flags;
1866 };
1867 #endif
1868 int
1869 access(td, uap)
1870 	struct thread *td;
1871 	register struct access_args /* {
1872 		char *path;
1873 		int flags;
1874 	} */ *uap;
1875 {
1876 
1877 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1878 }
1879 
1880 int
1881 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1882 {
1883 	struct ucred *cred, *tmpcred;
1884 	register struct vnode *vp;
1885 	struct nameidata nd;
1886 	int vfslocked;
1887 	int error;
1888 
1889 	/*
1890 	 * Create and modify a temporary credential instead of one that
1891 	 * is potentially shared.  This could also mess up socket
1892 	 * buffer accounting which can run in an interrupt context.
1893 	 */
1894 	cred = td->td_ucred;
1895 	tmpcred = crdup(cred);
1896 	tmpcred->cr_uid = cred->cr_ruid;
1897 	tmpcred->cr_groups[0] = cred->cr_rgid;
1898 	td->td_ucred = tmpcred;
1899 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1900 	    pathseg, path, td);
1901 	if ((error = namei(&nd)) != 0)
1902 		goto out1;
1903 	vfslocked = NDHASGIANT(&nd);
1904 	vp = nd.ni_vp;
1905 
1906 	error = vn_access(vp, flags, tmpcred, td);
1907 	NDFREE(&nd, NDF_ONLY_PNBUF);
1908 	vput(vp);
1909 	VFS_UNLOCK_GIANT(vfslocked);
1910 out1:
1911 	td->td_ucred = cred;
1912 	crfree(tmpcred);
1913 	return (error);
1914 }
1915 
1916 /*
1917  * Check access permissions using "effective" credentials.
1918  */
1919 #ifndef _SYS_SYSPROTO_H_
1920 struct eaccess_args {
1921 	char	*path;
1922 	int	flags;
1923 };
1924 #endif
1925 int
1926 eaccess(td, uap)
1927 	struct thread *td;
1928 	register struct eaccess_args /* {
1929 		char *path;
1930 		int flags;
1931 	} */ *uap;
1932 {
1933 
1934 	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1935 }
1936 
1937 int
1938 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1939 {
1940 	struct nameidata nd;
1941 	struct vnode *vp;
1942 	int vfslocked;
1943 	int error;
1944 
1945 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1946 	    pathseg, path, td);
1947 	if ((error = namei(&nd)) != 0)
1948 		return (error);
1949 	vp = nd.ni_vp;
1950 	vfslocked = NDHASGIANT(&nd);
1951 	error = vn_access(vp, flags, td->td_ucred, td);
1952 	NDFREE(&nd, NDF_ONLY_PNBUF);
1953 	vput(vp);
1954 	VFS_UNLOCK_GIANT(vfslocked);
1955 	return (error);
1956 }
1957 
1958 #if defined(COMPAT_43)
1959 /*
1960  * Get file status; this version follows links.
1961  */
1962 #ifndef _SYS_SYSPROTO_H_
1963 struct ostat_args {
1964 	char	*path;
1965 	struct ostat *ub;
1966 };
1967 #endif
1968 int
1969 ostat(td, uap)
1970 	struct thread *td;
1971 	register struct ostat_args /* {
1972 		char *path;
1973 		struct ostat *ub;
1974 	} */ *uap;
1975 {
1976 	struct stat sb;
1977 	struct ostat osb;
1978 	int error;
1979 
1980 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1981 	if (error)
1982 		return (error);
1983 	cvtstat(&sb, &osb);
1984 	error = copyout(&osb, uap->ub, sizeof (osb));
1985 	return (error);
1986 }
1987 
1988 /*
1989  * Get file status; this version does not follow links.
1990  */
1991 #ifndef _SYS_SYSPROTO_H_
1992 struct olstat_args {
1993 	char	*path;
1994 	struct ostat *ub;
1995 };
1996 #endif
1997 int
1998 olstat(td, uap)
1999 	struct thread *td;
2000 	register struct olstat_args /* {
2001 		char *path;
2002 		struct ostat *ub;
2003 	} */ *uap;
2004 {
2005 	struct stat sb;
2006 	struct ostat osb;
2007 	int error;
2008 
2009 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2010 	if (error)
2011 		return (error);
2012 	cvtstat(&sb, &osb);
2013 	error = copyout(&osb, uap->ub, sizeof (osb));
2014 	return (error);
2015 }
2016 
2017 /*
2018  * Convert from an old to a new stat structure.
2019  */
2020 void
2021 cvtstat(st, ost)
2022 	struct stat *st;
2023 	struct ostat *ost;
2024 {
2025 
2026 	ost->st_dev = st->st_dev;
2027 	ost->st_ino = st->st_ino;
2028 	ost->st_mode = st->st_mode;
2029 	ost->st_nlink = st->st_nlink;
2030 	ost->st_uid = st->st_uid;
2031 	ost->st_gid = st->st_gid;
2032 	ost->st_rdev = st->st_rdev;
2033 	if (st->st_size < (quad_t)1 << 32)
2034 		ost->st_size = st->st_size;
2035 	else
2036 		ost->st_size = -2;
2037 	ost->st_atime = st->st_atime;
2038 	ost->st_mtime = st->st_mtime;
2039 	ost->st_ctime = st->st_ctime;
2040 	ost->st_blksize = st->st_blksize;
2041 	ost->st_blocks = st->st_blocks;
2042 	ost->st_flags = st->st_flags;
2043 	ost->st_gen = st->st_gen;
2044 }
2045 #endif /* COMPAT_43 */
2046 
2047 /*
2048  * Get file status; this version follows links.
2049  */
2050 #ifndef _SYS_SYSPROTO_H_
2051 struct stat_args {
2052 	char	*path;
2053 	struct stat *ub;
2054 };
2055 #endif
2056 int
2057 stat(td, uap)
2058 	struct thread *td;
2059 	register struct stat_args /* {
2060 		char *path;
2061 		struct stat *ub;
2062 	} */ *uap;
2063 {
2064 	struct stat sb;
2065 	int error;
2066 
2067 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2068 	if (error == 0)
2069 		error = copyout(&sb, uap->ub, sizeof (sb));
2070 	return (error);
2071 }
2072 
2073 int
2074 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2075 {
2076 	struct nameidata nd;
2077 	struct stat sb;
2078 	int error, vfslocked;
2079 
2080 	NDINIT(&nd, LOOKUP,
2081 	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2082 	    pathseg, path, td);
2083 	if ((error = namei(&nd)) != 0)
2084 		return (error);
2085 	vfslocked = NDHASGIANT(&nd);
2086 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2087 	NDFREE(&nd, NDF_ONLY_PNBUF);
2088 	vput(nd.ni_vp);
2089 	VFS_UNLOCK_GIANT(vfslocked);
2090 	if (mtx_owned(&Giant))
2091 		printf("stat(%d): %s\n", vfslocked, path);
2092 	if (error)
2093 		return (error);
2094 	*sbp = sb;
2095 	return (0);
2096 }
2097 
2098 /*
2099  * Get file status; this version does not follow links.
2100  */
2101 #ifndef _SYS_SYSPROTO_H_
2102 struct lstat_args {
2103 	char	*path;
2104 	struct stat *ub;
2105 };
2106 #endif
2107 int
2108 lstat(td, uap)
2109 	struct thread *td;
2110 	register struct lstat_args /* {
2111 		char *path;
2112 		struct stat *ub;
2113 	} */ *uap;
2114 {
2115 	struct stat sb;
2116 	int error;
2117 
2118 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2119 	if (error == 0)
2120 		error = copyout(&sb, uap->ub, sizeof (sb));
2121 	return (error);
2122 }
2123 
2124 int
2125 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2126 {
2127 	struct vnode *vp;
2128 	struct stat sb;
2129 	struct nameidata nd;
2130 	int error, vfslocked;
2131 
2132 	NDINIT(&nd, LOOKUP,
2133 	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2134 	    pathseg, path, td);
2135 	if ((error = namei(&nd)) != 0)
2136 		return (error);
2137 	vfslocked = NDHASGIANT(&nd);
2138 	vp = nd.ni_vp;
2139 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2140 	NDFREE(&nd, NDF_ONLY_PNBUF);
2141 	vput(vp);
2142 	VFS_UNLOCK_GIANT(vfslocked);
2143 	if (error)
2144 		return (error);
2145 	*sbp = sb;
2146 	return (0);
2147 }
2148 
2149 /*
2150  * Implementation of the NetBSD [l]stat() functions.
2151  */
2152 void
2153 cvtnstat(sb, nsb)
2154 	struct stat *sb;
2155 	struct nstat *nsb;
2156 {
2157 	bzero(nsb, sizeof *nsb);
2158 	nsb->st_dev = sb->st_dev;
2159 	nsb->st_ino = sb->st_ino;
2160 	nsb->st_mode = sb->st_mode;
2161 	nsb->st_nlink = sb->st_nlink;
2162 	nsb->st_uid = sb->st_uid;
2163 	nsb->st_gid = sb->st_gid;
2164 	nsb->st_rdev = sb->st_rdev;
2165 	nsb->st_atimespec = sb->st_atimespec;
2166 	nsb->st_mtimespec = sb->st_mtimespec;
2167 	nsb->st_ctimespec = sb->st_ctimespec;
2168 	nsb->st_size = sb->st_size;
2169 	nsb->st_blocks = sb->st_blocks;
2170 	nsb->st_blksize = sb->st_blksize;
2171 	nsb->st_flags = sb->st_flags;
2172 	nsb->st_gen = sb->st_gen;
2173 	nsb->st_birthtimespec = sb->st_birthtimespec;
2174 }
2175 
2176 #ifndef _SYS_SYSPROTO_H_
2177 struct nstat_args {
2178 	char	*path;
2179 	struct nstat *ub;
2180 };
2181 #endif
2182 int
2183 nstat(td, uap)
2184 	struct thread *td;
2185 	register struct nstat_args /* {
2186 		char *path;
2187 		struct nstat *ub;
2188 	} */ *uap;
2189 {
2190 	struct stat sb;
2191 	struct nstat nsb;
2192 	int error;
2193 
2194 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2195 	if (error)
2196 		return (error);
2197 	cvtnstat(&sb, &nsb);
2198 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2199 	return (error);
2200 }
2201 
2202 /*
2203  * NetBSD lstat.  Get file status; this version does not follow links.
2204  */
2205 #ifndef _SYS_SYSPROTO_H_
2206 struct lstat_args {
2207 	char	*path;
2208 	struct stat *ub;
2209 };
2210 #endif
2211 int
2212 nlstat(td, uap)
2213 	struct thread *td;
2214 	register struct nlstat_args /* {
2215 		char *path;
2216 		struct nstat *ub;
2217 	} */ *uap;
2218 {
2219 	struct stat sb;
2220 	struct nstat nsb;
2221 	int error;
2222 
2223 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2224 	if (error)
2225 		return (error);
2226 	cvtnstat(&sb, &nsb);
2227 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2228 	return (error);
2229 }
2230 
2231 /*
2232  * Get configurable pathname variables.
2233  */
2234 #ifndef _SYS_SYSPROTO_H_
2235 struct pathconf_args {
2236 	char	*path;
2237 	int	name;
2238 };
2239 #endif
2240 int
2241 pathconf(td, uap)
2242 	struct thread *td;
2243 	register struct pathconf_args /* {
2244 		char *path;
2245 		int name;
2246 	} */ *uap;
2247 {
2248 
2249 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2250 }
2251 
2252 int
2253 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2254 {
2255 	struct nameidata nd;
2256 	int error, vfslocked;
2257 
2258 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2259 	    pathseg, path, td);
2260 	if ((error = namei(&nd)) != 0)
2261 		return (error);
2262 	vfslocked = NDHASGIANT(&nd);
2263 	NDFREE(&nd, NDF_ONLY_PNBUF);
2264 
2265 	/* If asynchronous I/O is available, it works for all files. */
2266 	if (name == _PC_ASYNC_IO)
2267 		td->td_retval[0] = async_io_version;
2268 	else
2269 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2270 	vput(nd.ni_vp);
2271 	VFS_UNLOCK_GIANT(vfslocked);
2272 	return (error);
2273 }
2274 
2275 /*
2276  * Return target name of a symbolic link.
2277  */
2278 #ifndef _SYS_SYSPROTO_H_
2279 struct readlink_args {
2280 	char	*path;
2281 	char	*buf;
2282 	int	count;
2283 };
2284 #endif
2285 int
2286 readlink(td, uap)
2287 	struct thread *td;
2288 	register struct readlink_args /* {
2289 		char *path;
2290 		char *buf;
2291 		int count;
2292 	} */ *uap;
2293 {
2294 
2295 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2296 	    UIO_USERSPACE, uap->count));
2297 }
2298 
2299 int
2300 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2301     enum uio_seg bufseg, int count)
2302 {
2303 	register struct vnode *vp;
2304 	struct iovec aiov;
2305 	struct uio auio;
2306 	int error;
2307 	struct nameidata nd;
2308 	int vfslocked;
2309 
2310 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2311 	    pathseg, path, td);
2312 	if ((error = namei(&nd)) != 0)
2313 		return (error);
2314 	NDFREE(&nd, NDF_ONLY_PNBUF);
2315 	vfslocked = NDHASGIANT(&nd);
2316 	vp = nd.ni_vp;
2317 #ifdef MAC
2318 	error = mac_check_vnode_readlink(td->td_ucred, vp);
2319 	if (error) {
2320 		vput(vp);
2321 		VFS_UNLOCK_GIANT(vfslocked);
2322 		return (error);
2323 	}
2324 #endif
2325 	if (vp->v_type != VLNK)
2326 		error = EINVAL;
2327 	else {
2328 		aiov.iov_base = buf;
2329 		aiov.iov_len = count;
2330 		auio.uio_iov = &aiov;
2331 		auio.uio_iovcnt = 1;
2332 		auio.uio_offset = 0;
2333 		auio.uio_rw = UIO_READ;
2334 		auio.uio_segflg = bufseg;
2335 		auio.uio_td = td;
2336 		auio.uio_resid = count;
2337 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2338 	}
2339 	vput(vp);
2340 	VFS_UNLOCK_GIANT(vfslocked);
2341 	td->td_retval[0] = count - auio.uio_resid;
2342 	return (error);
2343 }
2344 
2345 /*
2346  * Common implementation code for chflags() and fchflags().
2347  */
2348 static int
2349 setfflags(td, vp, flags)
2350 	struct thread *td;
2351 	struct vnode *vp;
2352 	int flags;
2353 {
2354 	int error;
2355 	struct mount *mp;
2356 	struct vattr vattr;
2357 
2358 	/*
2359 	 * Prevent non-root users from setting flags on devices.  When
2360 	 * a device is reused, users can retain ownership of the device
2361 	 * if they are allowed to set flags and programs assume that
2362 	 * chown can't fail when done as root.
2363 	 */
2364 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2365 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
2366 		if (error)
2367 			return (error);
2368 	}
2369 
2370 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2371 		return (error);
2372 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2373 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2374 	VATTR_NULL(&vattr);
2375 	vattr.va_flags = flags;
2376 #ifdef MAC
2377 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2378 	if (error == 0)
2379 #endif
2380 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2381 	VOP_UNLOCK(vp, 0, td);
2382 	vn_finished_write(mp);
2383 	return (error);
2384 }
2385 
2386 /*
2387  * Change flags of a file given a path name.
2388  */
2389 #ifndef _SYS_SYSPROTO_H_
2390 struct chflags_args {
2391 	char	*path;
2392 	int	flags;
2393 };
2394 #endif
2395 int
2396 chflags(td, uap)
2397 	struct thread *td;
2398 	register struct chflags_args /* {
2399 		char *path;
2400 		int flags;
2401 	} */ *uap;
2402 {
2403 	int error;
2404 	struct nameidata nd;
2405 	int vfslocked;
2406 
2407 	AUDIT_ARG(fflags, uap->flags);
2408 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2409 	    uap->path, td);
2410 	if ((error = namei(&nd)) != 0)
2411 		return (error);
2412 	NDFREE(&nd, NDF_ONLY_PNBUF);
2413 	vfslocked = NDHASGIANT(&nd);
2414 	error = setfflags(td, nd.ni_vp, uap->flags);
2415 	vrele(nd.ni_vp);
2416 	VFS_UNLOCK_GIANT(vfslocked);
2417 	return (error);
2418 }
2419 
2420 /*
2421  * Same as chflags() but doesn't follow symlinks.
2422  */
2423 int
2424 lchflags(td, uap)
2425 	struct thread *td;
2426 	register struct lchflags_args /* {
2427 		char *path;
2428 		int flags;
2429 	} */ *uap;
2430 {
2431 	int error;
2432 	struct nameidata nd;
2433 	int vfslocked;
2434 
2435 	AUDIT_ARG(fflags, uap->flags);
2436 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2437 	    uap->path, td);
2438 	if ((error = namei(&nd)) != 0)
2439 		return (error);
2440 	vfslocked = NDHASGIANT(&nd);
2441 	NDFREE(&nd, NDF_ONLY_PNBUF);
2442 	error = setfflags(td, nd.ni_vp, uap->flags);
2443 	vrele(nd.ni_vp);
2444 	VFS_UNLOCK_GIANT(vfslocked);
2445 	return (error);
2446 }
2447 
2448 /*
2449  * Change flags of a file given a file descriptor.
2450  */
2451 #ifndef _SYS_SYSPROTO_H_
2452 struct fchflags_args {
2453 	int	fd;
2454 	int	flags;
2455 };
2456 #endif
2457 int
2458 fchflags(td, uap)
2459 	struct thread *td;
2460 	register struct fchflags_args /* {
2461 		int fd;
2462 		int flags;
2463 	} */ *uap;
2464 {
2465 	struct file *fp;
2466 	int vfslocked;
2467 	int error;
2468 
2469 	AUDIT_ARG(fd, uap->fd);
2470 	AUDIT_ARG(fflags, uap->flags);
2471 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2472 		return (error);
2473 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2474 #ifdef AUDIT
2475 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2476 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2477 	VOP_UNLOCK(fp->f_vnode, 0, td);
2478 #endif
2479 	error = setfflags(td, fp->f_vnode, uap->flags);
2480 	VFS_UNLOCK_GIANT(vfslocked);
2481 	fdrop(fp, td);
2482 	return (error);
2483 }
2484 
2485 /*
2486  * Common implementation code for chmod(), lchmod() and fchmod().
2487  */
2488 static int
2489 setfmode(td, vp, mode)
2490 	struct thread *td;
2491 	struct vnode *vp;
2492 	int mode;
2493 {
2494 	int error;
2495 	struct mount *mp;
2496 	struct vattr vattr;
2497 
2498 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2499 		return (error);
2500 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2501 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2502 	VATTR_NULL(&vattr);
2503 	vattr.va_mode = mode & ALLPERMS;
2504 #ifdef MAC
2505 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2506 	if (error == 0)
2507 #endif
2508 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2509 	VOP_UNLOCK(vp, 0, td);
2510 	vn_finished_write(mp);
2511 	return (error);
2512 }
2513 
2514 /*
2515  * Change mode of a file given path name.
2516  */
2517 #ifndef _SYS_SYSPROTO_H_
2518 struct chmod_args {
2519 	char	*path;
2520 	int	mode;
2521 };
2522 #endif
2523 int
2524 chmod(td, uap)
2525 	struct thread *td;
2526 	register struct chmod_args /* {
2527 		char *path;
2528 		int mode;
2529 	} */ *uap;
2530 {
2531 
2532 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2533 }
2534 
2535 int
2536 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2537 {
2538 	int error;
2539 	struct nameidata nd;
2540 	int vfslocked;
2541 
2542 	AUDIT_ARG(mode, mode);
2543 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2544 	if ((error = namei(&nd)) != 0)
2545 		return (error);
2546 	vfslocked = NDHASGIANT(&nd);
2547 	NDFREE(&nd, NDF_ONLY_PNBUF);
2548 	error = setfmode(td, nd.ni_vp, mode);
2549 	vrele(nd.ni_vp);
2550 	VFS_UNLOCK_GIANT(vfslocked);
2551 	return (error);
2552 }
2553 
2554 /*
2555  * Change mode of a file given path name (don't follow links.)
2556  */
2557 #ifndef _SYS_SYSPROTO_H_
2558 struct lchmod_args {
2559 	char	*path;
2560 	int	mode;
2561 };
2562 #endif
2563 int
2564 lchmod(td, uap)
2565 	struct thread *td;
2566 	register struct lchmod_args /* {
2567 		char *path;
2568 		int mode;
2569 	} */ *uap;
2570 {
2571 	int error;
2572 	struct nameidata nd;
2573 	int vfslocked;
2574 
2575 	AUDIT_ARG(mode, (mode_t)uap->mode);
2576 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2577 	    uap->path, td);
2578 	if ((error = namei(&nd)) != 0)
2579 		return (error);
2580 	vfslocked = NDHASGIANT(&nd);
2581 	NDFREE(&nd, NDF_ONLY_PNBUF);
2582 	error = setfmode(td, nd.ni_vp, uap->mode);
2583 	vrele(nd.ni_vp);
2584 	VFS_UNLOCK_GIANT(vfslocked);
2585 	return (error);
2586 }
2587 
2588 /*
2589  * Change mode of a file given a file descriptor.
2590  */
2591 #ifndef _SYS_SYSPROTO_H_
2592 struct fchmod_args {
2593 	int	fd;
2594 	int	mode;
2595 };
2596 #endif
2597 int
2598 fchmod(td, uap)
2599 	struct thread *td;
2600 	register struct fchmod_args /* {
2601 		int fd;
2602 		int mode;
2603 	} */ *uap;
2604 {
2605 	struct file *fp;
2606 	int vfslocked;
2607 	int error;
2608 
2609 	AUDIT_ARG(fd, uap->fd);
2610 	AUDIT_ARG(mode, uap->mode);
2611 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2612 		return (error);
2613 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2614 #ifdef AUDIT
2615 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2616 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2617 	VOP_UNLOCK(fp->f_vnode, 0, td);
2618 #endif
2619 	error = setfmode(td, fp->f_vnode, uap->mode);
2620 	VFS_UNLOCK_GIANT(vfslocked);
2621 	fdrop(fp, td);
2622 	return (error);
2623 }
2624 
2625 /*
2626  * Common implementation for chown(), lchown(), and fchown()
2627  */
2628 static int
2629 setfown(td, vp, uid, gid)
2630 	struct thread *td;
2631 	struct vnode *vp;
2632 	uid_t uid;
2633 	gid_t gid;
2634 {
2635 	int error;
2636 	struct mount *mp;
2637 	struct vattr vattr;
2638 
2639 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2640 		return (error);
2641 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2642 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2643 	VATTR_NULL(&vattr);
2644 	vattr.va_uid = uid;
2645 	vattr.va_gid = gid;
2646 #ifdef MAC
2647 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2648 	    vattr.va_gid);
2649 	if (error == 0)
2650 #endif
2651 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2652 	VOP_UNLOCK(vp, 0, td);
2653 	vn_finished_write(mp);
2654 	return (error);
2655 }
2656 
2657 /*
2658  * Set ownership given a path name.
2659  */
2660 #ifndef _SYS_SYSPROTO_H_
2661 struct chown_args {
2662 	char	*path;
2663 	int	uid;
2664 	int	gid;
2665 };
2666 #endif
2667 int
2668 chown(td, uap)
2669 	struct thread *td;
2670 	register struct chown_args /* {
2671 		char *path;
2672 		int uid;
2673 		int gid;
2674 	} */ *uap;
2675 {
2676 
2677 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2678 }
2679 
2680 int
2681 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2682     int gid)
2683 {
2684 	int error;
2685 	struct nameidata nd;
2686 	int vfslocked;
2687 
2688 	AUDIT_ARG(owner, uid, gid);
2689 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2690 	if ((error = namei(&nd)) != 0)
2691 		return (error);
2692 	vfslocked = NDHASGIANT(&nd);
2693 	NDFREE(&nd, NDF_ONLY_PNBUF);
2694 	error = setfown(td, nd.ni_vp, uid, gid);
2695 	vrele(nd.ni_vp);
2696 	VFS_UNLOCK_GIANT(vfslocked);
2697 	return (error);
2698 }
2699 
2700 /*
2701  * Set ownership given a path name, do not cross symlinks.
2702  */
2703 #ifndef _SYS_SYSPROTO_H_
2704 struct lchown_args {
2705 	char	*path;
2706 	int	uid;
2707 	int	gid;
2708 };
2709 #endif
2710 int
2711 lchown(td, uap)
2712 	struct thread *td;
2713 	register struct lchown_args /* {
2714 		char *path;
2715 		int uid;
2716 		int gid;
2717 	} */ *uap;
2718 {
2719 
2720 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2721 }
2722 
2723 int
2724 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2725     int gid)
2726 {
2727 	int error;
2728 	struct nameidata nd;
2729 	int vfslocked;
2730 
2731 	AUDIT_ARG(owner, uid, gid);
2732 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2733 	if ((error = namei(&nd)) != 0)
2734 		return (error);
2735 	vfslocked = NDHASGIANT(&nd);
2736 	NDFREE(&nd, NDF_ONLY_PNBUF);
2737 	error = setfown(td, nd.ni_vp, uid, gid);
2738 	vrele(nd.ni_vp);
2739 	VFS_UNLOCK_GIANT(vfslocked);
2740 	return (error);
2741 }
2742 
2743 /*
2744  * Set ownership given a file descriptor.
2745  */
2746 #ifndef _SYS_SYSPROTO_H_
2747 struct fchown_args {
2748 	int	fd;
2749 	int	uid;
2750 	int	gid;
2751 };
2752 #endif
2753 int
2754 fchown(td, uap)
2755 	struct thread *td;
2756 	register struct fchown_args /* {
2757 		int fd;
2758 		int uid;
2759 		int gid;
2760 	} */ *uap;
2761 {
2762 	struct file *fp;
2763 	int vfslocked;
2764 	int error;
2765 
2766 	AUDIT_ARG(fd, uap->fd);
2767 	AUDIT_ARG(owner, uap->uid, uap->gid);
2768 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2769 		return (error);
2770 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2771 #ifdef AUDIT
2772 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2773 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2774 	VOP_UNLOCK(fp->f_vnode, 0, td);
2775 #endif
2776 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2777 	VFS_UNLOCK_GIANT(vfslocked);
2778 	fdrop(fp, td);
2779 	return (error);
2780 }
2781 
2782 /*
2783  * Common implementation code for utimes(), lutimes(), and futimes().
2784  */
2785 static int
2786 getutimes(usrtvp, tvpseg, tsp)
2787 	const struct timeval *usrtvp;
2788 	enum uio_seg tvpseg;
2789 	struct timespec *tsp;
2790 {
2791 	struct timeval tv[2];
2792 	const struct timeval *tvp;
2793 	int error;
2794 
2795 	if (usrtvp == NULL) {
2796 		microtime(&tv[0]);
2797 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2798 		tsp[1] = tsp[0];
2799 	} else {
2800 		if (tvpseg == UIO_SYSSPACE) {
2801 			tvp = usrtvp;
2802 		} else {
2803 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2804 				return (error);
2805 			tvp = tv;
2806 		}
2807 
2808 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2809 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2810 			return (EINVAL);
2811 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2812 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2813 	}
2814 	return (0);
2815 }
2816 
2817 /*
2818  * Common implementation code for utimes(), lutimes(), and futimes().
2819  */
2820 static int
2821 setutimes(td, vp, ts, numtimes, nullflag)
2822 	struct thread *td;
2823 	struct vnode *vp;
2824 	const struct timespec *ts;
2825 	int numtimes;
2826 	int nullflag;
2827 {
2828 	int error, setbirthtime;
2829 	struct mount *mp;
2830 	struct vattr vattr;
2831 
2832 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2833 		return (error);
2834 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2835 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2836 	setbirthtime = 0;
2837 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2838 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2839 		setbirthtime = 1;
2840 	VATTR_NULL(&vattr);
2841 	vattr.va_atime = ts[0];
2842 	vattr.va_mtime = ts[1];
2843 	if (setbirthtime)
2844 		vattr.va_birthtime = ts[1];
2845 	if (numtimes > 2)
2846 		vattr.va_birthtime = ts[2];
2847 	if (nullflag)
2848 		vattr.va_vaflags |= VA_UTIMES_NULL;
2849 #ifdef MAC
2850 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2851 	    vattr.va_mtime);
2852 #endif
2853 	if (error == 0)
2854 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2855 	VOP_UNLOCK(vp, 0, td);
2856 	vn_finished_write(mp);
2857 	return (error);
2858 }
2859 
2860 /*
2861  * Set the access and modification times of a file.
2862  */
2863 #ifndef _SYS_SYSPROTO_H_
2864 struct utimes_args {
2865 	char	*path;
2866 	struct	timeval *tptr;
2867 };
2868 #endif
2869 int
2870 utimes(td, uap)
2871 	struct thread *td;
2872 	register struct utimes_args /* {
2873 		char *path;
2874 		struct timeval *tptr;
2875 	} */ *uap;
2876 {
2877 
2878 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2879 	    UIO_USERSPACE));
2880 }
2881 
2882 int
2883 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2884     struct timeval *tptr, enum uio_seg tptrseg)
2885 {
2886 	struct timespec ts[2];
2887 	int error;
2888 	struct nameidata nd;
2889 	int vfslocked;
2890 
2891 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2892 		return (error);
2893 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2894 	if ((error = namei(&nd)) != 0)
2895 		return (error);
2896 	vfslocked = NDHASGIANT(&nd);
2897 	NDFREE(&nd, NDF_ONLY_PNBUF);
2898 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2899 	vrele(nd.ni_vp);
2900 	VFS_UNLOCK_GIANT(vfslocked);
2901 	return (error);
2902 }
2903 
2904 /*
2905  * Set the access and modification times of a file.
2906  */
2907 #ifndef _SYS_SYSPROTO_H_
2908 struct lutimes_args {
2909 	char	*path;
2910 	struct	timeval *tptr;
2911 };
2912 #endif
2913 int
2914 lutimes(td, uap)
2915 	struct thread *td;
2916 	register struct lutimes_args /* {
2917 		char *path;
2918 		struct timeval *tptr;
2919 	} */ *uap;
2920 {
2921 
2922 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2923 	    UIO_USERSPACE));
2924 }
2925 
2926 int
2927 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2928     struct timeval *tptr, enum uio_seg tptrseg)
2929 {
2930 	struct timespec ts[2];
2931 	int error;
2932 	struct nameidata nd;
2933 	int vfslocked;
2934 
2935 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2936 		return (error);
2937 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2938 	if ((error = namei(&nd)) != 0)
2939 		return (error);
2940 	vfslocked = NDHASGIANT(&nd);
2941 	NDFREE(&nd, NDF_ONLY_PNBUF);
2942 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2943 	vrele(nd.ni_vp);
2944 	VFS_UNLOCK_GIANT(vfslocked);
2945 	return (error);
2946 }
2947 
2948 /*
2949  * Set the access and modification times of a file.
2950  */
2951 #ifndef _SYS_SYSPROTO_H_
2952 struct futimes_args {
2953 	int	fd;
2954 	struct	timeval *tptr;
2955 };
2956 #endif
2957 int
2958 futimes(td, uap)
2959 	struct thread *td;
2960 	register struct futimes_args /* {
2961 		int  fd;
2962 		struct timeval *tptr;
2963 	} */ *uap;
2964 {
2965 
2966 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2967 }
2968 
2969 int
2970 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2971     enum uio_seg tptrseg)
2972 {
2973 	struct timespec ts[2];
2974 	struct file *fp;
2975 	int vfslocked;
2976 	int error;
2977 
2978 	AUDIT_ARG(fd, fd);
2979 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2980 		return (error);
2981 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2982 		return (error);
2983 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2984 #ifdef AUDIT
2985 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2986 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2987 	VOP_UNLOCK(fp->f_vnode, 0, td);
2988 #endif
2989 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2990 	VFS_UNLOCK_GIANT(vfslocked);
2991 	fdrop(fp, td);
2992 	return (error);
2993 }
2994 
2995 /*
2996  * Truncate a file given its path name.
2997  */
2998 #ifndef _SYS_SYSPROTO_H_
2999 struct truncate_args {
3000 	char	*path;
3001 	int	pad;
3002 	off_t	length;
3003 };
3004 #endif
3005 int
3006 truncate(td, uap)
3007 	struct thread *td;
3008 	register struct truncate_args /* {
3009 		char *path;
3010 		int pad;
3011 		off_t length;
3012 	} */ *uap;
3013 {
3014 
3015 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3016 }
3017 
3018 int
3019 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3020 {
3021 	struct mount *mp;
3022 	struct vnode *vp;
3023 	struct vattr vattr;
3024 	int error;
3025 	struct nameidata nd;
3026 	int vfslocked;
3027 
3028 	if (length < 0)
3029 		return(EINVAL);
3030 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3031 	if ((error = namei(&nd)) != 0)
3032 		return (error);
3033 	vfslocked = NDHASGIANT(&nd);
3034 	vp = nd.ni_vp;
3035 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3036 		vrele(vp);
3037 		VFS_UNLOCK_GIANT(vfslocked);
3038 		return (error);
3039 	}
3040 	NDFREE(&nd, NDF_ONLY_PNBUF);
3041 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3042 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3043 	if (vp->v_type == VDIR)
3044 		error = EISDIR;
3045 #ifdef MAC
3046 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
3047 	}
3048 #endif
3049 	else if ((error = vn_writechk(vp)) == 0 &&
3050 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3051 		VATTR_NULL(&vattr);
3052 		vattr.va_size = length;
3053 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3054 	}
3055 	vput(vp);
3056 	vn_finished_write(mp);
3057 	VFS_UNLOCK_GIANT(vfslocked);
3058 	return (error);
3059 }
3060 
3061 /*
3062  * Truncate a file given a file descriptor.
3063  */
3064 #ifndef _SYS_SYSPROTO_H_
3065 struct ftruncate_args {
3066 	int	fd;
3067 	int	pad;
3068 	off_t	length;
3069 };
3070 #endif
3071 int
3072 ftruncate(td, uap)
3073 	struct thread *td;
3074 	register struct ftruncate_args /* {
3075 		int fd;
3076 		int pad;
3077 		off_t length;
3078 	} */ *uap;
3079 {
3080 	struct mount *mp;
3081 	struct vattr vattr;
3082 	struct vnode *vp;
3083 	struct file *fp;
3084 	int vfslocked;
3085 	int error;
3086 
3087 	AUDIT_ARG(fd, uap->fd);
3088 	if (uap->length < 0)
3089 		return(EINVAL);
3090 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3091 		return (error);
3092 	if ((fp->f_flag & FWRITE) == 0) {
3093 		fdrop(fp, td);
3094 		return (EINVAL);
3095 	}
3096 	vp = fp->f_vnode;
3097 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3098 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3099 		goto drop;
3100 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3101 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3102 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3103 	if (vp->v_type == VDIR)
3104 		error = EISDIR;
3105 #ifdef MAC
3106 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3107 	    vp))) {
3108 	}
3109 #endif
3110 	else if ((error = vn_writechk(vp)) == 0) {
3111 		VATTR_NULL(&vattr);
3112 		vattr.va_size = uap->length;
3113 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3114 	}
3115 	VOP_UNLOCK(vp, 0, td);
3116 	vn_finished_write(mp);
3117 drop:
3118 	VFS_UNLOCK_GIANT(vfslocked);
3119 	fdrop(fp, td);
3120 	return (error);
3121 }
3122 
3123 #if defined(COMPAT_43)
3124 /*
3125  * Truncate a file given its path name.
3126  */
3127 #ifndef _SYS_SYSPROTO_H_
3128 struct otruncate_args {
3129 	char	*path;
3130 	long	length;
3131 };
3132 #endif
3133 int
3134 otruncate(td, uap)
3135 	struct thread *td;
3136 	register struct otruncate_args /* {
3137 		char *path;
3138 		long length;
3139 	} */ *uap;
3140 {
3141 	struct truncate_args /* {
3142 		char *path;
3143 		int pad;
3144 		off_t length;
3145 	} */ nuap;
3146 
3147 	nuap.path = uap->path;
3148 	nuap.length = uap->length;
3149 	return (truncate(td, &nuap));
3150 }
3151 
3152 /*
3153  * Truncate a file given a file descriptor.
3154  */
3155 #ifndef _SYS_SYSPROTO_H_
3156 struct oftruncate_args {
3157 	int	fd;
3158 	long	length;
3159 };
3160 #endif
3161 int
3162 oftruncate(td, uap)
3163 	struct thread *td;
3164 	register struct oftruncate_args /* {
3165 		int fd;
3166 		long length;
3167 	} */ *uap;
3168 {
3169 	struct ftruncate_args /* {
3170 		int fd;
3171 		int pad;
3172 		off_t length;
3173 	} */ nuap;
3174 
3175 	nuap.fd = uap->fd;
3176 	nuap.length = uap->length;
3177 	return (ftruncate(td, &nuap));
3178 }
3179 #endif /* COMPAT_43 */
3180 
3181 /*
3182  * Sync an open file.
3183  */
3184 #ifndef _SYS_SYSPROTO_H_
3185 struct fsync_args {
3186 	int	fd;
3187 };
3188 #endif
3189 int
3190 fsync(td, uap)
3191 	struct thread *td;
3192 	struct fsync_args /* {
3193 		int fd;
3194 	} */ *uap;
3195 {
3196 	struct vnode *vp;
3197 	struct mount *mp;
3198 	struct file *fp;
3199 	int vfslocked;
3200 	int error;
3201 
3202 	AUDIT_ARG(fd, uap->fd);
3203 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3204 		return (error);
3205 	vp = fp->f_vnode;
3206 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3207 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3208 		goto drop;
3209 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3210 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3211 	if (vp->v_object != NULL) {
3212 		VM_OBJECT_LOCK(vp->v_object);
3213 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3214 		VM_OBJECT_UNLOCK(vp->v_object);
3215 	}
3216 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3217 
3218 	VOP_UNLOCK(vp, 0, td);
3219 	vn_finished_write(mp);
3220 drop:
3221 	VFS_UNLOCK_GIANT(vfslocked);
3222 	fdrop(fp, td);
3223 	return (error);
3224 }
3225 
3226 /*
3227  * Rename files.  Source and destination must either both be directories,
3228  * or both not be directories.  If target is a directory, it must be empty.
3229  */
3230 #ifndef _SYS_SYSPROTO_H_
3231 struct rename_args {
3232 	char	*from;
3233 	char	*to;
3234 };
3235 #endif
3236 int
3237 rename(td, uap)
3238 	struct thread *td;
3239 	register struct rename_args /* {
3240 		char *from;
3241 		char *to;
3242 	} */ *uap;
3243 {
3244 
3245 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3246 }
3247 
3248 int
3249 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3250 {
3251 	struct mount *mp = NULL;
3252 	struct vnode *tvp, *fvp, *tdvp;
3253 	struct nameidata fromnd, tond;
3254 	int tvfslocked;
3255 	int fvfslocked;
3256 	int error;
3257 
3258 	bwillwrite();
3259 #ifdef MAC
3260 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3261 	    AUDITVNODE1, pathseg, from, td);
3262 #else
3263 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3264 	    AUDITVNODE1, pathseg, from, td);
3265 #endif
3266 	if ((error = namei(&fromnd)) != 0)
3267 		return (error);
3268 	fvfslocked = NDHASGIANT(&fromnd);
3269 	tvfslocked = 0;
3270 #ifdef MAC
3271 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3272 	    fromnd.ni_vp, &fromnd.ni_cnd);
3273 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3274 	VOP_UNLOCK(fromnd.ni_vp, 0, td);
3275 #endif
3276 	fvp = fromnd.ni_vp;
3277 	if (error == 0)
3278 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3279 	if (error != 0) {
3280 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3281 		vrele(fromnd.ni_dvp);
3282 		vrele(fvp);
3283 		goto out1;
3284 	}
3285 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3286 	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3287 	if (fromnd.ni_vp->v_type == VDIR)
3288 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3289 	if ((error = namei(&tond)) != 0) {
3290 		/* Translate error code for rename("dir1", "dir2/."). */
3291 		if (error == EISDIR && fvp->v_type == VDIR)
3292 			error = EINVAL;
3293 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3294 		vrele(fromnd.ni_dvp);
3295 		vrele(fvp);
3296 		vn_finished_write(mp);
3297 		goto out1;
3298 	}
3299 	tvfslocked = NDHASGIANT(&tond);
3300 	tdvp = tond.ni_dvp;
3301 	tvp = tond.ni_vp;
3302 	if (tvp != NULL) {
3303 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3304 			error = ENOTDIR;
3305 			goto out;
3306 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3307 			error = EISDIR;
3308 			goto out;
3309 		}
3310 	}
3311 	if (fvp == tdvp)
3312 		error = EINVAL;
3313 	/*
3314 	 * If the source is the same as the destination (that is, if they
3315 	 * are links to the same vnode), then there is nothing to do.
3316 	 */
3317 	if (fvp == tvp)
3318 		error = -1;
3319 #ifdef MAC
3320 	else
3321 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3322 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3323 #endif
3324 out:
3325 	if (!error) {
3326 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3327 		if (fromnd.ni_dvp != tdvp) {
3328 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3329 		}
3330 		if (tvp) {
3331 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3332 		}
3333 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3334 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3335 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3336 		NDFREE(&tond, NDF_ONLY_PNBUF);
3337 	} else {
3338 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3339 		NDFREE(&tond, NDF_ONLY_PNBUF);
3340 		if (tvp)
3341 			vput(tvp);
3342 		if (tdvp == tvp)
3343 			vrele(tdvp);
3344 		else
3345 			vput(tdvp);
3346 		vrele(fromnd.ni_dvp);
3347 		vrele(fvp);
3348 	}
3349 	vrele(tond.ni_startdir);
3350 	vn_finished_write(mp);
3351 out1:
3352 	if (fromnd.ni_startdir)
3353 		vrele(fromnd.ni_startdir);
3354 	VFS_UNLOCK_GIANT(fvfslocked);
3355 	VFS_UNLOCK_GIANT(tvfslocked);
3356 	if (error == -1)
3357 		return (0);
3358 	return (error);
3359 }
3360 
3361 /*
3362  * Make a directory file.
3363  */
3364 #ifndef _SYS_SYSPROTO_H_
3365 struct mkdir_args {
3366 	char	*path;
3367 	int	mode;
3368 };
3369 #endif
3370 int
3371 mkdir(td, uap)
3372 	struct thread *td;
3373 	register struct mkdir_args /* {
3374 		char *path;
3375 		int mode;
3376 	} */ *uap;
3377 {
3378 
3379 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3380 }
3381 
3382 int
3383 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3384 {
3385 	struct mount *mp;
3386 	struct vnode *vp;
3387 	struct vattr vattr;
3388 	int error;
3389 	struct nameidata nd;
3390 	int vfslocked;
3391 
3392 	AUDIT_ARG(mode, mode);
3393 restart:
3394 	bwillwrite();
3395 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3396 	    segflg, path, td);
3397 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3398 	if ((error = namei(&nd)) != 0)
3399 		return (error);
3400 	vfslocked = NDHASGIANT(&nd);
3401 	vp = nd.ni_vp;
3402 	if (vp != NULL) {
3403 		NDFREE(&nd, NDF_ONLY_PNBUF);
3404 		/*
3405 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3406 		 * the strange behaviour of leaving the vnode unlocked
3407 		 * if the target is the same vnode as the parent.
3408 		 */
3409 		if (vp == nd.ni_dvp)
3410 			vrele(nd.ni_dvp);
3411 		else
3412 			vput(nd.ni_dvp);
3413 		vrele(vp);
3414 		VFS_UNLOCK_GIANT(vfslocked);
3415 		return (EEXIST);
3416 	}
3417 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3418 		NDFREE(&nd, NDF_ONLY_PNBUF);
3419 		vput(nd.ni_dvp);
3420 		VFS_UNLOCK_GIANT(vfslocked);
3421 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3422 			return (error);
3423 		goto restart;
3424 	}
3425 	VATTR_NULL(&vattr);
3426 	vattr.va_type = VDIR;
3427 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3428 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3429 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3430 #ifdef MAC
3431 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3432 	    &vattr);
3433 	if (error)
3434 		goto out;
3435 #endif
3436 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3437 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3438 #ifdef MAC
3439 out:
3440 #endif
3441 	NDFREE(&nd, NDF_ONLY_PNBUF);
3442 	vput(nd.ni_dvp);
3443 	if (!error)
3444 		vput(nd.ni_vp);
3445 	vn_finished_write(mp);
3446 	VFS_UNLOCK_GIANT(vfslocked);
3447 	return (error);
3448 }
3449 
3450 /*
3451  * Remove a directory file.
3452  */
3453 #ifndef _SYS_SYSPROTO_H_
3454 struct rmdir_args {
3455 	char	*path;
3456 };
3457 #endif
3458 int
3459 rmdir(td, uap)
3460 	struct thread *td;
3461 	struct rmdir_args /* {
3462 		char *path;
3463 	} */ *uap;
3464 {
3465 
3466 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3467 }
3468 
3469 int
3470 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3471 {
3472 	struct mount *mp;
3473 	struct vnode *vp;
3474 	int error;
3475 	struct nameidata nd;
3476 	int vfslocked;
3477 
3478 restart:
3479 	bwillwrite();
3480 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3481 	    pathseg, path, td);
3482 	if ((error = namei(&nd)) != 0)
3483 		return (error);
3484 	vfslocked = NDHASGIANT(&nd);
3485 	vp = nd.ni_vp;
3486 	if (vp->v_type != VDIR) {
3487 		error = ENOTDIR;
3488 		goto out;
3489 	}
3490 	/*
3491 	 * No rmdir "." please.
3492 	 */
3493 	if (nd.ni_dvp == vp) {
3494 		error = EINVAL;
3495 		goto out;
3496 	}
3497 	/*
3498 	 * The root of a mounted filesystem cannot be deleted.
3499 	 */
3500 	if (vp->v_vflag & VV_ROOT) {
3501 		error = EBUSY;
3502 		goto out;
3503 	}
3504 #ifdef MAC
3505 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3506 	    &nd.ni_cnd);
3507 	if (error)
3508 		goto out;
3509 #endif
3510 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3511 		NDFREE(&nd, NDF_ONLY_PNBUF);
3512 		vput(vp);
3513 		if (nd.ni_dvp == vp)
3514 			vrele(nd.ni_dvp);
3515 		else
3516 			vput(nd.ni_dvp);
3517 		VFS_UNLOCK_GIANT(vfslocked);
3518 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3519 			return (error);
3520 		goto restart;
3521 	}
3522 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3523 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3524 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3525 	vn_finished_write(mp);
3526 out:
3527 	NDFREE(&nd, NDF_ONLY_PNBUF);
3528 	vput(vp);
3529 	if (nd.ni_dvp == vp)
3530 		vrele(nd.ni_dvp);
3531 	else
3532 		vput(nd.ni_dvp);
3533 	VFS_UNLOCK_GIANT(vfslocked);
3534 	return (error);
3535 }
3536 
3537 #ifdef COMPAT_43
3538 /*
3539  * Read a block of directory entries in a filesystem independent format.
3540  */
3541 #ifndef _SYS_SYSPROTO_H_
3542 struct ogetdirentries_args {
3543 	int	fd;
3544 	char	*buf;
3545 	u_int	count;
3546 	long	*basep;
3547 };
3548 #endif
3549 int
3550 ogetdirentries(td, uap)
3551 	struct thread *td;
3552 	register struct ogetdirentries_args /* {
3553 		int fd;
3554 		char *buf;
3555 		u_int count;
3556 		long *basep;
3557 	} */ *uap;
3558 {
3559 	struct vnode *vp;
3560 	struct file *fp;
3561 	struct uio auio, kuio;
3562 	struct iovec aiov, kiov;
3563 	struct dirent *dp, *edp;
3564 	caddr_t dirbuf;
3565 	int error, eofflag, readcnt, vfslocked;
3566 	long loff;
3567 
3568 	/* XXX arbitrary sanity limit on `count'. */
3569 	if (uap->count > 64 * 1024)
3570 		return (EINVAL);
3571 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3572 		return (error);
3573 	if ((fp->f_flag & FREAD) == 0) {
3574 		fdrop(fp, td);
3575 		return (EBADF);
3576 	}
3577 	vp = fp->f_vnode;
3578 unionread:
3579 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3580 	if (vp->v_type != VDIR) {
3581 		VFS_UNLOCK_GIANT(vfslocked);
3582 		fdrop(fp, td);
3583 		return (EINVAL);
3584 	}
3585 	aiov.iov_base = uap->buf;
3586 	aiov.iov_len = uap->count;
3587 	auio.uio_iov = &aiov;
3588 	auio.uio_iovcnt = 1;
3589 	auio.uio_rw = UIO_READ;
3590 	auio.uio_segflg = UIO_USERSPACE;
3591 	auio.uio_td = td;
3592 	auio.uio_resid = uap->count;
3593 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3594 	loff = auio.uio_offset = fp->f_offset;
3595 #ifdef MAC
3596 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3597 	if (error) {
3598 		VOP_UNLOCK(vp, 0, td);
3599 		VFS_UNLOCK_GIANT(vfslocked);
3600 		fdrop(fp, td);
3601 		return (error);
3602 	}
3603 #endif
3604 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3605 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3606 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3607 			    NULL, NULL);
3608 			fp->f_offset = auio.uio_offset;
3609 		} else
3610 #	endif
3611 	{
3612 		kuio = auio;
3613 		kuio.uio_iov = &kiov;
3614 		kuio.uio_segflg = UIO_SYSSPACE;
3615 		kiov.iov_len = uap->count;
3616 		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3617 		kiov.iov_base = dirbuf;
3618 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3619 			    NULL, NULL);
3620 		fp->f_offset = kuio.uio_offset;
3621 		if (error == 0) {
3622 			readcnt = uap->count - kuio.uio_resid;
3623 			edp = (struct dirent *)&dirbuf[readcnt];
3624 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3625 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3626 					/*
3627 					 * The expected low byte of
3628 					 * dp->d_namlen is our dp->d_type.
3629 					 * The high MBZ byte of dp->d_namlen
3630 					 * is our dp->d_namlen.
3631 					 */
3632 					dp->d_type = dp->d_namlen;
3633 					dp->d_namlen = 0;
3634 #				else
3635 					/*
3636 					 * The dp->d_type is the high byte
3637 					 * of the expected dp->d_namlen,
3638 					 * so must be zero'ed.
3639 					 */
3640 					dp->d_type = 0;
3641 #				endif
3642 				if (dp->d_reclen > 0) {
3643 					dp = (struct dirent *)
3644 					    ((char *)dp + dp->d_reclen);
3645 				} else {
3646 					error = EIO;
3647 					break;
3648 				}
3649 			}
3650 			if (dp >= edp)
3651 				error = uiomove(dirbuf, readcnt, &auio);
3652 		}
3653 		FREE(dirbuf, M_TEMP);
3654 	}
3655 	VOP_UNLOCK(vp, 0, td);
3656 	if (error) {
3657 		VFS_UNLOCK_GIANT(vfslocked);
3658 		fdrop(fp, td);
3659 		return (error);
3660 	}
3661 	if (uap->count == auio.uio_resid) {
3662 		if (union_dircheckp) {
3663 			error = union_dircheckp(td, &vp, fp);
3664 			if (error == -1) {
3665 				VFS_UNLOCK_GIANT(vfslocked);
3666 				goto unionread;
3667 			}
3668 			if (error) {
3669 				VFS_UNLOCK_GIANT(vfslocked);
3670 				fdrop(fp, td);
3671 				return (error);
3672 			}
3673 		}
3674 		/*
3675 		 * XXX We could delay dropping the lock above but
3676 		 * union_dircheckp complicates things.
3677 		 */
3678 		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
3679 		if ((vp->v_vflag & VV_ROOT) &&
3680 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3681 			struct vnode *tvp = vp;
3682 			vp = vp->v_mount->mnt_vnodecovered;
3683 			VREF(vp);
3684 			fp->f_vnode = vp;
3685 			fp->f_data = vp;
3686 			fp->f_offset = 0;
3687 			vput(tvp);
3688 			VFS_UNLOCK_GIANT(vfslocked);
3689 			goto unionread;
3690 		}
3691 		VOP_UNLOCK(vp, 0, td);
3692 	}
3693 	VFS_UNLOCK_GIANT(vfslocked);
3694 	error = copyout(&loff, uap->basep, sizeof(long));
3695 	fdrop(fp, td);
3696 	td->td_retval[0] = uap->count - auio.uio_resid;
3697 	return (error);
3698 }
3699 #endif /* COMPAT_43 */
3700 
3701 /*
3702  * Read a block of directory entries in a filesystem independent format.
3703  */
3704 #ifndef _SYS_SYSPROTO_H_
3705 struct getdirentries_args {
3706 	int	fd;
3707 	char	*buf;
3708 	u_int	count;
3709 	long	*basep;
3710 };
3711 #endif
3712 int
3713 getdirentries(td, uap)
3714 	struct thread *td;
3715 	register struct getdirentries_args /* {
3716 		int fd;
3717 		char *buf;
3718 		u_int count;
3719 		long *basep;
3720 	} */ *uap;
3721 {
3722 	struct vnode *vp;
3723 	struct file *fp;
3724 	struct uio auio;
3725 	struct iovec aiov;
3726 	int vfslocked;
3727 	long loff;
3728 	int error, eofflag;
3729 
3730 	AUDIT_ARG(fd, uap->fd);
3731 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3732 		return (error);
3733 	if ((fp->f_flag & FREAD) == 0) {
3734 		fdrop(fp, td);
3735 		return (EBADF);
3736 	}
3737 	vp = fp->f_vnode;
3738 unionread:
3739 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3740 	if (vp->v_type != VDIR) {
3741 		error = EINVAL;
3742 		goto fail;
3743 	}
3744 	aiov.iov_base = uap->buf;
3745 	aiov.iov_len = uap->count;
3746 	auio.uio_iov = &aiov;
3747 	auio.uio_iovcnt = 1;
3748 	auio.uio_rw = UIO_READ;
3749 	auio.uio_segflg = UIO_USERSPACE;
3750 	auio.uio_td = td;
3751 	auio.uio_resid = uap->count;
3752 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3753 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3754 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3755 	loff = auio.uio_offset = fp->f_offset;
3756 #ifdef MAC
3757 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3758 	if (error == 0)
3759 #endif
3760 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3761 		    NULL);
3762 	fp->f_offset = auio.uio_offset;
3763 	VOP_UNLOCK(vp, 0, td);
3764 	if (error)
3765 		goto fail;
3766 	if (uap->count == auio.uio_resid) {
3767 		if (union_dircheckp) {
3768 			error = union_dircheckp(td, &vp, fp);
3769 			if (error == -1) {
3770 				VFS_UNLOCK_GIANT(vfslocked);
3771 				goto unionread;
3772 			}
3773 			if (error)
3774 				goto fail;
3775 		}
3776 		/*
3777 		 * XXX We could delay dropping the lock above but
3778 		 * union_dircheckp complicates things.
3779 		 */
3780 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3781 		if ((vp->v_vflag & VV_ROOT) &&
3782 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3783 			struct vnode *tvp = vp;
3784 			vp = vp->v_mount->mnt_vnodecovered;
3785 			VREF(vp);
3786 			fp->f_vnode = vp;
3787 			fp->f_data = vp;
3788 			fp->f_offset = 0;
3789 			vput(tvp);
3790 			VFS_UNLOCK_GIANT(vfslocked);
3791 			goto unionread;
3792 		}
3793 		VOP_UNLOCK(vp, 0, td);
3794 	}
3795 	if (uap->basep != NULL) {
3796 		error = copyout(&loff, uap->basep, sizeof(long));
3797 	}
3798 	td->td_retval[0] = uap->count - auio.uio_resid;
3799 fail:
3800 	VFS_UNLOCK_GIANT(vfslocked);
3801 	fdrop(fp, td);
3802 	return (error);
3803 }
3804 #ifndef _SYS_SYSPROTO_H_
3805 struct getdents_args {
3806 	int fd;
3807 	char *buf;
3808 	size_t count;
3809 };
3810 #endif
3811 int
3812 getdents(td, uap)
3813 	struct thread *td;
3814 	register struct getdents_args /* {
3815 		int fd;
3816 		char *buf;
3817 		u_int count;
3818 	} */ *uap;
3819 {
3820 	struct getdirentries_args ap;
3821 	ap.fd = uap->fd;
3822 	ap.buf = uap->buf;
3823 	ap.count = uap->count;
3824 	ap.basep = NULL;
3825 	return (getdirentries(td, &ap));
3826 }
3827 
3828 /*
3829  * Set the mode mask for creation of filesystem nodes.
3830  *
3831  * MP SAFE
3832  */
3833 #ifndef _SYS_SYSPROTO_H_
3834 struct umask_args {
3835 	int	newmask;
3836 };
3837 #endif
3838 int
3839 umask(td, uap)
3840 	struct thread *td;
3841 	struct umask_args /* {
3842 		int newmask;
3843 	} */ *uap;
3844 {
3845 	register struct filedesc *fdp;
3846 
3847 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3848 	fdp = td->td_proc->p_fd;
3849 	td->td_retval[0] = fdp->fd_cmask;
3850 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3851 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3852 	return (0);
3853 }
3854 
3855 /*
3856  * Void all references to file by ripping underlying filesystem
3857  * away from vnode.
3858  */
3859 #ifndef _SYS_SYSPROTO_H_
3860 struct revoke_args {
3861 	char	*path;
3862 };
3863 #endif
3864 int
3865 revoke(td, uap)
3866 	struct thread *td;
3867 	register struct revoke_args /* {
3868 		char *path;
3869 	} */ *uap;
3870 {
3871 	struct vnode *vp;
3872 	struct vattr vattr;
3873 	int error;
3874 	struct nameidata nd;
3875 	int vfslocked;
3876 
3877 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3878 	    UIO_USERSPACE, uap->path, td);
3879 	if ((error = namei(&nd)) != 0)
3880 		return (error);
3881 	vfslocked = NDHASGIANT(&nd);
3882 	vp = nd.ni_vp;
3883 	NDFREE(&nd, NDF_ONLY_PNBUF);
3884 	if (vp->v_type != VCHR) {
3885 		error = EINVAL;
3886 		goto out;
3887 	}
3888 #ifdef MAC
3889 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3890 	if (error)
3891 		goto out;
3892 #endif
3893 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3894 	if (error)
3895 		goto out;
3896 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3897 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
3898 		if (error)
3899 			goto out;
3900 	}
3901 	if (vcount(vp) > 1)
3902 		VOP_REVOKE(vp, REVOKEALL);
3903 out:
3904 	vput(vp);
3905 	VFS_UNLOCK_GIANT(vfslocked);
3906 	return (error);
3907 }
3908 
3909 /*
3910  * Convert a user file descriptor to a kernel file entry.
3911  * A reference on the file entry is held upon returning.
3912  */
3913 int
3914 getvnode(fdp, fd, fpp)
3915 	struct filedesc *fdp;
3916 	int fd;
3917 	struct file **fpp;
3918 {
3919 	int error;
3920 	struct file *fp;
3921 
3922 	fp = NULL;
3923 	if (fdp == NULL)
3924 		error = EBADF;
3925 	else {
3926 		FILEDESC_LOCK(fdp);
3927 		if ((u_int)fd >= fdp->fd_nfiles ||
3928 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3929 			error = EBADF;
3930 		else if (fp->f_vnode == NULL) {
3931 			fp = NULL;
3932 			error = EINVAL;
3933 		} else {
3934 			fhold(fp);
3935 			error = 0;
3936 		}
3937 		FILEDESC_UNLOCK(fdp);
3938 	}
3939 	*fpp = fp;
3940 	return (error);
3941 }
3942 
3943 /*
3944  * Get (NFS) file handle
3945  */
3946 #ifndef _SYS_SYSPROTO_H_
3947 struct lgetfh_args {
3948 	char	*fname;
3949 	fhandle_t *fhp;
3950 };
3951 #endif
3952 int
3953 lgetfh(td, uap)
3954 	struct thread *td;
3955 	register struct lgetfh_args *uap;
3956 {
3957 	struct nameidata nd;
3958 	fhandle_t fh;
3959 	register struct vnode *vp;
3960 	int vfslocked;
3961 	int error;
3962 
3963 	error = suser(td);
3964 	if (error)
3965 		return (error);
3966 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3967 	    UIO_USERSPACE, uap->fname, td);
3968 	error = namei(&nd);
3969 	if (error)
3970 		return (error);
3971 	vfslocked = NDHASGIANT(&nd);
3972 	NDFREE(&nd, NDF_ONLY_PNBUF);
3973 	vp = nd.ni_vp;
3974 	bzero(&fh, sizeof(fh));
3975 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3976 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3977 	vput(vp);
3978 	VFS_UNLOCK_GIANT(vfslocked);
3979 	if (error)
3980 		return (error);
3981 	error = copyout(&fh, uap->fhp, sizeof (fh));
3982 	return (error);
3983 }
3984 
3985 #ifndef _SYS_SYSPROTO_H_
3986 struct getfh_args {
3987 	char	*fname;
3988 	fhandle_t *fhp;
3989 };
3990 #endif
3991 int
3992 getfh(td, uap)
3993 	struct thread *td;
3994 	register struct getfh_args *uap;
3995 {
3996 	struct nameidata nd;
3997 	fhandle_t fh;
3998 	register struct vnode *vp;
3999 	int vfslocked;
4000 	int error;
4001 
4002 	error = suser(td);
4003 	if (error)
4004 		return (error);
4005 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4006 	    UIO_USERSPACE, uap->fname, td);
4007 	error = namei(&nd);
4008 	if (error)
4009 		return (error);
4010 	vfslocked = NDHASGIANT(&nd);
4011 	NDFREE(&nd, NDF_ONLY_PNBUF);
4012 	vp = nd.ni_vp;
4013 	bzero(&fh, sizeof(fh));
4014 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4015 	error = VFS_VPTOFH(vp, &fh.fh_fid);
4016 	vput(vp);
4017 	VFS_UNLOCK_GIANT(vfslocked);
4018 	if (error)
4019 		return (error);
4020 	error = copyout(&fh, uap->fhp, sizeof (fh));
4021 	return (error);
4022 }
4023 
4024 /*
4025  * syscall for the rpc.lockd to use to translate a NFS file handle into
4026  * an open descriptor.
4027  *
4028  * warning: do not remove the suser() call or this becomes one giant
4029  * security hole.
4030  *
4031  * MP SAFE
4032  */
4033 #ifndef _SYS_SYSPROTO_H_
4034 struct fhopen_args {
4035 	const struct fhandle *u_fhp;
4036 	int flags;
4037 };
4038 #endif
4039 int
4040 fhopen(td, uap)
4041 	struct thread *td;
4042 	struct fhopen_args /* {
4043 		const struct fhandle *u_fhp;
4044 		int flags;
4045 	} */ *uap;
4046 {
4047 	struct proc *p = td->td_proc;
4048 	struct mount *mp;
4049 	struct vnode *vp;
4050 	struct fhandle fhp;
4051 	struct vattr vat;
4052 	struct vattr *vap = &vat;
4053 	struct flock lf;
4054 	struct file *fp;
4055 	register struct filedesc *fdp = p->p_fd;
4056 	int fmode, mode, error, type;
4057 	struct file *nfp;
4058 	int vfslocked;
4059 	int indx;
4060 
4061 	error = suser(td);
4062 	if (error)
4063 		return (error);
4064 	fmode = FFLAGS(uap->flags);
4065 	/* why not allow a non-read/write open for our lockd? */
4066 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4067 		return (EINVAL);
4068 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4069 	if (error)
4070 		return(error);
4071 	/* find the mount point */
4072 	mp = vfs_getvfs(&fhp.fh_fsid);
4073 	if (mp == NULL)
4074 		return (ESTALE);
4075 	vfslocked = VFS_LOCK_GIANT(mp);
4076 	/* now give me my vnode, it gets returned to me locked */
4077 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4078 	if (error)
4079 		goto out;
4080 	/*
4081 	 * from now on we have to make sure not
4082 	 * to forget about the vnode
4083 	 * any error that causes an abort must vput(vp)
4084 	 * just set error = err and 'goto bad;'.
4085 	 */
4086 
4087 	/*
4088 	 * from vn_open
4089 	 */
4090 	if (vp->v_type == VLNK) {
4091 		error = EMLINK;
4092 		goto bad;
4093 	}
4094 	if (vp->v_type == VSOCK) {
4095 		error = EOPNOTSUPP;
4096 		goto bad;
4097 	}
4098 	mode = 0;
4099 	if (fmode & (FWRITE | O_TRUNC)) {
4100 		if (vp->v_type == VDIR) {
4101 			error = EISDIR;
4102 			goto bad;
4103 		}
4104 		error = vn_writechk(vp);
4105 		if (error)
4106 			goto bad;
4107 		mode |= VWRITE;
4108 	}
4109 	if (fmode & FREAD)
4110 		mode |= VREAD;
4111 	if (fmode & O_APPEND)
4112 		mode |= VAPPEND;
4113 #ifdef MAC
4114 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4115 	if (error)
4116 		goto bad;
4117 #endif
4118 	if (mode) {
4119 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4120 		if (error)
4121 			goto bad;
4122 	}
4123 	if (fmode & O_TRUNC) {
4124 		VOP_UNLOCK(vp, 0, td);				/* XXX */
4125 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4126 			vrele(vp);
4127 			goto out;
4128 		}
4129 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4130 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4131 #ifdef MAC
4132 		/*
4133 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4134 		 * should be right.
4135 		 */
4136 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4137 		if (error == 0) {
4138 #endif
4139 			VATTR_NULL(vap);
4140 			vap->va_size = 0;
4141 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4142 #ifdef MAC
4143 		}
4144 #endif
4145 		vn_finished_write(mp);
4146 		if (error)
4147 			goto bad;
4148 	}
4149 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
4150 	if (error)
4151 		goto bad;
4152 
4153 	if (fmode & FWRITE)
4154 		vp->v_writecount++;
4155 
4156 	/*
4157 	 * end of vn_open code
4158 	 */
4159 
4160 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4161 		if (fmode & FWRITE)
4162 			vp->v_writecount--;
4163 		goto bad;
4164 	}
4165 	/* An extra reference on `nfp' has been held for us by falloc(). */
4166 	fp = nfp;
4167 
4168 	nfp->f_vnode = vp;
4169 	nfp->f_data = vp;
4170 	nfp->f_flag = fmode & FMASK;
4171 	nfp->f_ops = &vnops;
4172 	nfp->f_type = DTYPE_VNODE;
4173 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4174 		lf.l_whence = SEEK_SET;
4175 		lf.l_start = 0;
4176 		lf.l_len = 0;
4177 		if (fmode & O_EXLOCK)
4178 			lf.l_type = F_WRLCK;
4179 		else
4180 			lf.l_type = F_RDLCK;
4181 		type = F_FLOCK;
4182 		if ((fmode & FNONBLOCK) == 0)
4183 			type |= F_WAIT;
4184 		VOP_UNLOCK(vp, 0, td);
4185 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4186 			    type)) != 0) {
4187 			/*
4188 			 * The lock request failed.  Normally close the
4189 			 * descriptor but handle the case where someone might
4190 			 * have dup()d or close()d it when we weren't looking.
4191 			 */
4192 			fdclose(fdp, fp, indx, td);
4193 
4194 			/*
4195 			 * release our private reference
4196 			 */
4197 			fdrop(fp, td);
4198 			goto out;
4199 		}
4200 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4201 		fp->f_flag |= FHASLOCK;
4202 	}
4203 
4204 	VOP_UNLOCK(vp, 0, td);
4205 	fdrop(fp, td);
4206 	vfs_rel(mp);
4207 	VFS_UNLOCK_GIANT(vfslocked);
4208 	td->td_retval[0] = indx;
4209 	return (0);
4210 
4211 bad:
4212 	vput(vp);
4213 out:
4214 	vfs_rel(mp);
4215 	VFS_UNLOCK_GIANT(vfslocked);
4216 	return (error);
4217 }
4218 
4219 /*
4220  * Stat an (NFS) file handle.
4221  *
4222  * MP SAFE
4223  */
4224 #ifndef _SYS_SYSPROTO_H_
4225 struct fhstat_args {
4226 	struct fhandle *u_fhp;
4227 	struct stat *sb;
4228 };
4229 #endif
4230 int
4231 fhstat(td, uap)
4232 	struct thread *td;
4233 	register struct fhstat_args /* {
4234 		struct fhandle *u_fhp;
4235 		struct stat *sb;
4236 	} */ *uap;
4237 {
4238 	struct stat sb;
4239 	fhandle_t fh;
4240 	struct mount *mp;
4241 	struct vnode *vp;
4242 	int vfslocked;
4243 	int error;
4244 
4245 	error = suser(td);
4246 	if (error)
4247 		return (error);
4248 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4249 	if (error)
4250 		return (error);
4251 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4252 		return (ESTALE);
4253 	vfslocked = VFS_LOCK_GIANT(mp);
4254 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4255 		vfs_rel(mp);
4256 		VFS_UNLOCK_GIANT(vfslocked);
4257 		return (error);
4258 	}
4259 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4260 	vput(vp);
4261 	vfs_rel(mp);
4262 	VFS_UNLOCK_GIANT(vfslocked);
4263 	if (error)
4264 		return (error);
4265 	error = copyout(&sb, uap->sb, sizeof(sb));
4266 	return (error);
4267 }
4268 
4269 /*
4270  * Implement fstatfs() for (NFS) file handles.
4271  *
4272  * MP SAFE
4273  */
4274 #ifndef _SYS_SYSPROTO_H_
4275 struct fhstatfs_args {
4276 	struct fhandle *u_fhp;
4277 	struct statfs *buf;
4278 };
4279 #endif
4280 int
4281 fhstatfs(td, uap)
4282 	struct thread *td;
4283 	struct fhstatfs_args /* {
4284 		struct fhandle *u_fhp;
4285 		struct statfs *buf;
4286 	} */ *uap;
4287 {
4288 	struct statfs sf;
4289 	fhandle_t fh;
4290 	int error;
4291 
4292 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4293 	if (error)
4294 		return (error);
4295 	error = kern_fhstatfs(td, fh, &sf);
4296 	if (error)
4297 		return (error);
4298 	return (copyout(&sf, uap->buf, sizeof(sf)));
4299 }
4300 
4301 int
4302 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4303 {
4304 	struct statfs *sp;
4305 	struct mount *mp;
4306 	struct vnode *vp;
4307 	int vfslocked;
4308 	int error;
4309 
4310 	error = suser(td);
4311 	if (error)
4312 		return (error);
4313 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4314 		return (ESTALE);
4315 	vfslocked = VFS_LOCK_GIANT(mp);
4316 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4317 	if (error) {
4318 		VFS_UNLOCK_GIANT(vfslocked);
4319 		vfs_rel(mp);
4320 		return (error);
4321 	}
4322 	vput(vp);
4323 	sp = NULL;
4324 	error = prison_canseemount(td->td_ucred, mp);
4325 	if (error)
4326 		goto out;
4327 #ifdef MAC
4328 	error = mac_check_mount_stat(td->td_ucred, mp);
4329 	if (error)
4330 		goto out;
4331 #endif
4332 	/*
4333 	 * Set these in case the underlying filesystem fails to do so.
4334 	 */
4335 	sp = &mp->mnt_stat;
4336 	sp->f_version = STATFS_VERSION;
4337 	sp->f_namemax = NAME_MAX;
4338 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4339 	error = VFS_STATFS(mp, sp, td);
4340 out:
4341 	vfs_rel(mp);
4342 	VFS_UNLOCK_GIANT(vfslocked);
4343 	if (sp)
4344 		*buf = *sp;
4345 	return (error);
4346 }
4347 
4348 /*
4349  * Syscall to push extended attribute configuration information into the
4350  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4351  * a command (int cmd), and attribute name and misc data.  For now, the
4352  * attribute name is left in userspace for consumption by the VFS_op.
4353  * It will probably be changed to be copied into sysspace by the
4354  * syscall in the future, once issues with various consumers of the
4355  * attribute code have raised their hands.
4356  *
4357  * Currently this is used only by UFS Extended Attributes.
4358  */
4359 int
4360 extattrctl(td, uap)
4361 	struct thread *td;
4362 	struct extattrctl_args /* {
4363 		const char *path;
4364 		int cmd;
4365 		const char *filename;
4366 		int attrnamespace;
4367 		const char *attrname;
4368 	} */ *uap;
4369 {
4370 	struct vnode *filename_vp;
4371 	struct nameidata nd;
4372 	struct mount *mp, *mp_writable;
4373 	char attrname[EXTATTR_MAXNAMELEN];
4374 	int vfslocked, fnvfslocked, error;
4375 
4376 	AUDIT_ARG(cmd, uap->cmd);
4377 	AUDIT_ARG(value, uap->attrnamespace);
4378 	/*
4379 	 * uap->attrname is not always defined.  We check again later when we
4380 	 * invoke the VFS call so as to pass in NULL there if needed.
4381 	 */
4382 	if (uap->attrname != NULL) {
4383 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4384 		    NULL);
4385 		if (error)
4386 			return (error);
4387 	}
4388 	AUDIT_ARG(text, attrname);
4389 
4390 	vfslocked = fnvfslocked = 0;
4391 	/*
4392 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4393 	 * which VFS_EXTATTRCTL() will later release.
4394 	 */
4395 	filename_vp = NULL;
4396 	if (uap->filename != NULL) {
4397 		NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF |
4398 		    AUDITVNODE2, UIO_USERSPACE, uap->filename, td);
4399 		error = namei(&nd);
4400 		if (error)
4401 			return (error);
4402 		fnvfslocked = NDHASGIANT(&nd);
4403 		filename_vp = nd.ni_vp;
4404 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4405 	}
4406 
4407 	/* uap->path is always defined. */
4408 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4409 	    uap->path, td);
4410 	error = namei(&nd);
4411 	if (error) {
4412 		if (filename_vp != NULL)
4413 			vput(filename_vp);
4414 		goto out;
4415 	}
4416 	vfslocked = NDHASGIANT(&nd);
4417 	mp = nd.ni_vp->v_mount;
4418 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4419 	NDFREE(&nd, 0);
4420 	if (error) {
4421 		if (filename_vp != NULL)
4422 			vput(filename_vp);
4423 		goto out;
4424 	}
4425 
4426 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
4427 	    uap->attrname != NULL ? attrname : NULL, td);
4428 
4429 	vn_finished_write(mp_writable);
4430 	/*
4431 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4432 	 * filename_vp, so vrele it if it is defined.
4433 	 */
4434 	if (filename_vp != NULL)
4435 		vrele(filename_vp);
4436 out:
4437 	VFS_UNLOCK_GIANT(fnvfslocked);
4438 	VFS_UNLOCK_GIANT(vfslocked);
4439 	return (error);
4440 }
4441 
4442 /*-
4443  * Set a named extended attribute on a file or directory
4444  *
4445  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4446  *            kernelspace string pointer "attrname", userspace buffer
4447  *            pointer "data", buffer length "nbytes", thread "td".
4448  * Returns: 0 on success, an error number otherwise
4449  * Locks: none
4450  * References: vp must be a valid reference for the duration of the call
4451  */
4452 static int
4453 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4454     void *data, size_t nbytes, struct thread *td)
4455 {
4456 	struct mount *mp;
4457 	struct uio auio;
4458 	struct iovec aiov;
4459 	ssize_t cnt;
4460 	int error;
4461 
4462 	VFS_ASSERT_GIANT(vp->v_mount);
4463 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4464 	if (error)
4465 		return (error);
4466 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4467 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4468 
4469 	aiov.iov_base = data;
4470 	aiov.iov_len = nbytes;
4471 	auio.uio_iov = &aiov;
4472 	auio.uio_iovcnt = 1;
4473 	auio.uio_offset = 0;
4474 	if (nbytes > INT_MAX) {
4475 		error = EINVAL;
4476 		goto done;
4477 	}
4478 	auio.uio_resid = nbytes;
4479 	auio.uio_rw = UIO_WRITE;
4480 	auio.uio_segflg = UIO_USERSPACE;
4481 	auio.uio_td = td;
4482 	cnt = nbytes;
4483 
4484 #ifdef MAC
4485 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4486 	    attrname, &auio);
4487 	if (error)
4488 		goto done;
4489 #endif
4490 
4491 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4492 	    td->td_ucred, td);
4493 	cnt -= auio.uio_resid;
4494 	td->td_retval[0] = cnt;
4495 
4496 done:
4497 	VOP_UNLOCK(vp, 0, td);
4498 	vn_finished_write(mp);
4499 	return (error);
4500 }
4501 
4502 int
4503 extattr_set_fd(td, uap)
4504 	struct thread *td;
4505 	struct extattr_set_fd_args /* {
4506 		int fd;
4507 		int attrnamespace;
4508 		const char *attrname;
4509 		void *data;
4510 		size_t nbytes;
4511 	} */ *uap;
4512 {
4513 	struct file *fp;
4514 	char attrname[EXTATTR_MAXNAMELEN];
4515 	int vfslocked, error;
4516 
4517 	AUDIT_ARG(fd, uap->fd);
4518 	AUDIT_ARG(value, uap->attrnamespace);
4519 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4520 	if (error)
4521 		return (error);
4522 	AUDIT_ARG(text, attrname);
4523 
4524 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4525 	if (error)
4526 		return (error);
4527 
4528 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4529 	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
4530 	    attrname, uap->data, uap->nbytes, td);
4531 	fdrop(fp, td);
4532 	VFS_UNLOCK_GIANT(vfslocked);
4533 
4534 	return (error);
4535 }
4536 
4537 int
4538 extattr_set_file(td, uap)
4539 	struct thread *td;
4540 	struct extattr_set_file_args /* {
4541 		const char *path;
4542 		int attrnamespace;
4543 		const char *attrname;
4544 		void *data;
4545 		size_t nbytes;
4546 	} */ *uap;
4547 {
4548 	struct nameidata nd;
4549 	char attrname[EXTATTR_MAXNAMELEN];
4550 	int vfslocked, error;
4551 
4552 	AUDIT_ARG(value, uap->attrnamespace);
4553 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4554 	if (error)
4555 		return (error);
4556 	AUDIT_ARG(text, attrname);
4557 
4558 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4559 	    uap->path, td);
4560 	error = namei(&nd);
4561 	if (error)
4562 		return (error);
4563 	NDFREE(&nd, NDF_ONLY_PNBUF);
4564 
4565 	vfslocked = NDHASGIANT(&nd);
4566 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4567 	    uap->data, uap->nbytes, td);
4568 
4569 	vrele(nd.ni_vp);
4570 	VFS_UNLOCK_GIANT(vfslocked);
4571 	return (error);
4572 }
4573 
4574 int
4575 extattr_set_link(td, uap)
4576 	struct thread *td;
4577 	struct extattr_set_link_args /* {
4578 		const char *path;
4579 		int attrnamespace;
4580 		const char *attrname;
4581 		void *data;
4582 		size_t nbytes;
4583 	} */ *uap;
4584 {
4585 	struct nameidata nd;
4586 	char attrname[EXTATTR_MAXNAMELEN];
4587 	int vfslocked, error;
4588 
4589 	AUDIT_ARG(value, uap->attrnamespace);
4590 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4591 	if (error)
4592 		return (error);
4593 	AUDIT_ARG(text, attrname);
4594 
4595 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4596 	    uap->path, td);
4597 	error = namei(&nd);
4598 	if (error)
4599 		return (error);
4600 	NDFREE(&nd, NDF_ONLY_PNBUF);
4601 
4602 	vfslocked = NDHASGIANT(&nd);
4603 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4604 	    uap->data, uap->nbytes, td);
4605 
4606 	vrele(nd.ni_vp);
4607 	VFS_UNLOCK_GIANT(vfslocked);
4608 	return (error);
4609 }
4610 
4611 /*-
4612  * Get a named extended attribute on a file or directory
4613  *
4614  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4615  *            kernelspace string pointer "attrname", userspace buffer
4616  *            pointer "data", buffer length "nbytes", thread "td".
4617  * Returns: 0 on success, an error number otherwise
4618  * Locks: none
4619  * References: vp must be a valid reference for the duration of the call
4620  */
4621 static int
4622 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4623     void *data, size_t nbytes, struct thread *td)
4624 {
4625 	struct uio auio, *auiop;
4626 	struct iovec aiov;
4627 	ssize_t cnt;
4628 	size_t size, *sizep;
4629 	int error;
4630 
4631 	VFS_ASSERT_GIANT(vp->v_mount);
4632 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4633 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4634 
4635 	/*
4636 	 * Slightly unusual semantics: if the user provides a NULL data
4637 	 * pointer, they don't want to receive the data, just the
4638 	 * maximum read length.
4639 	 */
4640 	auiop = NULL;
4641 	sizep = NULL;
4642 	cnt = 0;
4643 	if (data != NULL) {
4644 		aiov.iov_base = data;
4645 		aiov.iov_len = nbytes;
4646 		auio.uio_iov = &aiov;
4647 		auio.uio_iovcnt = 1;
4648 		auio.uio_offset = 0;
4649 		if (nbytes > INT_MAX) {
4650 			error = EINVAL;
4651 			goto done;
4652 		}
4653 		auio.uio_resid = nbytes;
4654 		auio.uio_rw = UIO_READ;
4655 		auio.uio_segflg = UIO_USERSPACE;
4656 		auio.uio_td = td;
4657 		auiop = &auio;
4658 		cnt = nbytes;
4659 	} else
4660 		sizep = &size;
4661 
4662 #ifdef MAC
4663 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4664 	    attrname, &auio);
4665 	if (error)
4666 		goto done;
4667 #endif
4668 
4669 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4670 	    td->td_ucred, td);
4671 
4672 	if (auiop != NULL) {
4673 		cnt -= auio.uio_resid;
4674 		td->td_retval[0] = cnt;
4675 	} else
4676 		td->td_retval[0] = size;
4677 
4678 done:
4679 	VOP_UNLOCK(vp, 0, td);
4680 	return (error);
4681 }
4682 
4683 int
4684 extattr_get_fd(td, uap)
4685 	struct thread *td;
4686 	struct extattr_get_fd_args /* {
4687 		int fd;
4688 		int attrnamespace;
4689 		const char *attrname;
4690 		void *data;
4691 		size_t nbytes;
4692 	} */ *uap;
4693 {
4694 	struct file *fp;
4695 	char attrname[EXTATTR_MAXNAMELEN];
4696 	int vfslocked, error;
4697 
4698 	AUDIT_ARG(fd, uap->fd);
4699 	AUDIT_ARG(value, uap->attrnamespace);
4700 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4701 	if (error)
4702 		return (error);
4703 	AUDIT_ARG(text, attrname);
4704 
4705 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4706 	if (error)
4707 		return (error);
4708 
4709 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4710 	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4711 	    attrname, uap->data, uap->nbytes, td);
4712 
4713 	fdrop(fp, td);
4714 	VFS_UNLOCK_GIANT(vfslocked);
4715 	return (error);
4716 }
4717 
4718 int
4719 extattr_get_file(td, uap)
4720 	struct thread *td;
4721 	struct extattr_get_file_args /* {
4722 		const char *path;
4723 		int attrnamespace;
4724 		const char *attrname;
4725 		void *data;
4726 		size_t nbytes;
4727 	} */ *uap;
4728 {
4729 	struct nameidata nd;
4730 	char attrname[EXTATTR_MAXNAMELEN];
4731 	int vfslocked, error;
4732 
4733 	AUDIT_ARG(value, uap->attrnamespace);
4734 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4735 	if (error)
4736 		return (error);
4737 	AUDIT_ARG(text, attrname);
4738 
4739 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4740 	    uap->path, td);
4741 	error = namei(&nd);
4742 	if (error)
4743 		return (error);
4744 	NDFREE(&nd, NDF_ONLY_PNBUF);
4745 
4746 	vfslocked = NDHASGIANT(&nd);
4747 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4748 	    uap->data, uap->nbytes, td);
4749 
4750 	vrele(nd.ni_vp);
4751 	VFS_UNLOCK_GIANT(vfslocked);
4752 	return (error);
4753 }
4754 
4755 int
4756 extattr_get_link(td, uap)
4757 	struct thread *td;
4758 	struct extattr_get_link_args /* {
4759 		const char *path;
4760 		int attrnamespace;
4761 		const char *attrname;
4762 		void *data;
4763 		size_t nbytes;
4764 	} */ *uap;
4765 {
4766 	struct nameidata nd;
4767 	char attrname[EXTATTR_MAXNAMELEN];
4768 	int vfslocked, error;
4769 
4770 	AUDIT_ARG(value, uap->attrnamespace);
4771 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4772 	if (error)
4773 		return (error);
4774 	AUDIT_ARG(text, attrname);
4775 
4776 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4777 	    uap->path, td);
4778 	error = namei(&nd);
4779 	if (error)
4780 		return (error);
4781 	NDFREE(&nd, NDF_ONLY_PNBUF);
4782 
4783 	vfslocked = NDHASGIANT(&nd);
4784 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4785 	    uap->data, uap->nbytes, td);
4786 
4787 	vrele(nd.ni_vp);
4788 	VFS_UNLOCK_GIANT(vfslocked);
4789 	return (error);
4790 }
4791 
4792 /*
4793  * extattr_delete_vp(): Delete a named extended attribute on a file or
4794  *                      directory
4795  *
4796  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4797  *            kernelspace string pointer "attrname", proc "p"
4798  * Returns: 0 on success, an error number otherwise
4799  * Locks: none
4800  * References: vp must be a valid reference for the duration of the call
4801  */
4802 static int
4803 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4804     struct thread *td)
4805 {
4806 	struct mount *mp;
4807 	int error;
4808 
4809 	VFS_ASSERT_GIANT(vp->v_mount);
4810 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4811 	if (error)
4812 		return (error);
4813 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4814 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4815 
4816 #ifdef MAC
4817 	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4818 	    attrname);
4819 	if (error)
4820 		goto done;
4821 #endif
4822 
4823 	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4824 	    td);
4825 	if (error == EOPNOTSUPP)
4826 		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4827 		    td->td_ucred, td);
4828 #ifdef MAC
4829 done:
4830 #endif
4831 	VOP_UNLOCK(vp, 0, td);
4832 	vn_finished_write(mp);
4833 	return (error);
4834 }
4835 
4836 int
4837 extattr_delete_fd(td, uap)
4838 	struct thread *td;
4839 	struct extattr_delete_fd_args /* {
4840 		int fd;
4841 		int attrnamespace;
4842 		const char *attrname;
4843 	} */ *uap;
4844 {
4845 	struct file *fp;
4846 	char attrname[EXTATTR_MAXNAMELEN];
4847 	int vfslocked, error;
4848 
4849 	AUDIT_ARG(fd, uap->fd);
4850 	AUDIT_ARG(value, uap->attrnamespace);
4851 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4852 	if (error)
4853 		return (error);
4854 	AUDIT_ARG(text, attrname);
4855 
4856 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4857 	if (error)
4858 		return (error);
4859 
4860 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4861 	error = extattr_delete_vp(fp->f_vnode, uap->attrnamespace,
4862 	    attrname, td);
4863 	fdrop(fp, td);
4864 	VFS_UNLOCK_GIANT(vfslocked);
4865 	return (error);
4866 }
4867 
4868 int
4869 extattr_delete_file(td, uap)
4870 	struct thread *td;
4871 	struct extattr_delete_file_args /* {
4872 		const char *path;
4873 		int attrnamespace;
4874 		const char *attrname;
4875 	} */ *uap;
4876 {
4877 	struct nameidata nd;
4878 	char attrname[EXTATTR_MAXNAMELEN];
4879 	int vfslocked, error;
4880 
4881 	AUDIT_ARG(value, uap->attrnamespace);
4882 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4883 	if (error)
4884 		return(error);
4885 	AUDIT_ARG(text, attrname);
4886 
4887 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4888 	    uap->path, td);
4889 	error = namei(&nd);
4890 	if (error)
4891 		return(error);
4892 	NDFREE(&nd, NDF_ONLY_PNBUF);
4893 
4894 	vfslocked = NDHASGIANT(&nd);
4895 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4896 	vrele(nd.ni_vp);
4897 	VFS_UNLOCK_GIANT(vfslocked);
4898 	return(error);
4899 }
4900 
4901 int
4902 extattr_delete_link(td, uap)
4903 	struct thread *td;
4904 	struct extattr_delete_link_args /* {
4905 		const char *path;
4906 		int attrnamespace;
4907 		const char *attrname;
4908 	} */ *uap;
4909 {
4910 	struct nameidata nd;
4911 	char attrname[EXTATTR_MAXNAMELEN];
4912 	int vfslocked, error;
4913 
4914 	AUDIT_ARG(value, uap->attrnamespace);
4915 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4916 	if (error)
4917 		return(error);
4918 	AUDIT_ARG(text, attrname);
4919 
4920 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4921 	    uap->path, td);
4922 	error = namei(&nd);
4923 	if (error)
4924 		return(error);
4925 	NDFREE(&nd, NDF_ONLY_PNBUF);
4926 
4927 	vfslocked = NDHASGIANT(&nd);
4928 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4929 	vrele(nd.ni_vp);
4930 	VFS_UNLOCK_GIANT(vfslocked);
4931 	return(error);
4932 }
4933 
4934 /*-
4935  * Retrieve a list of extended attributes on a file or directory.
4936  *
4937  * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4938  *            userspace buffer pointer "data", buffer length "nbytes",
4939  *            thread "td".
4940  * Returns: 0 on success, an error number otherwise
4941  * Locks: none
4942  * References: vp must be a valid reference for the duration of the call
4943  */
4944 static int
4945 extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4946     size_t nbytes, struct thread *td)
4947 {
4948 	struct uio auio, *auiop;
4949 	size_t size, *sizep;
4950 	struct iovec aiov;
4951 	ssize_t cnt;
4952 	int error;
4953 
4954 	VFS_ASSERT_GIANT(vp->v_mount);
4955 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4956 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4957 
4958 	auiop = NULL;
4959 	sizep = NULL;
4960 	cnt = 0;
4961 	if (data != NULL) {
4962 		aiov.iov_base = data;
4963 		aiov.iov_len = nbytes;
4964 		auio.uio_iov = &aiov;
4965 		auio.uio_iovcnt = 1;
4966 		auio.uio_offset = 0;
4967 		if (nbytes > INT_MAX) {
4968 			error = EINVAL;
4969 			goto done;
4970 		}
4971 		auio.uio_resid = nbytes;
4972 		auio.uio_rw = UIO_READ;
4973 		auio.uio_segflg = UIO_USERSPACE;
4974 		auio.uio_td = td;
4975 		auiop = &auio;
4976 		cnt = nbytes;
4977 	} else
4978 		sizep = &size;
4979 
4980 #ifdef MAC
4981 	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4982 	if (error)
4983 		goto done;
4984 #endif
4985 
4986 	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4987 	    td->td_ucred, td);
4988 
4989 	if (auiop != NULL) {
4990 		cnt -= auio.uio_resid;
4991 		td->td_retval[0] = cnt;
4992 	} else
4993 		td->td_retval[0] = size;
4994 
4995 done:
4996 	VOP_UNLOCK(vp, 0, td);
4997 	return (error);
4998 }
4999 
5000 
5001 int
5002 extattr_list_fd(td, uap)
5003 	struct thread *td;
5004 	struct extattr_list_fd_args /* {
5005 		int fd;
5006 		int attrnamespace;
5007 		void *data;
5008 		size_t nbytes;
5009 	} */ *uap;
5010 {
5011 	struct file *fp;
5012 	int vfslocked, error;
5013 
5014 	AUDIT_ARG(fd, uap->fd);
5015 	AUDIT_ARG(value, uap->attrnamespace);
5016 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
5017 	if (error)
5018 		return (error);
5019 
5020 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
5021 	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
5022 	    uap->nbytes, td);
5023 
5024 	fdrop(fp, td);
5025 	VFS_UNLOCK_GIANT(vfslocked);
5026 	return (error);
5027 }
5028 
5029 int
5030 extattr_list_file(td, uap)
5031 	struct thread*td;
5032 	struct extattr_list_file_args /* {
5033 		const char *path;
5034 		int attrnamespace;
5035 		void *data;
5036 		size_t nbytes;
5037 	} */ *uap;
5038 {
5039 	struct nameidata nd;
5040 	int vfslocked, error;
5041 
5042 	AUDIT_ARG(value, uap->attrnamespace);
5043 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
5044 	    uap->path, td);
5045 	error = namei(&nd);
5046 	if (error)
5047 		return (error);
5048 	NDFREE(&nd, NDF_ONLY_PNBUF);
5049 
5050 	vfslocked = NDHASGIANT(&nd);
5051 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
5052 	    uap->nbytes, td);
5053 
5054 	vrele(nd.ni_vp);
5055 	VFS_UNLOCK_GIANT(vfslocked);
5056 	return (error);
5057 }
5058 
5059 int
5060 extattr_list_link(td, uap)
5061 	struct thread*td;
5062 	struct extattr_list_link_args /* {
5063 		const char *path;
5064 		int attrnamespace;
5065 		void *data;
5066 		size_t nbytes;
5067 	} */ *uap;
5068 {
5069 	struct nameidata nd;
5070 	int vfslocked, error;
5071 
5072 	AUDIT_ARG(value, uap->attrnamespace);
5073 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
5074 	    uap->path, td);
5075 	error = namei(&nd);
5076 	if (error)
5077 		return (error);
5078 	NDFREE(&nd, NDF_ONLY_PNBUF);
5079 
5080 	vfslocked = NDHASGIANT(&nd);
5081 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
5082 	    uap->nbytes, td);
5083 
5084 	vrele(nd.ni_vp);
5085 	VFS_UNLOCK_GIANT(vfslocked);
5086 	return (error);
5087 }
5088