xref: /freebsd/sys/kern/vfs_extattr.c (revision 87569f75a91f298c52a71823c04d41cf53c88889)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/sysent.h>
48 #include <sys/mac.h>
49 #include <sys/malloc.h>
50 #include <sys/mount.h>
51 #include <sys/mutex.h>
52 #include <sys/sysproto.h>
53 #include <sys/namei.h>
54 #include <sys/filedesc.h>
55 #include <sys/kernel.h>
56 #include <sys/fcntl.h>
57 #include <sys/file.h>
58 #include <sys/limits.h>
59 #include <sys/linker.h>
60 #include <sys/stat.h>
61 #include <sys/sx.h>
62 #include <sys/unistd.h>
63 #include <sys/vnode.h>
64 #include <sys/proc.h>
65 #include <sys/dirent.h>
66 #include <sys/extattr.h>
67 #include <sys/jail.h>
68 #include <sys/syscallsubr.h>
69 #include <sys/sysctl.h>
70 
71 #include <machine/stdarg.h>
72 
73 #include <security/audit/audit.h>
74 
75 #include <vm/vm.h>
76 #include <vm/vm_object.h>
77 #include <vm/vm_page.h>
78 #include <vm/uma.h>
79 
80 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
81 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
82 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
83 static int setfmode(struct thread *td, struct vnode *, int);
84 static int setfflags(struct thread *td, struct vnode *, int);
85 static int setutimes(struct thread *td, struct vnode *,
86     const struct timespec *, int, int);
87 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
88     struct thread *td);
89 
90 static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
91     size_t nbytes, struct thread *td);
92 
93 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
94 
95 /*
96  * The module initialization routine for POSIX asynchronous I/O will
97  * set this to the version of AIO that it implements.  (Zero means
98  * that it is not implemented.)  This value is used here by pathconf()
99  * and in kern_descrip.c by fpathconf().
100  */
101 int async_io_version;
102 
103 /*
104  * Sync each mounted filesystem.
105  */
106 #ifndef _SYS_SYSPROTO_H_
107 struct sync_args {
108 	int     dummy;
109 };
110 #endif
111 
112 #ifdef DEBUG
113 static int syncprt = 0;
114 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
115 #endif
116 
117 /* ARGSUSED */
118 int
119 sync(td, uap)
120 	struct thread *td;
121 	struct sync_args *uap;
122 {
123 	struct mount *mp, *nmp;
124 	int asyncflag;
125 
126 	mtx_lock(&Giant);
127 	mtx_lock(&mountlist_mtx);
128 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
129 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
130 			nmp = TAILQ_NEXT(mp, mnt_list);
131 			continue;
132 		}
133 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
134 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
135 			asyncflag = mp->mnt_flag & MNT_ASYNC;
136 			mp->mnt_flag &= ~MNT_ASYNC;
137 			vfs_msync(mp, MNT_NOWAIT);
138 			VFS_SYNC(mp, MNT_NOWAIT, td);
139 			mp->mnt_flag |= asyncflag;
140 			vn_finished_write(mp);
141 		}
142 		mtx_lock(&mountlist_mtx);
143 		nmp = TAILQ_NEXT(mp, mnt_list);
144 		vfs_unbusy(mp, td);
145 	}
146 	mtx_unlock(&mountlist_mtx);
147 #if 0
148 /*
149  * XXX don't call vfs_bufstats() yet because that routine
150  * was not imported in the Lite2 merge.
151  */
152 #ifdef DIAGNOSTIC
153 	if (syncprt)
154 		vfs_bufstats();
155 #endif /* DIAGNOSTIC */
156 #endif
157 	mtx_unlock(&Giant);
158 	return (0);
159 }
160 
161 /* XXX PRISON: could be per prison flag */
162 static int prison_quotas;
163 #if 0
164 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
165 #endif
166 
167 /*
168  * Change filesystem quotas.
169  *
170  * MP SAFE
171  */
172 #ifndef _SYS_SYSPROTO_H_
173 struct quotactl_args {
174 	char *path;
175 	int cmd;
176 	int uid;
177 	caddr_t arg;
178 };
179 #endif
180 int
181 quotactl(td, uap)
182 	struct thread *td;
183 	register struct quotactl_args /* {
184 		char *path;
185 		int cmd;
186 		int uid;
187 		caddr_t arg;
188 	} */ *uap;
189 {
190 	struct mount *mp, *vmp;
191 	int error;
192 	struct nameidata nd;
193 
194 	if (jailed(td->td_ucred) && !prison_quotas)
195 		return (EPERM);
196 	mtx_lock(&Giant);
197 	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, UIO_USERSPACE, uap->path, td);
198 	if ((error = namei(&nd)) != 0) {
199 		mtx_unlock(&Giant);
200 		return (error);
201 	}
202 	NDFREE(&nd, NDF_ONLY_PNBUF);
203 	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
204 	mp = nd.ni_vp->v_mount;
205 	vrele(nd.ni_vp);
206 	if (error) {
207 		mtx_unlock(&Giant);
208 		return (error);
209 	}
210 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
211 	vn_finished_write(vmp);
212 	mtx_unlock(&Giant);
213 	return (error);
214 }
215 
216 /*
217  * Get filesystem statistics.
218  */
219 #ifndef _SYS_SYSPROTO_H_
220 struct statfs_args {
221 	char *path;
222 	struct statfs *buf;
223 };
224 #endif
225 int
226 statfs(td, uap)
227 	struct thread *td;
228 	register struct statfs_args /* {
229 		char *path;
230 		struct statfs *buf;
231 	} */ *uap;
232 {
233 	struct statfs sf;
234 	int error;
235 
236 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
237 	if (error == 0)
238 		error = copyout(&sf, uap->buf, sizeof(sf));
239 	return (error);
240 }
241 
242 int
243 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
244     struct statfs *buf)
245 {
246 	struct mount *mp;
247 	struct statfs *sp, sb;
248 	int error;
249 	struct nameidata nd;
250 
251 	mtx_lock(&Giant);
252 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, pathseg, path, td);
253 	error = namei(&nd);
254 	if (error) {
255 		mtx_unlock(&Giant);
256 		return (error);
257 	}
258 	mp = nd.ni_vp->v_mount;
259 	vfs_ref(mp);
260 	NDFREE(&nd, NDF_ONLY_PNBUF);
261 	vput(nd.ni_vp);
262 #ifdef MAC
263 	error = mac_check_mount_stat(td->td_ucred, mp);
264 	if (error) {
265 		vfs_rel(mp);
266 		mtx_unlock(&Giant);
267 		return (error);
268 	}
269 #endif
270 	/*
271 	 * Set these in case the underlying filesystem fails to do so.
272 	 */
273 	sp = &mp->mnt_stat;
274 	sp->f_version = STATFS_VERSION;
275 	sp->f_namemax = NAME_MAX;
276 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
277 	error = VFS_STATFS(mp, sp, td);
278 	vfs_rel(mp);
279 	if (error) {
280 		mtx_unlock(&Giant);
281 		return (error);
282 	}
283 	if (suser(td)) {
284 		bcopy(sp, &sb, sizeof(sb));
285 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
286 		prison_enforce_statfs(td->td_ucred, mp, &sb);
287 		sp = &sb;
288 	}
289 	mtx_unlock(&Giant);
290 	*buf = *sp;
291 	return (0);
292 }
293 
294 /*
295  * Get filesystem statistics.
296  */
297 #ifndef _SYS_SYSPROTO_H_
298 struct fstatfs_args {
299 	int fd;
300 	struct statfs *buf;
301 };
302 #endif
303 int
304 fstatfs(td, uap)
305 	struct thread *td;
306 	register struct fstatfs_args /* {
307 		int fd;
308 		struct statfs *buf;
309 	} */ *uap;
310 {
311 	struct statfs sf;
312 	int error;
313 
314 	error = kern_fstatfs(td, uap->fd, &sf);
315 	if (error == 0)
316 		error = copyout(&sf, uap->buf, sizeof(sf));
317 	return (error);
318 }
319 
320 int
321 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
322 {
323 	struct file *fp;
324 	struct mount *mp;
325 	struct statfs *sp, sb;
326 	struct vnode *vp;
327 	int error;
328 
329 	AUDIT_ARG(fd, fd);
330 	error = getvnode(td->td_proc->p_fd, fd, &fp);
331 	if (error)
332 		return (error);
333 	mtx_lock(&Giant);
334 	vp = fp->f_vnode;
335 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
336 #ifdef AUDIT
337 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
338 #endif
339 	mp = vp->v_mount;
340 	if (mp)
341 		vfs_ref(mp);
342 	VOP_UNLOCK(vp, 0, td);
343 	fdrop(fp, td);
344 	if (vp->v_iflag & VI_DOOMED) {
345 		if (mp)
346 			vfs_rel(mp);
347 		mtx_unlock(&Giant);
348 		return (EBADF);
349 	}
350 #ifdef MAC
351 	error = mac_check_mount_stat(td->td_ucred, mp);
352 	if (error) {
353 		vfs_rel(mp);
354 		mtx_unlock(&Giant);
355 		return (error);
356 	}
357 #endif
358 	/*
359 	 * Set these in case the underlying filesystem fails to do so.
360 	 */
361 	sp = &mp->mnt_stat;
362 	sp->f_version = STATFS_VERSION;
363 	sp->f_namemax = NAME_MAX;
364 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
365 	error = VFS_STATFS(mp, sp, td);
366 	vfs_rel(mp);
367 	if (error) {
368 		mtx_unlock(&Giant);
369 		return (error);
370 	}
371 	if (suser(td)) {
372 		bcopy(sp, &sb, sizeof(sb));
373 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
374 		prison_enforce_statfs(td->td_ucred, mp, &sb);
375 		sp = &sb;
376 	}
377 	mtx_unlock(&Giant);
378 	*buf = *sp;
379 	return (0);
380 }
381 
382 /*
383  * Get statistics on all filesystems.
384  */
385 #ifndef _SYS_SYSPROTO_H_
386 struct getfsstat_args {
387 	struct statfs *buf;
388 	long bufsize;
389 	int flags;
390 };
391 #endif
392 int
393 getfsstat(td, uap)
394 	struct thread *td;
395 	register struct getfsstat_args /* {
396 		struct statfs *buf;
397 		long bufsize;
398 		int flags;
399 	} */ *uap;
400 {
401 
402 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
403 	    uap->flags));
404 }
405 
406 /*
407  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
408  * 	The caller is responsible for freeing memory which will be allocated
409  *	in '*buf'.
410  */
411 int
412 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
413     enum uio_seg bufseg, int flags)
414 {
415 	struct mount *mp, *nmp;
416 	struct statfs *sfsp, *sp, sb;
417 	size_t count, maxcount;
418 	int error;
419 
420 	maxcount = bufsize / sizeof(struct statfs);
421 	if (bufsize == 0)
422 		sfsp = NULL;
423 	else if (bufseg == UIO_USERSPACE)
424 		sfsp = *buf;
425 	else /* if (bufseg == UIO_SYSSPACE) */ {
426 		count = 0;
427 		mtx_lock(&mountlist_mtx);
428 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
429 			count++;
430 		}
431 		mtx_unlock(&mountlist_mtx);
432 		if (maxcount > count)
433 			maxcount = count;
434 		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
435 		    M_WAITOK);
436 	}
437 	count = 0;
438 	mtx_lock(&Giant);
439 	mtx_lock(&mountlist_mtx);
440 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
441 		if (prison_canseemount(td->td_ucred, mp) != 0) {
442 			nmp = TAILQ_NEXT(mp, mnt_list);
443 			continue;
444 		}
445 #ifdef MAC
446 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
447 			nmp = TAILQ_NEXT(mp, mnt_list);
448 			continue;
449 		}
450 #endif
451 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
452 			nmp = TAILQ_NEXT(mp, mnt_list);
453 			continue;
454 		}
455 		if (sfsp && count < maxcount) {
456 			sp = &mp->mnt_stat;
457 			/*
458 			 * Set these in case the underlying filesystem
459 			 * fails to do so.
460 			 */
461 			sp->f_version = STATFS_VERSION;
462 			sp->f_namemax = NAME_MAX;
463 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
464 			/*
465 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
466 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
467 			 * overrides MNT_WAIT.
468 			 */
469 			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
470 			    (flags & MNT_WAIT)) &&
471 			    (error = VFS_STATFS(mp, sp, td))) {
472 				mtx_lock(&mountlist_mtx);
473 				nmp = TAILQ_NEXT(mp, mnt_list);
474 				vfs_unbusy(mp, td);
475 				continue;
476 			}
477 			if (suser(td)) {
478 				bcopy(sp, &sb, sizeof(sb));
479 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
480 				prison_enforce_statfs(td->td_ucred, mp, &sb);
481 				sp = &sb;
482 			}
483 			if (bufseg == UIO_SYSSPACE)
484 				bcopy(sp, sfsp, sizeof(*sp));
485 			else /* if (bufseg == UIO_USERSPACE) */ {
486 				error = copyout(sp, sfsp, sizeof(*sp));
487 				if (error) {
488 					vfs_unbusy(mp, td);
489 					mtx_unlock(&Giant);
490 					return (error);
491 				}
492 			}
493 			sfsp++;
494 		}
495 		count++;
496 		mtx_lock(&mountlist_mtx);
497 		nmp = TAILQ_NEXT(mp, mnt_list);
498 		vfs_unbusy(mp, td);
499 	}
500 	mtx_unlock(&mountlist_mtx);
501 	mtx_unlock(&Giant);
502 	if (sfsp && count > maxcount)
503 		td->td_retval[0] = maxcount;
504 	else
505 		td->td_retval[0] = count;
506 	return (0);
507 }
508 
509 #ifdef COMPAT_FREEBSD4
510 /*
511  * Get old format filesystem statistics.
512  */
513 static void cvtstatfs(struct statfs *, struct ostatfs *);
514 
515 #ifndef _SYS_SYSPROTO_H_
516 struct freebsd4_statfs_args {
517 	char *path;
518 	struct ostatfs *buf;
519 };
520 #endif
521 int
522 freebsd4_statfs(td, uap)
523 	struct thread *td;
524 	struct freebsd4_statfs_args /* {
525 		char *path;
526 		struct ostatfs *buf;
527 	} */ *uap;
528 {
529 	struct ostatfs osb;
530 	struct statfs sf;
531 	int error;
532 
533 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
534 	if (error)
535 		return (error);
536 	cvtstatfs(&sf, &osb);
537 	return (copyout(&osb, uap->buf, sizeof(osb)));
538 }
539 
540 /*
541  * Get filesystem statistics.
542  */
543 #ifndef _SYS_SYSPROTO_H_
544 struct freebsd4_fstatfs_args {
545 	int fd;
546 	struct ostatfs *buf;
547 };
548 #endif
549 int
550 freebsd4_fstatfs(td, uap)
551 	struct thread *td;
552 	struct freebsd4_fstatfs_args /* {
553 		int fd;
554 		struct ostatfs *buf;
555 	} */ *uap;
556 {
557 	struct ostatfs osb;
558 	struct statfs sf;
559 	int error;
560 
561 	error = kern_fstatfs(td, uap->fd, &sf);
562 	if (error)
563 		return (error);
564 	cvtstatfs(&sf, &osb);
565 	return (copyout(&osb, uap->buf, sizeof(osb)));
566 }
567 
568 /*
569  * Get statistics on all filesystems.
570  */
571 #ifndef _SYS_SYSPROTO_H_
572 struct freebsd4_getfsstat_args {
573 	struct ostatfs *buf;
574 	long bufsize;
575 	int flags;
576 };
577 #endif
578 int
579 freebsd4_getfsstat(td, uap)
580 	struct thread *td;
581 	register struct freebsd4_getfsstat_args /* {
582 		struct ostatfs *buf;
583 		long bufsize;
584 		int flags;
585 	} */ *uap;
586 {
587 	struct statfs *buf, *sp;
588 	struct ostatfs osb;
589 	size_t count, size;
590 	int error;
591 
592 	count = uap->bufsize / sizeof(struct ostatfs);
593 	size = count * sizeof(struct statfs);
594 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
595 	if (size > 0) {
596 		count = td->td_retval[0];
597 		sp = buf;
598 		while (count > 0 && error == 0) {
599 			cvtstatfs(sp, &osb);
600 			error = copyout(&osb, uap->buf, sizeof(osb));
601 			sp++;
602 			uap->buf++;
603 			count--;
604 		}
605 		free(buf, M_TEMP);
606 	}
607 	return (error);
608 }
609 
610 /*
611  * Implement fstatfs() for (NFS) file handles.
612  */
613 #ifndef _SYS_SYSPROTO_H_
614 struct freebsd4_fhstatfs_args {
615 	struct fhandle *u_fhp;
616 	struct ostatfs *buf;
617 };
618 #endif
619 int
620 freebsd4_fhstatfs(td, uap)
621 	struct thread *td;
622 	struct freebsd4_fhstatfs_args /* {
623 		struct fhandle *u_fhp;
624 		struct ostatfs *buf;
625 	} */ *uap;
626 {
627 	struct ostatfs osb;
628 	struct statfs sf;
629 	fhandle_t fh;
630 	int error;
631 
632 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
633 	if (error)
634 		return (error);
635 	error = kern_fhstatfs(td, fh, &sf);
636 	if (error)
637 		return (error);
638 	cvtstatfs(&sf, &osb);
639 	return (copyout(&osb, uap->buf, sizeof(osb)));
640 }
641 
642 /*
643  * Convert a new format statfs structure to an old format statfs structure.
644  */
645 static void
646 cvtstatfs(nsp, osp)
647 	struct statfs *nsp;
648 	struct ostatfs *osp;
649 {
650 
651 	bzero(osp, sizeof(*osp));
652 	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
653 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
654 	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
655 	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
656 	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
657 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
658 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
659 	osp->f_owner = nsp->f_owner;
660 	osp->f_type = nsp->f_type;
661 	osp->f_flags = nsp->f_flags;
662 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
663 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
664 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
665 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
666 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
667 	    MIN(MFSNAMELEN, OMFSNAMELEN));
668 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
669 	    MIN(MNAMELEN, OMNAMELEN));
670 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
671 	    MIN(MNAMELEN, OMNAMELEN));
672 	osp->f_fsid = nsp->f_fsid;
673 }
674 #endif /* COMPAT_FREEBSD4 */
675 
676 /*
677  * Change current working directory to a given file descriptor.
678  */
679 #ifndef _SYS_SYSPROTO_H_
680 struct fchdir_args {
681 	int	fd;
682 };
683 #endif
684 int
685 fchdir(td, uap)
686 	struct thread *td;
687 	struct fchdir_args /* {
688 		int fd;
689 	} */ *uap;
690 {
691 	register struct filedesc *fdp = td->td_proc->p_fd;
692 	struct vnode *vp, *tdp, *vpold;
693 	struct mount *mp;
694 	struct file *fp;
695 	int vfslocked;
696 	int error;
697 
698 	AUDIT_ARG(fd, uap->fd);
699 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
700 		return (error);
701 	vp = fp->f_vnode;
702 	VREF(vp);
703 	fdrop(fp, td);
704 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
705 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
706 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
707 	if (vp->v_type != VDIR)
708 		error = ENOTDIR;
709 #ifdef MAC
710 	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
711 	}
712 #endif
713 	else
714 		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
715 	while (!error && (mp = vp->v_mountedhere) != NULL) {
716 		int tvfslocked;
717 		if (vfs_busy(mp, 0, 0, td))
718 			continue;
719 		tvfslocked = VFS_LOCK_GIANT(mp);
720 		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
721 		vfs_unbusy(mp, td);
722 		if (error) {
723 			VFS_UNLOCK_GIANT(tvfslocked);
724 			break;
725 		}
726 		vput(vp);
727 		VFS_UNLOCK_GIANT(vfslocked);
728 		vp = tdp;
729 		vfslocked = tvfslocked;
730 	}
731 	if (error) {
732 		vput(vp);
733 		VFS_UNLOCK_GIANT(vfslocked);
734 		return (error);
735 	}
736 	VOP_UNLOCK(vp, 0, td);
737 	VFS_UNLOCK_GIANT(vfslocked);
738 	FILEDESC_LOCK_FAST(fdp);
739 	vpold = fdp->fd_cdir;
740 	fdp->fd_cdir = vp;
741 	FILEDESC_UNLOCK_FAST(fdp);
742 	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
743 	vrele(vpold);
744 	VFS_UNLOCK_GIANT(vfslocked);
745 	return (0);
746 }
747 
748 /*
749  * Change current working directory (``.'').
750  */
751 #ifndef _SYS_SYSPROTO_H_
752 struct chdir_args {
753 	char	*path;
754 };
755 #endif
756 int
757 chdir(td, uap)
758 	struct thread *td;
759 	struct chdir_args /* {
760 		char *path;
761 	} */ *uap;
762 {
763 
764 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
765 }
766 
767 int
768 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
769 {
770 	register struct filedesc *fdp = td->td_proc->p_fd;
771 	int error;
772 	struct nameidata nd;
773 	struct vnode *vp;
774 	int vfslocked;
775 
776 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
777 	    pathseg, path, td);
778 	if ((error = namei(&nd)) != 0)
779 		return (error);
780 	vfslocked = NDHASGIANT(&nd);
781 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
782 		vput(nd.ni_vp);
783 		VFS_UNLOCK_GIANT(vfslocked);
784 		NDFREE(&nd, NDF_ONLY_PNBUF);
785 		return (error);
786 	}
787 	VOP_UNLOCK(nd.ni_vp, 0, td);
788 	VFS_UNLOCK_GIANT(vfslocked);
789 	NDFREE(&nd, NDF_ONLY_PNBUF);
790 	FILEDESC_LOCK_FAST(fdp);
791 	vp = fdp->fd_cdir;
792 	fdp->fd_cdir = nd.ni_vp;
793 	FILEDESC_UNLOCK_FAST(fdp);
794 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
795 	vrele(vp);
796 	VFS_UNLOCK_GIANT(vfslocked);
797 	return (0);
798 }
799 
800 /*
801  * Helper function for raised chroot(2) security function:  Refuse if
802  * any filedescriptors are open directories.
803  */
804 static int
805 chroot_refuse_vdir_fds(fdp)
806 	struct filedesc *fdp;
807 {
808 	struct vnode *vp;
809 	struct file *fp;
810 	int fd;
811 
812 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
813 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
814 		fp = fget_locked(fdp, fd);
815 		if (fp == NULL)
816 			continue;
817 		if (fp->f_type == DTYPE_VNODE) {
818 			vp = fp->f_vnode;
819 			if (vp->v_type == VDIR)
820 				return (EPERM);
821 		}
822 	}
823 	return (0);
824 }
825 
826 /*
827  * This sysctl determines if we will allow a process to chroot(2) if it
828  * has a directory open:
829  *	0: disallowed for all processes.
830  *	1: allowed for processes that were not already chroot(2)'ed.
831  *	2: allowed for all processes.
832  */
833 
834 static int chroot_allow_open_directories = 1;
835 
836 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
837      &chroot_allow_open_directories, 0, "");
838 
839 /*
840  * Change notion of root (``/'') directory.
841  */
842 #ifndef _SYS_SYSPROTO_H_
843 struct chroot_args {
844 	char	*path;
845 };
846 #endif
847 int
848 chroot(td, uap)
849 	struct thread *td;
850 	struct chroot_args /* {
851 		char *path;
852 	} */ *uap;
853 {
854 	int error;
855 	struct nameidata nd;
856 	int vfslocked;
857 
858 	error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
859 	if (error)
860 		return (error);
861 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
862 	    UIO_USERSPACE, uap->path, td);
863 	error = namei(&nd);
864 	if (error)
865 		goto error;
866 	vfslocked = NDHASGIANT(&nd);
867 	if ((error = change_dir(nd.ni_vp, td)) != 0)
868 		goto e_vunlock;
869 #ifdef MAC
870 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
871 		goto e_vunlock;
872 #endif
873 	VOP_UNLOCK(nd.ni_vp, 0, td);
874 	error = change_root(nd.ni_vp, td);
875 	vrele(nd.ni_vp);
876 	VFS_UNLOCK_GIANT(vfslocked);
877 	NDFREE(&nd, NDF_ONLY_PNBUF);
878 	return (error);
879 e_vunlock:
880 	vput(nd.ni_vp);
881 	VFS_UNLOCK_GIANT(vfslocked);
882 error:
883 	NDFREE(&nd, NDF_ONLY_PNBUF);
884 	return (error);
885 }
886 
887 /*
888  * Common routine for chroot and chdir.  Callers must provide a locked vnode
889  * instance.
890  */
891 int
892 change_dir(vp, td)
893 	struct vnode *vp;
894 	struct thread *td;
895 {
896 	int error;
897 
898 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
899 	if (vp->v_type != VDIR)
900 		return (ENOTDIR);
901 #ifdef MAC
902 	error = mac_check_vnode_chdir(td->td_ucred, vp);
903 	if (error)
904 		return (error);
905 #endif
906 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
907 	return (error);
908 }
909 
910 /*
911  * Common routine for kern_chroot() and jail_attach().  The caller is
912  * responsible for invoking suser() and mac_check_chroot() to authorize this
913  * operation.
914  */
915 int
916 change_root(vp, td)
917 	struct vnode *vp;
918 	struct thread *td;
919 {
920 	struct filedesc *fdp;
921 	struct vnode *oldvp;
922 	int vfslocked;
923 	int error;
924 
925 	VFS_ASSERT_GIANT(vp->v_mount);
926 	fdp = td->td_proc->p_fd;
927 	FILEDESC_LOCK(fdp);
928 	if (chroot_allow_open_directories == 0 ||
929 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
930 		error = chroot_refuse_vdir_fds(fdp);
931 		if (error) {
932 			FILEDESC_UNLOCK(fdp);
933 			return (error);
934 		}
935 	}
936 	oldvp = fdp->fd_rdir;
937 	fdp->fd_rdir = vp;
938 	VREF(fdp->fd_rdir);
939 	if (!fdp->fd_jdir) {
940 		fdp->fd_jdir = vp;
941 		VREF(fdp->fd_jdir);
942 	}
943 	FILEDESC_UNLOCK(fdp);
944 	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
945 	vrele(oldvp);
946 	VFS_UNLOCK_GIANT(vfslocked);
947 	return (0);
948 }
949 
950 /*
951  * Check permissions, allocate an open file structure,
952  * and call the device open routine if any.
953  *
954  * MP SAFE
955  */
956 #ifndef _SYS_SYSPROTO_H_
957 struct open_args {
958 	char	*path;
959 	int	flags;
960 	int	mode;
961 };
962 #endif
963 int
964 open(td, uap)
965 	struct thread *td;
966 	register struct open_args /* {
967 		char *path;
968 		int flags;
969 		int mode;
970 	} */ *uap;
971 {
972 	int error;
973 
974 	error = kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
975 	if (mtx_owned(&Giant))
976 		printf("open: %s: %d\n", uap->path, error);
977 	return (error);
978 }
979 
980 int
981 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
982     int mode)
983 {
984 	struct proc *p = td->td_proc;
985 	struct filedesc *fdp = p->p_fd;
986 	struct file *fp;
987 	struct vnode *vp;
988 	struct vattr vat;
989 	struct mount *mp;
990 	int cmode;
991 	struct file *nfp;
992 	int type, indx, error;
993 	struct flock lf;
994 	struct nameidata nd;
995 	int vfslocked;
996 
997 	AUDIT_ARG(fflags, flags);
998 	AUDIT_ARG(mode, mode);
999 	if ((flags & O_ACCMODE) == O_ACCMODE)
1000 		return (EINVAL);
1001 	flags = FFLAGS(flags);
1002 	error = falloc(td, &nfp, &indx);
1003 	if (error)
1004 		return (error);
1005 	/* An extra reference on `nfp' has been held for us by falloc(). */
1006 	fp = nfp;
1007 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1008 	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td);
1009 	td->td_dupfd = -1;		/* XXX check for fdopen */
1010 	error = vn_open(&nd, &flags, cmode, indx);
1011 	if (error) {
1012 		/*
1013 		 * If the vn_open replaced the method vector, something
1014 		 * wonderous happened deep below and we just pass it up
1015 		 * pretending we know what we do.
1016 		 */
1017 		if (error == ENXIO && fp->f_ops != &badfileops) {
1018 			fdrop(fp, td);
1019 			td->td_retval[0] = indx;
1020 			return (0);
1021 		}
1022 
1023 		/*
1024 		 * release our own reference
1025 		 */
1026 		fdrop(fp, td);
1027 
1028 		/*
1029 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1030 		 * responsible for dropping the old contents of ofiles[indx]
1031 		 * if it succeeds.
1032 		 */
1033 		if ((error == ENODEV || error == ENXIO) &&
1034 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1035 		    (error =
1036 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1037 			td->td_retval[0] = indx;
1038 			return (0);
1039 		}
1040 		/*
1041 		 * Clean up the descriptor, but only if another thread hadn't
1042 		 * replaced or closed it.
1043 		 */
1044 		fdclose(fdp, fp, indx, td);
1045 
1046 		if (error == ERESTART)
1047 			error = EINTR;
1048 		return (error);
1049 	}
1050 	td->td_dupfd = 0;
1051 	vfslocked = NDHASGIANT(&nd);
1052 	NDFREE(&nd, NDF_ONLY_PNBUF);
1053 	vp = nd.ni_vp;
1054 
1055 	/*
1056 	 * There should be 2 references on the file, one from the descriptor
1057 	 * table, and one for us.
1058 	 *
1059 	 * Handle the case where someone closed the file (via its file
1060 	 * descriptor) while we were blocked.  The end result should look
1061 	 * like opening the file succeeded but it was immediately closed.
1062 	 * We call vn_close() manually because we haven't yet hooked up
1063 	 * the various 'struct file' fields.
1064 	 */
1065 	FILEDESC_LOCK(fdp);
1066 	FILE_LOCK(fp);
1067 	if (fp->f_count == 1) {
1068 		mp = vp->v_mount;
1069 		KASSERT(fdp->fd_ofiles[indx] != fp,
1070 		    ("Open file descriptor lost all refs"));
1071 		FILE_UNLOCK(fp);
1072 		FILEDESC_UNLOCK(fdp);
1073 		VOP_UNLOCK(vp, 0, td);
1074 		vn_close(vp, flags & FMASK, fp->f_cred, td);
1075 		VFS_UNLOCK_GIANT(vfslocked);
1076 		fdrop(fp, td);
1077 		td->td_retval[0] = indx;
1078 		return (0);
1079 	}
1080 	fp->f_vnode = vp;
1081 	if (fp->f_data == NULL)
1082 		fp->f_data = vp;
1083 	fp->f_flag = flags & FMASK;
1084 	if (fp->f_ops == &badfileops)
1085 		fp->f_ops = &vnops;
1086 	fp->f_seqcount = 1;
1087 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1088 	FILE_UNLOCK(fp);
1089 	FILEDESC_UNLOCK(fdp);
1090 
1091 	VOP_UNLOCK(vp, 0, td);
1092 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1093 		lf.l_whence = SEEK_SET;
1094 		lf.l_start = 0;
1095 		lf.l_len = 0;
1096 		if (flags & O_EXLOCK)
1097 			lf.l_type = F_WRLCK;
1098 		else
1099 			lf.l_type = F_RDLCK;
1100 		type = F_FLOCK;
1101 		if ((flags & FNONBLOCK) == 0)
1102 			type |= F_WAIT;
1103 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1104 			    type)) != 0)
1105 			goto bad;
1106 		fp->f_flag |= FHASLOCK;
1107 	}
1108 	if (flags & O_TRUNC) {
1109 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1110 			goto bad;
1111 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1112 		VATTR_NULL(&vat);
1113 		vat.va_size = 0;
1114 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1115 #ifdef MAC
1116 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1117 		if (error == 0)
1118 #endif
1119 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1120 		VOP_UNLOCK(vp, 0, td);
1121 		vn_finished_write(mp);
1122 		if (error)
1123 			goto bad;
1124 	}
1125 	VFS_UNLOCK_GIANT(vfslocked);
1126 	/*
1127 	 * Release our private reference, leaving the one associated with
1128 	 * the descriptor table intact.
1129 	 */
1130 	fdrop(fp, td);
1131 	td->td_retval[0] = indx;
1132 	return (0);
1133 bad:
1134 	VFS_UNLOCK_GIANT(vfslocked);
1135 	fdclose(fdp, fp, indx, td);
1136 	fdrop(fp, td);
1137 	return (error);
1138 }
1139 
1140 #ifdef COMPAT_43
1141 /*
1142  * Create a file.
1143  *
1144  * MP SAFE
1145  */
1146 #ifndef _SYS_SYSPROTO_H_
1147 struct ocreat_args {
1148 	char	*path;
1149 	int	mode;
1150 };
1151 #endif
1152 int
1153 ocreat(td, uap)
1154 	struct thread *td;
1155 	register struct ocreat_args /* {
1156 		char *path;
1157 		int mode;
1158 	} */ *uap;
1159 {
1160 
1161 	return (kern_open(td, uap->path, UIO_USERSPACE,
1162 	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
1163 }
1164 #endif /* COMPAT_43 */
1165 
1166 /*
1167  * Create a special file.
1168  */
1169 #ifndef _SYS_SYSPROTO_H_
1170 struct mknod_args {
1171 	char	*path;
1172 	int	mode;
1173 	int	dev;
1174 };
1175 #endif
1176 int
1177 mknod(td, uap)
1178 	struct thread *td;
1179 	register struct mknod_args /* {
1180 		char *path;
1181 		int mode;
1182 		int dev;
1183 	} */ *uap;
1184 {
1185 
1186 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1187 }
1188 
/*
 * Common implementation of mknod(): create a special file named by path
 * (interpreted according to pathseg) with the type encoded in
 * (mode & S_IFMT) and device number dev.
 *
 * Returns 0 on success or an errno value: the privilege-check error,
 * whatever namei() reports, EEXIST when the name already exists, EINVAL
 * for a file type other than the ones handled in the switch below, or
 * the error returned by the MAC check / VOP_MKNOD / VOP_WHITEOUT.
 */
int
kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
    int dev)
{
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	int error;
	int whiteout = 0;
	struct nameidata nd;
	int vfslocked;

	AUDIT_ARG(mode, mode);
	AUDIT_ARG(dev, dev);
	/*
	 * Privilege check: character and block devices go through suser(),
	 * every other type through suser_cred() with SUSER_ALLOWJAIL.
	 */
	switch (mode & S_IFMT) {
	case S_IFCHR:
	case S_IFBLK:
		error = suser(td);
		break;
	default:
		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
		break;
	}
	if (error)
		return (error);
restart:
	bwillwrite();
	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
	    pathseg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;
	if (vp != NULL) {
		/*
		 * The name already exists: drop the path buffer, the parent
		 * (vrele when it is the same vnode as the target, vput
		 * otherwise) and the target itself, then fail.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (EEXIST);
	} else {
		/*
		 * Build the attributes of the new node.  The permission bits
		 * are masked with the process' file creation mask, which is
		 * read under the filedesc fast lock.
		 */
		VATTR_NULL(&vattr);
		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
		vattr.va_mode = (mode & ALLPERMS) &
		    ~td->td_proc->p_fd->fd_cmask;
		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
		vattr.va_rdev = dev;
		whiteout = 0;

		/*
		 * Map the requested file type.  An unsupported type only
		 * records EINVAL here; the common cleanup below still runs.
		 */
		switch (mode & S_IFMT) {
		case S_IFMT:	/* used by badsect to flag bad sectors */
			vattr.va_type = VBAD;
			break;
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			whiteout = 1;
			break;
		default:
			error = EINVAL;
			break;
		}
	}
	/*
	 * Try to start the write without sleeping.  If that fails, release
	 * everything obtained from the lookup, wait with V_XSLEEP and redo
	 * the whole lookup from the restart label.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
#ifdef MAC
	/* The MAC create check is skipped for whiteouts. */
	if (error == 0 && !whiteout)
		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
		    &nd.ni_cnd, &vattr);
#endif
	if (!error) {
		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
		if (whiteout)
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
		else {
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
						&nd.ni_cnd, &vattr);
			/* The new vnode is not handed to the caller. */
			if (error == 0)
				vput(nd.ni_vp);
		}
	}
	/* Common cleanup for both the success and the error case. */
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
1288 
1289 /*
1290  * Create a named pipe.
1291  */
1292 #ifndef _SYS_SYSPROTO_H_
1293 struct mkfifo_args {
1294 	char	*path;
1295 	int	mode;
1296 };
1297 #endif
1298 int
1299 mkfifo(td, uap)
1300 	struct thread *td;
1301 	register struct mkfifo_args /* {
1302 		char *path;
1303 		int mode;
1304 	} */ *uap;
1305 {
1306 
1307 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1308 }
1309 
1310 int
1311 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1312 {
1313 	struct mount *mp;
1314 	struct vattr vattr;
1315 	int error;
1316 	struct nameidata nd;
1317 	int vfslocked;
1318 
1319 restart:
1320 	bwillwrite();
1321 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1322 	    pathseg, path, td);
1323 	if ((error = namei(&nd)) != 0)
1324 		return (error);
1325 	vfslocked = NDHASGIANT(&nd);
1326 	if (nd.ni_vp != NULL) {
1327 		NDFREE(&nd, NDF_ONLY_PNBUF);
1328 		if (nd.ni_vp == nd.ni_dvp)
1329 			vrele(nd.ni_dvp);
1330 		else
1331 			vput(nd.ni_dvp);
1332 		vrele(nd.ni_vp);
1333 		VFS_UNLOCK_GIANT(vfslocked);
1334 		return (EEXIST);
1335 	}
1336 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1337 		NDFREE(&nd, NDF_ONLY_PNBUF);
1338 		vput(nd.ni_dvp);
1339 		VFS_UNLOCK_GIANT(vfslocked);
1340 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1341 			return (error);
1342 		goto restart;
1343 	}
1344 	VATTR_NULL(&vattr);
1345 	vattr.va_type = VFIFO;
1346 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1347 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1348 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1349 #ifdef MAC
1350 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1351 	    &vattr);
1352 	if (error)
1353 		goto out;
1354 #endif
1355 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1356 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1357 	if (error == 0)
1358 		vput(nd.ni_vp);
1359 #ifdef MAC
1360 out:
1361 #endif
1362 	vput(nd.ni_dvp);
1363 	vn_finished_write(mp);
1364 	VFS_UNLOCK_GIANT(vfslocked);
1365 	NDFREE(&nd, NDF_ONLY_PNBUF);
1366 	return (error);
1367 }
1368 
1369 /*
1370  * Make a hard file link.
1371  */
1372 #ifndef _SYS_SYSPROTO_H_
1373 struct link_args {
1374 	char	*path;
1375 	char	*link;
1376 };
1377 #endif
1378 int
1379 link(td, uap)
1380 	struct thread *td;
1381 	register struct link_args /* {
1382 		char *path;
1383 		char *link;
1384 	} */ *uap;
1385 {
1386 	int error;
1387 
1388 	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1389 	return (error);
1390 }
1391 
/*
 * Hard link policy knobs, registered as read/write integers under the
 * _security_bsd sysctl node.  Both default to 0 (check disabled) and are
 * consulted by can_hardlink().
 */
SYSCTL_DECL(_security_bsd);

/* Non-zero: the file's owner must match the credential's uid. */
static int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
/* Non-zero: the credential must be a member of the file's group. */
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");
1404 
1405 static int
1406 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1407 {
1408 	struct vattr va;
1409 	int error;
1410 
1411 	if (suser_cred(cred, SUSER_ALLOWJAIL) == 0)
1412 		return (0);
1413 
1414 	if (!hardlink_check_uid && !hardlink_check_gid)
1415 		return (0);
1416 
1417 	error = VOP_GETATTR(vp, &va, cred, td);
1418 	if (error != 0)
1419 		return (error);
1420 
1421 	if (hardlink_check_uid) {
1422 		if (cred->cr_uid != va.va_uid)
1423 			return (EPERM);
1424 	}
1425 
1426 	if (hardlink_check_gid) {
1427 		if (!groupmember(va.va_gid, cred))
1428 			return (EPERM);
1429 	}
1430 
1431 	return (0);
1432 }
1433 
1434 int
1435 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1436 {
1437 	struct vnode *vp;
1438 	struct mount *mp;
1439 	struct nameidata nd;
1440 	int vfslocked;
1441 	int lvfslocked;
1442 	int error;
1443 
1444 	bwillwrite();
1445 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
1446 	if ((error = namei(&nd)) != 0)
1447 		return (error);
1448 	vfslocked = NDHASGIANT(&nd);
1449 	NDFREE(&nd, NDF_ONLY_PNBUF);
1450 	vp = nd.ni_vp;
1451 	if (vp->v_type == VDIR) {
1452 		vrele(vp);
1453 		VFS_UNLOCK_GIANT(vfslocked);
1454 		return (EPERM);		/* POSIX */
1455 	}
1456 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1457 		vrele(vp);
1458 		VFS_UNLOCK_GIANT(vfslocked);
1459 		return (error);
1460 	}
1461 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1462 	    segflg, link, td);
1463 	if ((error = namei(&nd)) == 0) {
1464 		lvfslocked = NDHASGIANT(&nd);
1465 		if (nd.ni_vp != NULL) {
1466 			if (nd.ni_dvp == nd.ni_vp)
1467 				vrele(nd.ni_dvp);
1468 			else
1469 				vput(nd.ni_dvp);
1470 			vrele(nd.ni_vp);
1471 			error = EEXIST;
1472 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1473 		    == 0) {
1474 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1475 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1476 			error = can_hardlink(vp, td, td->td_ucred);
1477 			if (error == 0)
1478 #ifdef MAC
1479 				error = mac_check_vnode_link(td->td_ucred,
1480 				    nd.ni_dvp, vp, &nd.ni_cnd);
1481 			if (error == 0)
1482 #endif
1483 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1484 			VOP_UNLOCK(vp, 0, td);
1485 			vput(nd.ni_dvp);
1486 		}
1487 		NDFREE(&nd, NDF_ONLY_PNBUF);
1488 		VFS_UNLOCK_GIANT(lvfslocked);
1489 	}
1490 	vrele(vp);
1491 	vn_finished_write(mp);
1492 	VFS_UNLOCK_GIANT(vfslocked);
1493 	return (error);
1494 }
1495 
1496 /*
1497  * Make a symbolic link.
1498  */
1499 #ifndef _SYS_SYSPROTO_H_
1500 struct symlink_args {
1501 	char	*path;
1502 	char	*link;
1503 };
1504 #endif
1505 int
1506 symlink(td, uap)
1507 	struct thread *td;
1508 	register struct symlink_args /* {
1509 		char *path;
1510 		char *link;
1511 	} */ *uap;
1512 {
1513 
1514 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1515 }
1516 
/*
 * Common implementation of symlink(): create the symbolic link 'link'
 * whose contents are the string 'path'.  Both are interpreted according
 * to segflg.
 *
 * Returns 0 on success or an errno value: the copyinstr() or namei()
 * error, EEXIST when 'link' already exists, or the error returned by the
 * MAC check / VOP_SYMLINK.
 */
int
kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
{
	struct mount *mp;
	struct vattr vattr;
	char *syspath;
	int error;
	struct nameidata nd;
	int vfslocked;

	/*
	 * VOP_SYMLINK is handed the target string through syspath.  A
	 * kernel-space string is used directly; otherwise it is copied into
	 * a buffer from namei_zone, which is freed at the out label.
	 */
	if (segflg == UIO_SYSSPACE) {
		syspath = path;
	} else {
		syspath = uma_zalloc(namei_zone, M_WAITOK);
		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
			goto out;
	}
restart:
	bwillwrite();
	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
	    segflg, link, td);
	if ((error = namei(&nd)) != 0)
		goto out;
	vfslocked = NDHASGIANT(&nd);
	if (nd.ni_vp) {
		/*
		 * The name already exists: drop the path buffer, the parent
		 * (vrele when it is the same vnode as the target, vput
		 * otherwise) and the target itself, then fail.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		VFS_UNLOCK_GIANT(vfslocked);
		error = EEXIST;
		goto out;
	}
	/*
	 * Try to start the write without sleeping.  If that fails, release
	 * everything obtained from the lookup, wait with V_XSLEEP and redo
	 * the lookup; syspath is kept across the restart.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			goto out;
		goto restart;
	}
	VATTR_NULL(&vattr);
	/* The creation mask is read under the filedesc fast lock. */
	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
#ifdef MAC
	/* va_type is only filled in for the benefit of the MAC check. */
	vattr.va_type = VLNK;
	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error)
		goto out2;
#endif
	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
	/* The new vnode is not handed to the caller. */
	if (error == 0)
		vput(nd.ni_vp);
#ifdef MAC
out2:
#endif
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
out:
	/* Only a buffer allocated above is freed; a caller's string is not. */
	if (segflg != UIO_SYSSPACE)
		uma_zfree(namei_zone, syspath);
	return (error);
}
1587 
/*
 * Delete a whiteout from the filesystem.
 *
 * The user-space path is looked up with DOWHITEOUT.  The call only
 * proceeds when the lookup found no vnode and flagged the component as a
 * whiteout (ISWHITEOUT); anything else fails with EEXIST.  Otherwise the
 * result is the error from namei() or VOP_WHITEOUT(..., DELETE).
 */
int
undelete(td, uap)
	struct thread *td;
	register struct undelete_args /* {
		char *path;
	} */ *uap;
{
	int error;
	struct mount *mp;
	struct nameidata nd;
	int vfslocked;

restart:
	bwillwrite();
	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
	    UIO_USERSPACE, uap->path, td);
	error = namei(&nd);
	if (error)
		return (error);
	vfslocked = NDHASGIANT(&nd);

	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
		/*
		 * Either a real vnode exists under this name or the entry is
		 * not a whiteout.  Release the parent (vrele when it is the
		 * same vnode as the target, vput otherwise) and the target,
		 * if any.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if (nd.ni_vp)
			vrele(nd.ni_vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (EEXIST);
	}
	/*
	 * Try to start the write without sleeping.  If that fails, release
	 * everything obtained from the lookup, wait with V_XSLEEP and redo
	 * the whole lookup from the restart label.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
1639 
1640 /*
1641  * Delete a name from the filesystem.
1642  */
1643 #ifndef _SYS_SYSPROTO_H_
1644 struct unlink_args {
1645 	char	*path;
1646 };
1647 #endif
1648 int
1649 unlink(td, uap)
1650 	struct thread *td;
1651 	struct unlink_args /* {
1652 		char *path;
1653 	} */ *uap;
1654 {
1655 	int error;
1656 
1657 	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1658 	return (error);
1659 }
1660 
/*
 * Common implementation of unlink(): remove the directory entry named by
 * path (interpreted according to pathseg).
 *
 * Returns 0 on success or an errno value: the namei() error (with EINVAL
 * reported as EPERM), EPERM when the target is a directory, EBUSY when it
 * is flagged VV_ROOT, or the error returned by the MAC check / VOP_REMOVE.
 */
int
kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
{
	struct mount *mp;
	struct vnode *vp;
	int error;
	struct nameidata nd;
	int vfslocked;

restart:
	bwillwrite();
	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
	    pathseg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error == EINVAL ? EPERM : error);
	vfslocked = NDHASGIANT(&nd);
	vp = nd.ni_vp;
	/* error is 0 here; the checks below only ever set it. */
	if (vp->v_type == VDIR)
		error = EPERM;		/* POSIX */
	else {
		/*
		 * The root of a mounted filesystem cannot be deleted.
		 *
		 * XXX: can this only be a VDIR case?
		 */
		if (vp->v_vflag & VV_ROOT)
			error = EBUSY;
	}
	if (error == 0) {
		/*
		 * Try to start the write without sleeping.  If that fails,
		 * release both vnodes from the lookup, wait with V_XSLEEP
		 * and redo the whole lookup from the restart label.
		 */
		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
			NDFREE(&nd, NDF_ONLY_PNBUF);
			vput(nd.ni_dvp);
			if (vp == nd.ni_dvp)
				vrele(vp);
			else
				vput(vp);
			VFS_UNLOCK_GIANT(vfslocked);
			if ((error = vn_start_write(NULL, &mp,
			    V_XSLEEP | PCATCH)) != 0)
				return (error);
			goto restart;
		}
#ifdef MAC
		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
		    &nd.ni_cnd);
		if (error)
			goto out;
#endif
		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
#ifdef MAC
out:
#endif
		vn_finished_write(mp);
	}
	/*
	 * Common cleanup: the parent is always vput; the target is only
	 * vrele'd when it is the same vnode as the parent, vput otherwise.
	 */
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	if (vp == nd.ni_dvp)
		vrele(vp);
	else
		vput(vp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
1725 
/*
 * Reposition read/write file offset.
 *
 * On success the new offset is stored in fp->f_offset and returned to
 * the caller as an off_t through td->td_retval.  Errors: the fget()
 * error, ESPIPE when the file's ops lack DFLAG_SEEKABLE, EOVERFLOW when
 * the resulting offset would not fit, EINVAL for an unknown whence or a
 * negative result, or the VOP_GETATTR error for L_XTND.  The overflow
 * and negative-offset checks are skipped for character devices (noneg
 * is 0 when the vnode type is VCHR).
 */
#ifndef _SYS_SYSPROTO_H_
struct lseek_args {
	int	fd;
	int	pad;
	off_t	offset;
	int	whence;
};
#endif
int
lseek(td, uap)
	struct thread *td;
	register struct lseek_args /* {
		int fd;
		int pad;
		off_t offset;
		int whence;
	} */ *uap;
{
	struct ucred *cred = td->td_ucred;
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	off_t offset;
	int error, noneg;
	int vfslocked;

	if ((error = fget(td, uap->fd, &fp)) != 0)
		return (error);
	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
		fdrop(fp, td);
		return (ESPIPE);
	}
	vp = fp->f_vnode;
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	noneg = (vp->v_type != VCHR);
	offset = uap->offset;
	/* error is 0 here (fget succeeded); the cases below only set it. */
	switch (uap->whence) {
	case L_INCR:
		/* Relative to the current offset; guard the addition. */
		if (noneg &&
		    (fp->f_offset < 0 ||
		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
			error = EOVERFLOW;
			break;
		}
		offset += fp->f_offset;
		break;
	case L_XTND:
		/* Relative to the file size reported by VOP_GETATTR. */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		error = VOP_GETATTR(vp, &vattr, cred, td);
		VOP_UNLOCK(vp, 0, td);
		if (error)
			break;
		if (noneg &&
		    (vattr.va_size > OFF_MAX ||
		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
			error = EOVERFLOW;
			break;
		}
		offset += vattr.va_size;
		break;
	case L_SET:
		/* Absolute: the requested offset is used as is. */
		break;
	default:
		error = EINVAL;
	}
	if (error == 0 && noneg && offset < 0)
		error = EINVAL;
	if (error != 0)
		goto drop;
	fp->f_offset = offset;
	*(off_t *)(td->td_retval) = fp->f_offset;
drop:
	fdrop(fp, td);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
1805 
1806 #if defined(COMPAT_43)
1807 /*
1808  * Reposition read/write file offset.
1809  */
1810 #ifndef _SYS_SYSPROTO_H_
1811 struct olseek_args {
1812 	int	fd;
1813 	long	offset;
1814 	int	whence;
1815 };
1816 #endif
1817 int
1818 olseek(td, uap)
1819 	struct thread *td;
1820 	register struct olseek_args /* {
1821 		int fd;
1822 		long offset;
1823 		int whence;
1824 	} */ *uap;
1825 {
1826 	struct lseek_args /* {
1827 		int fd;
1828 		int pad;
1829 		off_t offset;
1830 		int whence;
1831 	} */ nuap;
1832 	int error;
1833 
1834 	nuap.fd = uap->fd;
1835 	nuap.offset = uap->offset;
1836 	nuap.whence = uap->whence;
1837 	error = lseek(td, &nuap);
1838 	return (error);
1839 }
1840 #endif /* COMPAT_43 */
1841 
1842 /*
1843  * Check access permissions using passed credentials.
1844  */
1845 static int
1846 vn_access(vp, user_flags, cred, td)
1847 	struct vnode	*vp;
1848 	int		user_flags;
1849 	struct ucred	*cred;
1850 	struct thread	*td;
1851 {
1852 	int error, flags;
1853 
1854 	/* Flags == 0 means only check for existence. */
1855 	error = 0;
1856 	if (user_flags) {
1857 		flags = 0;
1858 		if (user_flags & R_OK)
1859 			flags |= VREAD;
1860 		if (user_flags & W_OK)
1861 			flags |= VWRITE;
1862 		if (user_flags & X_OK)
1863 			flags |= VEXEC;
1864 #ifdef MAC
1865 		error = mac_check_vnode_access(cred, vp, flags);
1866 		if (error)
1867 			return (error);
1868 #endif
1869 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1870 			error = VOP_ACCESS(vp, flags, cred, td);
1871 	}
1872 	return (error);
1873 }
1874 
1875 /*
1876  * Check access permissions using "real" credentials.
1877  */
1878 #ifndef _SYS_SYSPROTO_H_
1879 struct access_args {
1880 	char	*path;
1881 	int	flags;
1882 };
1883 #endif
1884 int
1885 access(td, uap)
1886 	struct thread *td;
1887 	register struct access_args /* {
1888 		char *path;
1889 		int flags;
1890 	} */ *uap;
1891 {
1892 
1893 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1894 }
1895 
1896 int
1897 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1898 {
1899 	struct ucred *cred, *tmpcred;
1900 	register struct vnode *vp;
1901 	struct nameidata nd;
1902 	int vfslocked;
1903 	int error;
1904 
1905 	/*
1906 	 * Create and modify a temporary credential instead of one that
1907 	 * is potentially shared.  This could also mess up socket
1908 	 * buffer accounting which can run in an interrupt context.
1909 	 */
1910 	cred = td->td_ucred;
1911 	tmpcred = crdup(cred);
1912 	tmpcred->cr_uid = cred->cr_ruid;
1913 	tmpcred->cr_groups[0] = cred->cr_rgid;
1914 	td->td_ucred = tmpcred;
1915 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1916 	    pathseg, path, td);
1917 	if ((error = namei(&nd)) != 0)
1918 		goto out1;
1919 	vfslocked = NDHASGIANT(&nd);
1920 	vp = nd.ni_vp;
1921 
1922 	error = vn_access(vp, flags, tmpcred, td);
1923 	NDFREE(&nd, NDF_ONLY_PNBUF);
1924 	vput(vp);
1925 	VFS_UNLOCK_GIANT(vfslocked);
1926 out1:
1927 	td->td_ucred = cred;
1928 	crfree(tmpcred);
1929 	return (error);
1930 }
1931 
1932 /*
1933  * Check access permissions using "effective" credentials.
1934  */
1935 #ifndef _SYS_SYSPROTO_H_
1936 struct eaccess_args {
1937 	char	*path;
1938 	int	flags;
1939 };
1940 #endif
1941 int
1942 eaccess(td, uap)
1943 	struct thread *td;
1944 	register struct eaccess_args /* {
1945 		char *path;
1946 		int flags;
1947 	} */ *uap;
1948 {
1949 
1950 	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1951 }
1952 
1953 int
1954 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1955 {
1956 	struct nameidata nd;
1957 	struct vnode *vp;
1958 	int vfslocked;
1959 	int error;
1960 
1961 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1962 	    pathseg, path, td);
1963 	if ((error = namei(&nd)) != 0)
1964 		return (error);
1965 	vp = nd.ni_vp;
1966 	vfslocked = NDHASGIANT(&nd);
1967 	error = vn_access(vp, flags, td->td_ucred, td);
1968 	NDFREE(&nd, NDF_ONLY_PNBUF);
1969 	vput(vp);
1970 	VFS_UNLOCK_GIANT(vfslocked);
1971 	return (error);
1972 }
1973 
1974 #if defined(COMPAT_43)
1975 /*
1976  * Get file status; this version follows links.
1977  */
1978 #ifndef _SYS_SYSPROTO_H_
1979 struct ostat_args {
1980 	char	*path;
1981 	struct ostat *ub;
1982 };
1983 #endif
1984 int
1985 ostat(td, uap)
1986 	struct thread *td;
1987 	register struct ostat_args /* {
1988 		char *path;
1989 		struct ostat *ub;
1990 	} */ *uap;
1991 {
1992 	struct stat sb;
1993 	struct ostat osb;
1994 	int error;
1995 
1996 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1997 	if (error)
1998 		return (error);
1999 	cvtstat(&sb, &osb);
2000 	error = copyout(&osb, uap->ub, sizeof (osb));
2001 	return (error);
2002 }
2003 
2004 /*
2005  * Get file status; this version does not follow links.
2006  */
2007 #ifndef _SYS_SYSPROTO_H_
2008 struct olstat_args {
2009 	char	*path;
2010 	struct ostat *ub;
2011 };
2012 #endif
2013 int
2014 olstat(td, uap)
2015 	struct thread *td;
2016 	register struct olstat_args /* {
2017 		char *path;
2018 		struct ostat *ub;
2019 	} */ *uap;
2020 {
2021 	struct stat sb;
2022 	struct ostat osb;
2023 	int error;
2024 
2025 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2026 	if (error)
2027 		return (error);
2028 	cvtstat(&sb, &osb);
2029 	error = copyout(&osb, uap->ub, sizeof (osb));
2030 	return (error);
2031 }
2032 
2033 /*
2034  * Convert from an old to a new stat structure.
2035  */
2036 void
2037 cvtstat(st, ost)
2038 	struct stat *st;
2039 	struct ostat *ost;
2040 {
2041 
2042 	ost->st_dev = st->st_dev;
2043 	ost->st_ino = st->st_ino;
2044 	ost->st_mode = st->st_mode;
2045 	ost->st_nlink = st->st_nlink;
2046 	ost->st_uid = st->st_uid;
2047 	ost->st_gid = st->st_gid;
2048 	ost->st_rdev = st->st_rdev;
2049 	if (st->st_size < (quad_t)1 << 32)
2050 		ost->st_size = st->st_size;
2051 	else
2052 		ost->st_size = -2;
2053 	ost->st_atime = st->st_atime;
2054 	ost->st_mtime = st->st_mtime;
2055 	ost->st_ctime = st->st_ctime;
2056 	ost->st_blksize = st->st_blksize;
2057 	ost->st_blocks = st->st_blocks;
2058 	ost->st_flags = st->st_flags;
2059 	ost->st_gen = st->st_gen;
2060 }
2061 #endif /* COMPAT_43 */
2062 
2063 /*
2064  * Get file status; this version follows links.
2065  */
2066 #ifndef _SYS_SYSPROTO_H_
2067 struct stat_args {
2068 	char	*path;
2069 	struct stat *ub;
2070 };
2071 #endif
2072 int
2073 stat(td, uap)
2074 	struct thread *td;
2075 	register struct stat_args /* {
2076 		char *path;
2077 		struct stat *ub;
2078 	} */ *uap;
2079 {
2080 	struct stat sb;
2081 	int error;
2082 
2083 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2084 	if (error == 0)
2085 		error = copyout(&sb, uap->ub, sizeof (sb));
2086 	return (error);
2087 }
2088 
2089 int
2090 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2091 {
2092 	struct nameidata nd;
2093 	struct stat sb;
2094 	int error, vfslocked;
2095 
2096 	NDINIT(&nd, LOOKUP,
2097 	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2098 	    pathseg, path, td);
2099 	if ((error = namei(&nd)) != 0)
2100 		return (error);
2101 	vfslocked = NDHASGIANT(&nd);
2102 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2103 	NDFREE(&nd, NDF_ONLY_PNBUF);
2104 	vput(nd.ni_vp);
2105 	VFS_UNLOCK_GIANT(vfslocked);
2106 	if (error)
2107 		return (error);
2108 	*sbp = sb;
2109 	return (0);
2110 }
2111 
2112 /*
2113  * Get file status; this version does not follow links.
2114  */
2115 #ifndef _SYS_SYSPROTO_H_
2116 struct lstat_args {
2117 	char	*path;
2118 	struct stat *ub;
2119 };
2120 #endif
2121 int
2122 lstat(td, uap)
2123 	struct thread *td;
2124 	register struct lstat_args /* {
2125 		char *path;
2126 		struct stat *ub;
2127 	} */ *uap;
2128 {
2129 	struct stat sb;
2130 	int error;
2131 
2132 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2133 	if (error == 0)
2134 		error = copyout(&sb, uap->ub, sizeof (sb));
2135 	return (error);
2136 }
2137 
2138 int
2139 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2140 {
2141 	struct vnode *vp;
2142 	struct stat sb;
2143 	struct nameidata nd;
2144 	int error, vfslocked;
2145 
2146 	NDINIT(&nd, LOOKUP,
2147 	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2148 	    pathseg, path, td);
2149 	if ((error = namei(&nd)) != 0)
2150 		return (error);
2151 	vfslocked = NDHASGIANT(&nd);
2152 	vp = nd.ni_vp;
2153 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2154 	NDFREE(&nd, NDF_ONLY_PNBUF);
2155 	vput(vp);
2156 	VFS_UNLOCK_GIANT(vfslocked);
2157 	if (error)
2158 		return (error);
2159 	*sbp = sb;
2160 	return (0);
2161 }
2162 
2163 /*
2164  * Implementation of the NetBSD [l]stat() functions.
2165  */
2166 void
2167 cvtnstat(sb, nsb)
2168 	struct stat *sb;
2169 	struct nstat *nsb;
2170 {
2171 	bzero(nsb, sizeof *nsb);
2172 	nsb->st_dev = sb->st_dev;
2173 	nsb->st_ino = sb->st_ino;
2174 	nsb->st_mode = sb->st_mode;
2175 	nsb->st_nlink = sb->st_nlink;
2176 	nsb->st_uid = sb->st_uid;
2177 	nsb->st_gid = sb->st_gid;
2178 	nsb->st_rdev = sb->st_rdev;
2179 	nsb->st_atimespec = sb->st_atimespec;
2180 	nsb->st_mtimespec = sb->st_mtimespec;
2181 	nsb->st_ctimespec = sb->st_ctimespec;
2182 	nsb->st_size = sb->st_size;
2183 	nsb->st_blocks = sb->st_blocks;
2184 	nsb->st_blksize = sb->st_blksize;
2185 	nsb->st_flags = sb->st_flags;
2186 	nsb->st_gen = sb->st_gen;
2187 	nsb->st_birthtimespec = sb->st_birthtimespec;
2188 }
2189 
2190 #ifndef _SYS_SYSPROTO_H_
2191 struct nstat_args {
2192 	char	*path;
2193 	struct nstat *ub;
2194 };
2195 #endif
2196 int
2197 nstat(td, uap)
2198 	struct thread *td;
2199 	register struct nstat_args /* {
2200 		char *path;
2201 		struct nstat *ub;
2202 	} */ *uap;
2203 {
2204 	struct stat sb;
2205 	struct nstat nsb;
2206 	int error;
2207 
2208 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2209 	if (error)
2210 		return (error);
2211 	cvtnstat(&sb, &nsb);
2212 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2213 	return (error);
2214 }
2215 
2216 /*
2217  * NetBSD lstat.  Get file status; this version does not follow links.
2218  */
2219 #ifndef _SYS_SYSPROTO_H_
2220 struct lstat_args {
2221 	char	*path;
2222 	struct stat *ub;
2223 };
2224 #endif
2225 int
2226 nlstat(td, uap)
2227 	struct thread *td;
2228 	register struct nlstat_args /* {
2229 		char *path;
2230 		struct nstat *ub;
2231 	} */ *uap;
2232 {
2233 	struct stat sb;
2234 	struct nstat nsb;
2235 	int error;
2236 
2237 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2238 	if (error)
2239 		return (error);
2240 	cvtnstat(&sb, &nsb);
2241 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2242 	return (error);
2243 }
2244 
2245 /*
2246  * Get configurable pathname variables.
2247  */
2248 #ifndef _SYS_SYSPROTO_H_
2249 struct pathconf_args {
2250 	char	*path;
2251 	int	name;
2252 };
2253 #endif
2254 int
2255 pathconf(td, uap)
2256 	struct thread *td;
2257 	register struct pathconf_args /* {
2258 		char *path;
2259 		int name;
2260 	} */ *uap;
2261 {
2262 
2263 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2264 }
2265 
2266 int
2267 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2268 {
2269 	struct nameidata nd;
2270 	int error, vfslocked;
2271 
2272 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2273 	    pathseg, path, td);
2274 	if ((error = namei(&nd)) != 0)
2275 		return (error);
2276 	vfslocked = NDHASGIANT(&nd);
2277 	NDFREE(&nd, NDF_ONLY_PNBUF);
2278 
2279 	/* If asynchronous I/O is available, it works for all files. */
2280 	if (name == _PC_ASYNC_IO)
2281 		td->td_retval[0] = async_io_version;
2282 	else
2283 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2284 	vput(nd.ni_vp);
2285 	VFS_UNLOCK_GIANT(vfslocked);
2286 	return (error);
2287 }
2288 
2289 /*
2290  * Return target name of a symbolic link.
2291  */
2292 #ifndef _SYS_SYSPROTO_H_
2293 struct readlink_args {
2294 	char	*path;
2295 	char	*buf;
2296 	int	count;
2297 };
2298 #endif
2299 int
2300 readlink(td, uap)
2301 	struct thread *td;
2302 	register struct readlink_args /* {
2303 		char *path;
2304 		char *buf;
2305 		int count;
2306 	} */ *uap;
2307 {
2308 
2309 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2310 	    UIO_USERSPACE, uap->count));
2311 }
2312 
2313 int
2314 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2315     enum uio_seg bufseg, int count)
2316 {
2317 	register struct vnode *vp;
2318 	struct iovec aiov;
2319 	struct uio auio;
2320 	int error;
2321 	struct nameidata nd;
2322 	int vfslocked;
2323 
2324 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2325 	    pathseg, path, td);
2326 	if ((error = namei(&nd)) != 0)
2327 		return (error);
2328 	NDFREE(&nd, NDF_ONLY_PNBUF);
2329 	vfslocked = NDHASGIANT(&nd);
2330 	vp = nd.ni_vp;
2331 #ifdef MAC
2332 	error = mac_check_vnode_readlink(td->td_ucred, vp);
2333 	if (error) {
2334 		vput(vp);
2335 		VFS_UNLOCK_GIANT(vfslocked);
2336 		return (error);
2337 	}
2338 #endif
2339 	if (vp->v_type != VLNK)
2340 		error = EINVAL;
2341 	else {
2342 		aiov.iov_base = buf;
2343 		aiov.iov_len = count;
2344 		auio.uio_iov = &aiov;
2345 		auio.uio_iovcnt = 1;
2346 		auio.uio_offset = 0;
2347 		auio.uio_rw = UIO_READ;
2348 		auio.uio_segflg = bufseg;
2349 		auio.uio_td = td;
2350 		auio.uio_resid = count;
2351 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2352 	}
2353 	vput(vp);
2354 	VFS_UNLOCK_GIANT(vfslocked);
2355 	td->td_retval[0] = count - auio.uio_resid;
2356 	return (error);
2357 }
2358 
2359 /*
2360  * Common implementation code for chflags() and fchflags().
2361  */
2362 static int
2363 setfflags(td, vp, flags)
2364 	struct thread *td;
2365 	struct vnode *vp;
2366 	int flags;
2367 {
2368 	int error;
2369 	struct mount *mp;
2370 	struct vattr vattr;
2371 
2372 	/*
2373 	 * Prevent non-root users from setting flags on devices.  When
2374 	 * a device is reused, users can retain ownership of the device
2375 	 * if they are allowed to set flags and programs assume that
2376 	 * chown can't fail when done as root.
2377 	 */
2378 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2379 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
2380 		if (error)
2381 			return (error);
2382 	}
2383 
2384 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2385 		return (error);
2386 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2387 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2388 	VATTR_NULL(&vattr);
2389 	vattr.va_flags = flags;
2390 #ifdef MAC
2391 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2392 	if (error == 0)
2393 #endif
2394 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2395 	VOP_UNLOCK(vp, 0, td);
2396 	vn_finished_write(mp);
2397 	return (error);
2398 }
2399 
2400 /*
2401  * Change flags of a file given a path name.
2402  */
2403 #ifndef _SYS_SYSPROTO_H_
2404 struct chflags_args {
2405 	char	*path;
2406 	int	flags;
2407 };
2408 #endif
2409 int
2410 chflags(td, uap)
2411 	struct thread *td;
2412 	register struct chflags_args /* {
2413 		char *path;
2414 		int flags;
2415 	} */ *uap;
2416 {
2417 	int error;
2418 	struct nameidata nd;
2419 	int vfslocked;
2420 
2421 	AUDIT_ARG(fflags, uap->flags);
2422 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2423 	    uap->path, td);
2424 	if ((error = namei(&nd)) != 0)
2425 		return (error);
2426 	NDFREE(&nd, NDF_ONLY_PNBUF);
2427 	vfslocked = NDHASGIANT(&nd);
2428 	error = setfflags(td, nd.ni_vp, uap->flags);
2429 	vrele(nd.ni_vp);
2430 	VFS_UNLOCK_GIANT(vfslocked);
2431 	return (error);
2432 }
2433 
2434 /*
2435  * Same as chflags() but doesn't follow symlinks.
2436  */
2437 int
2438 lchflags(td, uap)
2439 	struct thread *td;
2440 	register struct lchflags_args /* {
2441 		char *path;
2442 		int flags;
2443 	} */ *uap;
2444 {
2445 	int error;
2446 	struct nameidata nd;
2447 	int vfslocked;
2448 
2449 	AUDIT_ARG(fflags, uap->flags);
2450 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2451 	    uap->path, td);
2452 	if ((error = namei(&nd)) != 0)
2453 		return (error);
2454 	vfslocked = NDHASGIANT(&nd);
2455 	NDFREE(&nd, NDF_ONLY_PNBUF);
2456 	error = setfflags(td, nd.ni_vp, uap->flags);
2457 	vrele(nd.ni_vp);
2458 	VFS_UNLOCK_GIANT(vfslocked);
2459 	return (error);
2460 }
2461 
2462 /*
2463  * Change flags of a file given a file descriptor.
2464  */
2465 #ifndef _SYS_SYSPROTO_H_
2466 struct fchflags_args {
2467 	int	fd;
2468 	int	flags;
2469 };
2470 #endif
2471 int
2472 fchflags(td, uap)
2473 	struct thread *td;
2474 	register struct fchflags_args /* {
2475 		int fd;
2476 		int flags;
2477 	} */ *uap;
2478 {
2479 	struct file *fp;
2480 	int vfslocked;
2481 	int error;
2482 
2483 	AUDIT_ARG(fd, uap->fd);
2484 	AUDIT_ARG(fflags, uap->flags);
2485 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2486 		return (error);
2487 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2488 #ifdef AUDIT
2489 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2490 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2491 	VOP_UNLOCK(fp->f_vnode, 0, td);
2492 #endif
2493 	error = setfflags(td, fp->f_vnode, uap->flags);
2494 	VFS_UNLOCK_GIANT(vfslocked);
2495 	fdrop(fp, td);
2496 	return (error);
2497 }
2498 
2499 /*
2500  * Common implementation code for chmod(), lchmod() and fchmod().
2501  */
2502 static int
2503 setfmode(td, vp, mode)
2504 	struct thread *td;
2505 	struct vnode *vp;
2506 	int mode;
2507 {
2508 	int error;
2509 	struct mount *mp;
2510 	struct vattr vattr;
2511 
2512 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2513 		return (error);
2514 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2515 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2516 	VATTR_NULL(&vattr);
2517 	vattr.va_mode = mode & ALLPERMS;
2518 #ifdef MAC
2519 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2520 	if (error == 0)
2521 #endif
2522 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2523 	VOP_UNLOCK(vp, 0, td);
2524 	vn_finished_write(mp);
2525 	return (error);
2526 }
2527 
2528 /*
2529  * Change mode of a file given path name.
2530  */
2531 #ifndef _SYS_SYSPROTO_H_
2532 struct chmod_args {
2533 	char	*path;
2534 	int	mode;
2535 };
2536 #endif
2537 int
2538 chmod(td, uap)
2539 	struct thread *td;
2540 	register struct chmod_args /* {
2541 		char *path;
2542 		int mode;
2543 	} */ *uap;
2544 {
2545 
2546 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2547 }
2548 
2549 int
2550 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2551 {
2552 	int error;
2553 	struct nameidata nd;
2554 	int vfslocked;
2555 
2556 	AUDIT_ARG(mode, mode);
2557 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2558 	if ((error = namei(&nd)) != 0)
2559 		return (error);
2560 	vfslocked = NDHASGIANT(&nd);
2561 	NDFREE(&nd, NDF_ONLY_PNBUF);
2562 	error = setfmode(td, nd.ni_vp, mode);
2563 	vrele(nd.ni_vp);
2564 	VFS_UNLOCK_GIANT(vfslocked);
2565 	return (error);
2566 }
2567 
2568 /*
2569  * Change mode of a file given path name (don't follow links.)
2570  */
2571 #ifndef _SYS_SYSPROTO_H_
2572 struct lchmod_args {
2573 	char	*path;
2574 	int	mode;
2575 };
2576 #endif
2577 int
2578 lchmod(td, uap)
2579 	struct thread *td;
2580 	register struct lchmod_args /* {
2581 		char *path;
2582 		int mode;
2583 	} */ *uap;
2584 {
2585 	int error;
2586 	struct nameidata nd;
2587 	int vfslocked;
2588 
2589 	AUDIT_ARG(mode, (mode_t)uap->mode);
2590 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2591 	    uap->path, td);
2592 	if ((error = namei(&nd)) != 0)
2593 		return (error);
2594 	vfslocked = NDHASGIANT(&nd);
2595 	NDFREE(&nd, NDF_ONLY_PNBUF);
2596 	error = setfmode(td, nd.ni_vp, uap->mode);
2597 	vrele(nd.ni_vp);
2598 	VFS_UNLOCK_GIANT(vfslocked);
2599 	return (error);
2600 }
2601 
2602 /*
2603  * Change mode of a file given a file descriptor.
2604  */
2605 #ifndef _SYS_SYSPROTO_H_
2606 struct fchmod_args {
2607 	int	fd;
2608 	int	mode;
2609 };
2610 #endif
2611 int
2612 fchmod(td, uap)
2613 	struct thread *td;
2614 	register struct fchmod_args /* {
2615 		int fd;
2616 		int mode;
2617 	} */ *uap;
2618 {
2619 	struct file *fp;
2620 	int vfslocked;
2621 	int error;
2622 
2623 	AUDIT_ARG(fd, uap->fd);
2624 	AUDIT_ARG(mode, uap->mode);
2625 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2626 		return (error);
2627 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2628 #ifdef AUDIT
2629 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2630 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2631 	VOP_UNLOCK(fp->f_vnode, 0, td);
2632 #endif
2633 	error = setfmode(td, fp->f_vnode, uap->mode);
2634 	VFS_UNLOCK_GIANT(vfslocked);
2635 	fdrop(fp, td);
2636 	return (error);
2637 }
2638 
2639 /*
2640  * Common implementation for chown(), lchown(), and fchown()
2641  */
2642 static int
2643 setfown(td, vp, uid, gid)
2644 	struct thread *td;
2645 	struct vnode *vp;
2646 	uid_t uid;
2647 	gid_t gid;
2648 {
2649 	int error;
2650 	struct mount *mp;
2651 	struct vattr vattr;
2652 
2653 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2654 		return (error);
2655 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2656 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2657 	VATTR_NULL(&vattr);
2658 	vattr.va_uid = uid;
2659 	vattr.va_gid = gid;
2660 #ifdef MAC
2661 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2662 	    vattr.va_gid);
2663 	if (error == 0)
2664 #endif
2665 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2666 	VOP_UNLOCK(vp, 0, td);
2667 	vn_finished_write(mp);
2668 	return (error);
2669 }
2670 
2671 /*
2672  * Set ownership given a path name.
2673  */
2674 #ifndef _SYS_SYSPROTO_H_
2675 struct chown_args {
2676 	char	*path;
2677 	int	uid;
2678 	int	gid;
2679 };
2680 #endif
2681 int
2682 chown(td, uap)
2683 	struct thread *td;
2684 	register struct chown_args /* {
2685 		char *path;
2686 		int uid;
2687 		int gid;
2688 	} */ *uap;
2689 {
2690 
2691 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2692 }
2693 
2694 int
2695 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2696     int gid)
2697 {
2698 	int error;
2699 	struct nameidata nd;
2700 	int vfslocked;
2701 
2702 	AUDIT_ARG(owner, uid, gid);
2703 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2704 	if ((error = namei(&nd)) != 0)
2705 		return (error);
2706 	vfslocked = NDHASGIANT(&nd);
2707 	NDFREE(&nd, NDF_ONLY_PNBUF);
2708 	error = setfown(td, nd.ni_vp, uid, gid);
2709 	vrele(nd.ni_vp);
2710 	VFS_UNLOCK_GIANT(vfslocked);
2711 	return (error);
2712 }
2713 
2714 /*
2715  * Set ownership given a path name, do not cross symlinks.
2716  */
2717 #ifndef _SYS_SYSPROTO_H_
2718 struct lchown_args {
2719 	char	*path;
2720 	int	uid;
2721 	int	gid;
2722 };
2723 #endif
2724 int
2725 lchown(td, uap)
2726 	struct thread *td;
2727 	register struct lchown_args /* {
2728 		char *path;
2729 		int uid;
2730 		int gid;
2731 	} */ *uap;
2732 {
2733 
2734 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2735 }
2736 
2737 int
2738 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2739     int gid)
2740 {
2741 	int error;
2742 	struct nameidata nd;
2743 	int vfslocked;
2744 
2745 	AUDIT_ARG(owner, uid, gid);
2746 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2747 	if ((error = namei(&nd)) != 0)
2748 		return (error);
2749 	vfslocked = NDHASGIANT(&nd);
2750 	NDFREE(&nd, NDF_ONLY_PNBUF);
2751 	error = setfown(td, nd.ni_vp, uid, gid);
2752 	vrele(nd.ni_vp);
2753 	VFS_UNLOCK_GIANT(vfslocked);
2754 	return (error);
2755 }
2756 
2757 /*
2758  * Set ownership given a file descriptor.
2759  */
2760 #ifndef _SYS_SYSPROTO_H_
2761 struct fchown_args {
2762 	int	fd;
2763 	int	uid;
2764 	int	gid;
2765 };
2766 #endif
2767 int
2768 fchown(td, uap)
2769 	struct thread *td;
2770 	register struct fchown_args /* {
2771 		int fd;
2772 		int uid;
2773 		int gid;
2774 	} */ *uap;
2775 {
2776 	struct file *fp;
2777 	int vfslocked;
2778 	int error;
2779 
2780 	AUDIT_ARG(fd, uap->fd);
2781 	AUDIT_ARG(owner, uap->uid, uap->gid);
2782 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2783 		return (error);
2784 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2785 #ifdef AUDIT
2786 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2787 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2788 	VOP_UNLOCK(fp->f_vnode, 0, td);
2789 #endif
2790 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2791 	VFS_UNLOCK_GIANT(vfslocked);
2792 	fdrop(fp, td);
2793 	return (error);
2794 }
2795 
2796 /*
2797  * Common implementation code for utimes(), lutimes(), and futimes().
2798  */
2799 static int
2800 getutimes(usrtvp, tvpseg, tsp)
2801 	const struct timeval *usrtvp;
2802 	enum uio_seg tvpseg;
2803 	struct timespec *tsp;
2804 {
2805 	struct timeval tv[2];
2806 	const struct timeval *tvp;
2807 	int error;
2808 
2809 	if (usrtvp == NULL) {
2810 		microtime(&tv[0]);
2811 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2812 		tsp[1] = tsp[0];
2813 	} else {
2814 		if (tvpseg == UIO_SYSSPACE) {
2815 			tvp = usrtvp;
2816 		} else {
2817 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2818 				return (error);
2819 			tvp = tv;
2820 		}
2821 
2822 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2823 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2824 			return (EINVAL);
2825 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2826 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2827 	}
2828 	return (0);
2829 }
2830 
2831 /*
2832  * Common implementation code for utimes(), lutimes(), and futimes().
2833  */
2834 static int
2835 setutimes(td, vp, ts, numtimes, nullflag)
2836 	struct thread *td;
2837 	struct vnode *vp;
2838 	const struct timespec *ts;
2839 	int numtimes;
2840 	int nullflag;
2841 {
2842 	int error, setbirthtime;
2843 	struct mount *mp;
2844 	struct vattr vattr;
2845 
2846 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2847 		return (error);
2848 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2849 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2850 	setbirthtime = 0;
2851 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2852 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2853 		setbirthtime = 1;
2854 	VATTR_NULL(&vattr);
2855 	vattr.va_atime = ts[0];
2856 	vattr.va_mtime = ts[1];
2857 	if (setbirthtime)
2858 		vattr.va_birthtime = ts[1];
2859 	if (numtimes > 2)
2860 		vattr.va_birthtime = ts[2];
2861 	if (nullflag)
2862 		vattr.va_vaflags |= VA_UTIMES_NULL;
2863 #ifdef MAC
2864 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2865 	    vattr.va_mtime);
2866 #endif
2867 	if (error == 0)
2868 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2869 	VOP_UNLOCK(vp, 0, td);
2870 	vn_finished_write(mp);
2871 	return (error);
2872 }
2873 
2874 /*
2875  * Set the access and modification times of a file.
2876  */
2877 #ifndef _SYS_SYSPROTO_H_
2878 struct utimes_args {
2879 	char	*path;
2880 	struct	timeval *tptr;
2881 };
2882 #endif
2883 int
2884 utimes(td, uap)
2885 	struct thread *td;
2886 	register struct utimes_args /* {
2887 		char *path;
2888 		struct timeval *tptr;
2889 	} */ *uap;
2890 {
2891 
2892 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2893 	    UIO_USERSPACE));
2894 }
2895 
2896 int
2897 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2898     struct timeval *tptr, enum uio_seg tptrseg)
2899 {
2900 	struct timespec ts[2];
2901 	int error;
2902 	struct nameidata nd;
2903 	int vfslocked;
2904 
2905 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2906 		return (error);
2907 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2908 	if ((error = namei(&nd)) != 0)
2909 		return (error);
2910 	vfslocked = NDHASGIANT(&nd);
2911 	NDFREE(&nd, NDF_ONLY_PNBUF);
2912 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2913 	vrele(nd.ni_vp);
2914 	VFS_UNLOCK_GIANT(vfslocked);
2915 	return (error);
2916 }
2917 
2918 /*
2919  * Set the access and modification times of a file.
2920  */
2921 #ifndef _SYS_SYSPROTO_H_
2922 struct lutimes_args {
2923 	char	*path;
2924 	struct	timeval *tptr;
2925 };
2926 #endif
2927 int
2928 lutimes(td, uap)
2929 	struct thread *td;
2930 	register struct lutimes_args /* {
2931 		char *path;
2932 		struct timeval *tptr;
2933 	} */ *uap;
2934 {
2935 
2936 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2937 	    UIO_USERSPACE));
2938 }
2939 
2940 int
2941 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2942     struct timeval *tptr, enum uio_seg tptrseg)
2943 {
2944 	struct timespec ts[2];
2945 	int error;
2946 	struct nameidata nd;
2947 	int vfslocked;
2948 
2949 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2950 		return (error);
2951 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2952 	if ((error = namei(&nd)) != 0)
2953 		return (error);
2954 	vfslocked = NDHASGIANT(&nd);
2955 	NDFREE(&nd, NDF_ONLY_PNBUF);
2956 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2957 	vrele(nd.ni_vp);
2958 	VFS_UNLOCK_GIANT(vfslocked);
2959 	return (error);
2960 }
2961 
2962 /*
2963  * Set the access and modification times of a file.
2964  */
2965 #ifndef _SYS_SYSPROTO_H_
2966 struct futimes_args {
2967 	int	fd;
2968 	struct	timeval *tptr;
2969 };
2970 #endif
2971 int
2972 futimes(td, uap)
2973 	struct thread *td;
2974 	register struct futimes_args /* {
2975 		int  fd;
2976 		struct timeval *tptr;
2977 	} */ *uap;
2978 {
2979 
2980 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2981 }
2982 
2983 int
2984 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2985     enum uio_seg tptrseg)
2986 {
2987 	struct timespec ts[2];
2988 	struct file *fp;
2989 	int vfslocked;
2990 	int error;
2991 
2992 	AUDIT_ARG(fd, fd);
2993 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2994 		return (error);
2995 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2996 		return (error);
2997 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2998 #ifdef AUDIT
2999 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
3000 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
3001 	VOP_UNLOCK(fp->f_vnode, 0, td);
3002 #endif
3003 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
3004 	VFS_UNLOCK_GIANT(vfslocked);
3005 	fdrop(fp, td);
3006 	return (error);
3007 }
3008 
3009 /*
3010  * Truncate a file given its path name.
3011  */
3012 #ifndef _SYS_SYSPROTO_H_
3013 struct truncate_args {
3014 	char	*path;
3015 	int	pad;
3016 	off_t	length;
3017 };
3018 #endif
3019 int
3020 truncate(td, uap)
3021 	struct thread *td;
3022 	register struct truncate_args /* {
3023 		char *path;
3024 		int pad;
3025 		off_t length;
3026 	} */ *uap;
3027 {
3028 
3029 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3030 }
3031 
3032 int
3033 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3034 {
3035 	struct mount *mp;
3036 	struct vnode *vp;
3037 	struct vattr vattr;
3038 	int error;
3039 	struct nameidata nd;
3040 	int vfslocked;
3041 
3042 	if (length < 0)
3043 		return(EINVAL);
3044 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3045 	if ((error = namei(&nd)) != 0)
3046 		return (error);
3047 	vfslocked = NDHASGIANT(&nd);
3048 	vp = nd.ni_vp;
3049 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3050 		vrele(vp);
3051 		VFS_UNLOCK_GIANT(vfslocked);
3052 		return (error);
3053 	}
3054 	NDFREE(&nd, NDF_ONLY_PNBUF);
3055 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3056 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3057 	if (vp->v_type == VDIR)
3058 		error = EISDIR;
3059 #ifdef MAC
3060 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
3061 	}
3062 #endif
3063 	else if ((error = vn_writechk(vp)) == 0 &&
3064 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3065 		VATTR_NULL(&vattr);
3066 		vattr.va_size = length;
3067 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3068 	}
3069 	vput(vp);
3070 	vn_finished_write(mp);
3071 	VFS_UNLOCK_GIANT(vfslocked);
3072 	return (error);
3073 }
3074 
3075 /*
3076  * Truncate a file given a file descriptor.
3077  */
3078 #ifndef _SYS_SYSPROTO_H_
3079 struct ftruncate_args {
3080 	int	fd;
3081 	int	pad;
3082 	off_t	length;
3083 };
3084 #endif
3085 int
3086 ftruncate(td, uap)
3087 	struct thread *td;
3088 	register struct ftruncate_args /* {
3089 		int fd;
3090 		int pad;
3091 		off_t length;
3092 	} */ *uap;
3093 {
3094 	struct mount *mp;
3095 	struct vattr vattr;
3096 	struct vnode *vp;
3097 	struct file *fp;
3098 	int vfslocked;
3099 	int error;
3100 
3101 	AUDIT_ARG(fd, uap->fd);
3102 	if (uap->length < 0)
3103 		return(EINVAL);
3104 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3105 		return (error);
3106 	if ((fp->f_flag & FWRITE) == 0) {
3107 		fdrop(fp, td);
3108 		return (EINVAL);
3109 	}
3110 	vp = fp->f_vnode;
3111 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3112 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3113 		goto drop;
3114 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3115 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3116 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3117 	if (vp->v_type == VDIR)
3118 		error = EISDIR;
3119 #ifdef MAC
3120 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3121 	    vp))) {
3122 	}
3123 #endif
3124 	else if ((error = vn_writechk(vp)) == 0) {
3125 		VATTR_NULL(&vattr);
3126 		vattr.va_size = uap->length;
3127 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3128 	}
3129 	VOP_UNLOCK(vp, 0, td);
3130 	vn_finished_write(mp);
3131 drop:
3132 	VFS_UNLOCK_GIANT(vfslocked);
3133 	fdrop(fp, td);
3134 	return (error);
3135 }
3136 
3137 #if defined(COMPAT_43)
3138 /*
3139  * Truncate a file given its path name.
3140  */
3141 #ifndef _SYS_SYSPROTO_H_
3142 struct otruncate_args {
3143 	char	*path;
3144 	long	length;
3145 };
3146 #endif
3147 int
3148 otruncate(td, uap)
3149 	struct thread *td;
3150 	register struct otruncate_args /* {
3151 		char *path;
3152 		long length;
3153 	} */ *uap;
3154 {
3155 	struct truncate_args /* {
3156 		char *path;
3157 		int pad;
3158 		off_t length;
3159 	} */ nuap;
3160 
3161 	nuap.path = uap->path;
3162 	nuap.length = uap->length;
3163 	return (truncate(td, &nuap));
3164 }
3165 
3166 /*
3167  * Truncate a file given a file descriptor.
3168  */
3169 #ifndef _SYS_SYSPROTO_H_
3170 struct oftruncate_args {
3171 	int	fd;
3172 	long	length;
3173 };
3174 #endif
3175 int
3176 oftruncate(td, uap)
3177 	struct thread *td;
3178 	register struct oftruncate_args /* {
3179 		int fd;
3180 		long length;
3181 	} */ *uap;
3182 {
3183 	struct ftruncate_args /* {
3184 		int fd;
3185 		int pad;
3186 		off_t length;
3187 	} */ nuap;
3188 
3189 	nuap.fd = uap->fd;
3190 	nuap.length = uap->length;
3191 	return (ftruncate(td, &nuap));
3192 }
3193 #endif /* COMPAT_43 */
3194 
3195 /*
3196  * Sync an open file.
3197  */
3198 #ifndef _SYS_SYSPROTO_H_
3199 struct fsync_args {
3200 	int	fd;
3201 };
3202 #endif
3203 int
3204 fsync(td, uap)
3205 	struct thread *td;
3206 	struct fsync_args /* {
3207 		int fd;
3208 	} */ *uap;
3209 {
3210 	struct vnode *vp;
3211 	struct mount *mp;
3212 	struct file *fp;
3213 	int vfslocked;
3214 	int error;
3215 
3216 	AUDIT_ARG(fd, uap->fd);
3217 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3218 		return (error);
3219 	vp = fp->f_vnode;
3220 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3221 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3222 		goto drop;
3223 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3224 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3225 	if (vp->v_object != NULL) {
3226 		VM_OBJECT_LOCK(vp->v_object);
3227 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3228 		VM_OBJECT_UNLOCK(vp->v_object);
3229 	}
3230 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3231 
3232 	VOP_UNLOCK(vp, 0, td);
3233 	vn_finished_write(mp);
3234 drop:
3235 	VFS_UNLOCK_GIANT(vfslocked);
3236 	fdrop(fp, td);
3237 	return (error);
3238 }
3239 
3240 /*
3241  * Rename files.  Source and destination must either both be directories,
3242  * or both not be directories.  If target is a directory, it must be empty.
3243  */
3244 #ifndef _SYS_SYSPROTO_H_
3245 struct rename_args {
3246 	char	*from;
3247 	char	*to;
3248 };
3249 #endif
3250 int
3251 rename(td, uap)
3252 	struct thread *td;
3253 	register struct rename_args /* {
3254 		char *from;
3255 		char *to;
3256 	} */ *uap;
3257 {
3258 
3259 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3260 }
3261 
3262 int
3263 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3264 {
3265 	struct mount *mp = NULL;
3266 	struct vnode *tvp, *fvp, *tdvp;
3267 	struct nameidata fromnd, tond;
3268 	int tvfslocked;
3269 	int fvfslocked;
3270 	int error;
3271 
3272 	bwillwrite();
3273 #ifdef MAC
3274 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3275 	    AUDITVNODE1, pathseg, from, td);
3276 #else
3277 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3278 	    AUDITVNODE1, pathseg, from, td);
3279 #endif
3280 	if ((error = namei(&fromnd)) != 0)
3281 		return (error);
3282 	fvfslocked = NDHASGIANT(&fromnd);
3283 	tvfslocked = 0;
3284 #ifdef MAC
3285 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3286 	    fromnd.ni_vp, &fromnd.ni_cnd);
3287 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3288 	VOP_UNLOCK(fromnd.ni_vp, 0, td);
3289 #endif
3290 	fvp = fromnd.ni_vp;
3291 	if (error == 0)
3292 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3293 	if (error != 0) {
3294 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3295 		vrele(fromnd.ni_dvp);
3296 		vrele(fvp);
3297 		goto out1;
3298 	}
3299 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3300 	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3301 	if (fromnd.ni_vp->v_type == VDIR)
3302 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3303 	if ((error = namei(&tond)) != 0) {
3304 		/* Translate error code for rename("dir1", "dir2/."). */
3305 		if (error == EISDIR && fvp->v_type == VDIR)
3306 			error = EINVAL;
3307 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3308 		vrele(fromnd.ni_dvp);
3309 		vrele(fvp);
3310 		vn_finished_write(mp);
3311 		goto out1;
3312 	}
3313 	tvfslocked = NDHASGIANT(&tond);
3314 	tdvp = tond.ni_dvp;
3315 	tvp = tond.ni_vp;
3316 	if (tvp != NULL) {
3317 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3318 			error = ENOTDIR;
3319 			goto out;
3320 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3321 			error = EISDIR;
3322 			goto out;
3323 		}
3324 	}
3325 	if (fvp == tdvp)
3326 		error = EINVAL;
3327 	/*
3328 	 * If the source is the same as the destination (that is, if they
3329 	 * are links to the same vnode), then there is nothing to do.
3330 	 */
3331 	if (fvp == tvp)
3332 		error = -1;
3333 #ifdef MAC
3334 	else
3335 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3336 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3337 #endif
3338 out:
3339 	if (!error) {
3340 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3341 		if (fromnd.ni_dvp != tdvp) {
3342 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3343 		}
3344 		if (tvp) {
3345 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3346 		}
3347 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3348 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3349 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3350 		NDFREE(&tond, NDF_ONLY_PNBUF);
3351 	} else {
3352 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3353 		NDFREE(&tond, NDF_ONLY_PNBUF);
3354 		if (tvp)
3355 			vput(tvp);
3356 		if (tdvp == tvp)
3357 			vrele(tdvp);
3358 		else
3359 			vput(tdvp);
3360 		vrele(fromnd.ni_dvp);
3361 		vrele(fvp);
3362 	}
3363 	vrele(tond.ni_startdir);
3364 	vn_finished_write(mp);
3365 out1:
3366 	if (fromnd.ni_startdir)
3367 		vrele(fromnd.ni_startdir);
3368 	VFS_UNLOCK_GIANT(fvfslocked);
3369 	VFS_UNLOCK_GIANT(tvfslocked);
3370 	if (error == -1)
3371 		return (0);
3372 	return (error);
3373 }
3374 
3375 /*
3376  * Make a directory file.
3377  */
3378 #ifndef _SYS_SYSPROTO_H_
3379 struct mkdir_args {
3380 	char	*path;
3381 	int	mode;
3382 };
3383 #endif
3384 int
3385 mkdir(td, uap)
3386 	struct thread *td;
3387 	register struct mkdir_args /* {
3388 		char *path;
3389 		int mode;
3390 	} */ *uap;
3391 {
3392 
3393 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3394 }
3395 
3396 int
3397 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3398 {
3399 	struct mount *mp;
3400 	struct vnode *vp;
3401 	struct vattr vattr;
3402 	int error;
3403 	struct nameidata nd;
3404 	int vfslocked;
3405 
3406 	AUDIT_ARG(mode, mode);
3407 restart:
3408 	bwillwrite();
3409 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3410 	    segflg, path, td);
3411 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3412 	if ((error = namei(&nd)) != 0)
3413 		return (error);
3414 	vfslocked = NDHASGIANT(&nd);
3415 	vp = nd.ni_vp;
3416 	if (vp != NULL) {
3417 		NDFREE(&nd, NDF_ONLY_PNBUF);
3418 		/*
3419 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3420 		 * the strange behaviour of leaving the vnode unlocked
3421 		 * if the target is the same vnode as the parent.
3422 		 */
3423 		if (vp == nd.ni_dvp)
3424 			vrele(nd.ni_dvp);
3425 		else
3426 			vput(nd.ni_dvp);
3427 		vrele(vp);
3428 		VFS_UNLOCK_GIANT(vfslocked);
3429 		return (EEXIST);
3430 	}
3431 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3432 		NDFREE(&nd, NDF_ONLY_PNBUF);
3433 		vput(nd.ni_dvp);
3434 		VFS_UNLOCK_GIANT(vfslocked);
3435 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3436 			return (error);
3437 		goto restart;
3438 	}
3439 	VATTR_NULL(&vattr);
3440 	vattr.va_type = VDIR;
3441 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3442 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3443 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3444 #ifdef MAC
3445 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3446 	    &vattr);
3447 	if (error)
3448 		goto out;
3449 #endif
3450 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3451 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3452 #ifdef MAC
3453 out:
3454 #endif
3455 	NDFREE(&nd, NDF_ONLY_PNBUF);
3456 	vput(nd.ni_dvp);
3457 	if (!error)
3458 		vput(nd.ni_vp);
3459 	vn_finished_write(mp);
3460 	VFS_UNLOCK_GIANT(vfslocked);
3461 	return (error);
3462 }
3463 
3464 /*
3465  * Remove a directory file.
3466  */
3467 #ifndef _SYS_SYSPROTO_H_
3468 struct rmdir_args {
3469 	char	*path;
3470 };
3471 #endif
3472 int
3473 rmdir(td, uap)
3474 	struct thread *td;
3475 	struct rmdir_args /* {
3476 		char *path;
3477 	} */ *uap;
3478 {
3479 
3480 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3481 }
3482 
3483 int
3484 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3485 {
3486 	struct mount *mp;
3487 	struct vnode *vp;
3488 	int error;
3489 	struct nameidata nd;
3490 	int vfslocked;
3491 
3492 restart:
3493 	bwillwrite();
3494 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3495 	    pathseg, path, td);
3496 	if ((error = namei(&nd)) != 0)
3497 		return (error);
3498 	vfslocked = NDHASGIANT(&nd);
3499 	vp = nd.ni_vp;
3500 	if (vp->v_type != VDIR) {
3501 		error = ENOTDIR;
3502 		goto out;
3503 	}
3504 	/*
3505 	 * No rmdir "." please.
3506 	 */
3507 	if (nd.ni_dvp == vp) {
3508 		error = EINVAL;
3509 		goto out;
3510 	}
3511 	/*
3512 	 * The root of a mounted filesystem cannot be deleted.
3513 	 */
3514 	if (vp->v_vflag & VV_ROOT) {
3515 		error = EBUSY;
3516 		goto out;
3517 	}
3518 #ifdef MAC
3519 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3520 	    &nd.ni_cnd);
3521 	if (error)
3522 		goto out;
3523 #endif
3524 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3525 		NDFREE(&nd, NDF_ONLY_PNBUF);
3526 		vput(vp);
3527 		if (nd.ni_dvp == vp)
3528 			vrele(nd.ni_dvp);
3529 		else
3530 			vput(nd.ni_dvp);
3531 		VFS_UNLOCK_GIANT(vfslocked);
3532 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3533 			return (error);
3534 		goto restart;
3535 	}
3536 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3537 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3538 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3539 	vn_finished_write(mp);
3540 out:
3541 	NDFREE(&nd, NDF_ONLY_PNBUF);
3542 	vput(vp);
3543 	if (nd.ni_dvp == vp)
3544 		vrele(nd.ni_dvp);
3545 	else
3546 		vput(nd.ni_dvp);
3547 	VFS_UNLOCK_GIANT(vfslocked);
3548 	return (error);
3549 }
3550 
3551 #ifdef COMPAT_43
/*
 * Read a block of directory entries in a filesystem independent format.
 *
 * COMPAT_43 variant.  Reads up to uap->count bytes of directory entries
 * from the directory open on uap->fd into the user buffer uap->buf, and
 * stores the file offset at which the read started in *uap->basep.  The
 * number of bytes transferred is returned in td->td_retval[0].
 *
 * Unless the direct path below is taken, the entries are read into a
 * temporary kernel buffer, each entry's d_type/d_namlen pair is
 * rewritten, and the result is then copied out to the user buffer.
 *
 * Errors produced here: EINVAL (count above 64K, or not a directory),
 * EBADF (descriptor not open for reading), EIO (an entry with a zero
 * d_reclen); other errors come from the routines called.
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(td, uap)
	struct thread *td;
	register struct ogetdirentries_args /* {
		int fd;
		char *buf;
		u_int count;
		long *basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
	/* Re-entered with a different vp when a union layer is switched. */
unionread:
	if (vp->v_type != VDIR) {
		fdrop(fp, td);
		return (EINVAL);
	}
	/* auio describes the caller's buffer in user space. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember the starting offset; it is what *basep receives. */
	loff = auio.uio_offset = fp->f_offset;
#ifdef MAC
	error = mac_check_vnode_readdir(td->td_ucred, vp);
	if (error) {
		VOP_UNLOCK(vp, 0, td);
		fdrop(fp, td);
		return (error);
	}
#endif
	/*
	 * Direct path, compiled only when BYTE_ORDER is not LITTLE_ENDIAN:
	 * if mnt_maxsymlinklen <= 0 the entries are read straight into the
	 * user buffer with no rewriting.  The `else' pairs with the braced
	 * block that follows the #endif.
	 */
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/*
		 * kuio is a copy of auio redirected at a temporary kernel
		 * buffer of uap->count bytes.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			/* Bytes actually produced by VOP_READDIR(). */
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				/* A zero record length would never advance. */
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					error = EIO;
					break;
				}
			}
			/* Copy out only if the whole buffer was walked. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, td);
	if (error) {
		fdrop(fp, td);
		return (error);
	}
	/* Nothing was transferred: see whether a union layer applies. */
	if (uap->count == auio.uio_resid) {
		if (union_dircheckp) {
			error = union_dircheckp(td, &vp, fp);
			/* -1 from the hook asks for the read to be redone. */
			if (error == -1)
				goto unionread;
			if (error) {
				fdrop(fp, td);
				return (error);
			}
		}
		/*
		 * XXX We could delay dropping the lock above but
		 * union_dircheckp complicates things.
		 */
		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
		if ((vp->v_vflag & VV_ROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			/*
			 * vp is the root of a MNT_UNION mount: point the
			 * file at the covered vnode, rewind the offset and
			 * read again from there.
			 */
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->f_vnode = vp;
			fp->f_data = vp;
			fp->f_offset = 0;
			vput(tvp);
			goto unionread;
		}
		VOP_UNLOCK(vp, 0, td);
	}
	/* Report the starting offset and the byte count to the caller. */
	error = copyout(&loff, uap->basep, sizeof(long));
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	return (error);
}
3704 #endif /* COMPAT_43 */
3705 
3706 /*
3707  * Read a block of directory entries in a filesystem independent format.
3708  */
3709 #ifndef _SYS_SYSPROTO_H_
3710 struct getdirentries_args {
3711 	int	fd;
3712 	char	*buf;
3713 	u_int	count;
3714 	long	*basep;
3715 };
3716 #endif
3717 int
3718 getdirentries(td, uap)
3719 	struct thread *td;
3720 	register struct getdirentries_args /* {
3721 		int fd;
3722 		char *buf;
3723 		u_int count;
3724 		long *basep;
3725 	} */ *uap;
3726 {
3727 	struct vnode *vp;
3728 	struct file *fp;
3729 	struct uio auio;
3730 	struct iovec aiov;
3731 	int vfslocked;
3732 	long loff;
3733 	int error, eofflag;
3734 
3735 	AUDIT_ARG(fd, uap->fd);
3736 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3737 		return (error);
3738 	if ((fp->f_flag & FREAD) == 0) {
3739 		fdrop(fp, td);
3740 		return (EBADF);
3741 	}
3742 	vp = fp->f_vnode;
3743 unionread:
3744 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3745 	if (vp->v_type != VDIR) {
3746 		error = EINVAL;
3747 		goto fail;
3748 	}
3749 	aiov.iov_base = uap->buf;
3750 	aiov.iov_len = uap->count;
3751 	auio.uio_iov = &aiov;
3752 	auio.uio_iovcnt = 1;
3753 	auio.uio_rw = UIO_READ;
3754 	auio.uio_segflg = UIO_USERSPACE;
3755 	auio.uio_td = td;
3756 	auio.uio_resid = uap->count;
3757 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3758 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3759 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3760 	loff = auio.uio_offset = fp->f_offset;
3761 #ifdef MAC
3762 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3763 	if (error == 0)
3764 #endif
3765 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3766 		    NULL);
3767 	fp->f_offset = auio.uio_offset;
3768 	VOP_UNLOCK(vp, 0, td);
3769 	if (error)
3770 		goto fail;
3771 	if (uap->count == auio.uio_resid) {
3772 		if (union_dircheckp) {
3773 			error = union_dircheckp(td, &vp, fp);
3774 			if (error == -1) {
3775 				VFS_UNLOCK_GIANT(vfslocked);
3776 				goto unionread;
3777 			}
3778 			if (error)
3779 				goto fail;
3780 		}
3781 		/*
3782 		 * XXX We could delay dropping the lock above but
3783 		 * union_dircheckp complicates things.
3784 		 */
3785 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3786 		if ((vp->v_vflag & VV_ROOT) &&
3787 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3788 			struct vnode *tvp = vp;
3789 			vp = vp->v_mount->mnt_vnodecovered;
3790 			VREF(vp);
3791 			fp->f_vnode = vp;
3792 			fp->f_data = vp;
3793 			fp->f_offset = 0;
3794 			vput(tvp);
3795 			VFS_UNLOCK_GIANT(vfslocked);
3796 			goto unionread;
3797 		}
3798 		VOP_UNLOCK(vp, 0, td);
3799 	}
3800 	if (uap->basep != NULL) {
3801 		error = copyout(&loff, uap->basep, sizeof(long));
3802 	}
3803 	td->td_retval[0] = uap->count - auio.uio_resid;
3804 fail:
3805 	VFS_UNLOCK_GIANT(vfslocked);
3806 	fdrop(fp, td);
3807 	return (error);
3808 }
3809 #ifndef _SYS_SYSPROTO_H_
3810 struct getdents_args {
3811 	int fd;
3812 	char *buf;
3813 	size_t count;
3814 };
3815 #endif
3816 int
3817 getdents(td, uap)
3818 	struct thread *td;
3819 	register struct getdents_args /* {
3820 		int fd;
3821 		char *buf;
3822 		u_int count;
3823 	} */ *uap;
3824 {
3825 	struct getdirentries_args ap;
3826 	ap.fd = uap->fd;
3827 	ap.buf = uap->buf;
3828 	ap.count = uap->count;
3829 	ap.basep = NULL;
3830 	return (getdirentries(td, &ap));
3831 }
3832 
3833 /*
3834  * Set the mode mask for creation of filesystem nodes.
3835  *
3836  * MP SAFE
3837  */
3838 #ifndef _SYS_SYSPROTO_H_
3839 struct umask_args {
3840 	int	newmask;
3841 };
3842 #endif
3843 int
3844 umask(td, uap)
3845 	struct thread *td;
3846 	struct umask_args /* {
3847 		int newmask;
3848 	} */ *uap;
3849 {
3850 	register struct filedesc *fdp;
3851 
3852 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3853 	fdp = td->td_proc->p_fd;
3854 	td->td_retval[0] = fdp->fd_cmask;
3855 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3856 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3857 	return (0);
3858 }
3859 
3860 /*
3861  * Void all references to file by ripping underlying filesystem
3862  * away from vnode.
3863  */
3864 #ifndef _SYS_SYSPROTO_H_
3865 struct revoke_args {
3866 	char	*path;
3867 };
3868 #endif
3869 int
3870 revoke(td, uap)
3871 	struct thread *td;
3872 	register struct revoke_args /* {
3873 		char *path;
3874 	} */ *uap;
3875 {
3876 	struct vnode *vp;
3877 	struct vattr vattr;
3878 	int error;
3879 	struct nameidata nd;
3880 	int vfslocked;
3881 
3882 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3883 	    UIO_USERSPACE, uap->path, td);
3884 	if ((error = namei(&nd)) != 0)
3885 		return (error);
3886 	vfslocked = NDHASGIANT(&nd);
3887 	vp = nd.ni_vp;
3888 	NDFREE(&nd, NDF_ONLY_PNBUF);
3889 	if (vp->v_type != VCHR) {
3890 		error = EINVAL;
3891 		goto out;
3892 	}
3893 #ifdef MAC
3894 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3895 	if (error)
3896 		goto out;
3897 #endif
3898 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3899 	if (error)
3900 		goto out;
3901 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3902 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
3903 		if (error)
3904 			goto out;
3905 	}
3906 	if (vcount(vp) > 1)
3907 		VOP_REVOKE(vp, REVOKEALL);
3908 out:
3909 	vput(vp);
3910 	VFS_UNLOCK_GIANT(vfslocked);
3911 	return (error);
3912 }
3913 
3914 /*
3915  * Convert a user file descriptor to a kernel file entry.
3916  * A reference on the file entry is held upon returning.
3917  */
3918 int
3919 getvnode(fdp, fd, fpp)
3920 	struct filedesc *fdp;
3921 	int fd;
3922 	struct file **fpp;
3923 {
3924 	int error;
3925 	struct file *fp;
3926 
3927 	fp = NULL;
3928 	if (fdp == NULL)
3929 		error = EBADF;
3930 	else {
3931 		FILEDESC_LOCK(fdp);
3932 		if ((u_int)fd >= fdp->fd_nfiles ||
3933 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3934 			error = EBADF;
3935 		else if (fp->f_vnode == NULL) {
3936 			fp = NULL;
3937 			error = EINVAL;
3938 		} else {
3939 			fhold(fp);
3940 			error = 0;
3941 		}
3942 		FILEDESC_UNLOCK(fdp);
3943 	}
3944 	*fpp = fp;
3945 	return (error);
3946 }
3947 
3948 /*
3949  * Get (NFS) file handle
3950  */
3951 #ifndef _SYS_SYSPROTO_H_
3952 struct lgetfh_args {
3953 	char	*fname;
3954 	fhandle_t *fhp;
3955 };
3956 #endif
3957 int
3958 lgetfh(td, uap)
3959 	struct thread *td;
3960 	register struct lgetfh_args *uap;
3961 {
3962 	struct nameidata nd;
3963 	fhandle_t fh;
3964 	register struct vnode *vp;
3965 	int vfslocked;
3966 	int error;
3967 
3968 	error = suser(td);
3969 	if (error)
3970 		return (error);
3971 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3972 	    UIO_USERSPACE, uap->fname, td);
3973 	error = namei(&nd);
3974 	if (error)
3975 		return (error);
3976 	vfslocked = NDHASGIANT(&nd);
3977 	NDFREE(&nd, NDF_ONLY_PNBUF);
3978 	vp = nd.ni_vp;
3979 	bzero(&fh, sizeof(fh));
3980 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3981 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3982 	vput(vp);
3983 	VFS_UNLOCK_GIANT(vfslocked);
3984 	if (error)
3985 		return (error);
3986 	error = copyout(&fh, uap->fhp, sizeof (fh));
3987 	return (error);
3988 }
3989 
3990 #ifndef _SYS_SYSPROTO_H_
3991 struct getfh_args {
3992 	char	*fname;
3993 	fhandle_t *fhp;
3994 };
3995 #endif
3996 int
3997 getfh(td, uap)
3998 	struct thread *td;
3999 	register struct getfh_args *uap;
4000 {
4001 	struct nameidata nd;
4002 	fhandle_t fh;
4003 	register struct vnode *vp;
4004 	int vfslocked;
4005 	int error;
4006 
4007 	error = suser(td);
4008 	if (error)
4009 		return (error);
4010 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4011 	    UIO_USERSPACE, uap->fname, td);
4012 	error = namei(&nd);
4013 	if (error)
4014 		return (error);
4015 	vfslocked = NDHASGIANT(&nd);
4016 	NDFREE(&nd, NDF_ONLY_PNBUF);
4017 	vp = nd.ni_vp;
4018 	bzero(&fh, sizeof(fh));
4019 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4020 	error = VFS_VPTOFH(vp, &fh.fh_fid);
4021 	vput(vp);
4022 	VFS_UNLOCK_GIANT(vfslocked);
4023 	if (error)
4024 		return (error);
4025 	error = copyout(&fh, uap->fhp, sizeof (fh));
4026 	return (error);
4027 }
4028 
/*
 * syscall for the rpc.lockd to use to translate a NFS file handle into
 * an open descriptor.
 *
 * The handle is copied in from uap->u_fhp, resolved to a vnode through
 * vfs_getvfs() and VFS_FHTOVP(), taken through the same checks vn_open
 * performs, and then attached to a newly allocated file.  On success
 * the new descriptor index is returned in td->td_retval[0].
 *
 * Errors visible here: EINVAL for flags without FREAD/FWRITE or with
 * O_CREAT, ESTALE when no mount matches the handle, EMLINK for symlinks,
 * EOPNOTSUPP for sockets, EISDIR for write/truncate on a directory, plus
 * anything returned by the called routines.
 *
 * warning: do not remove the suser() call or this becomes one giant
 * security hole.
 *
 * MP SAFE
 */
#ifndef _SYS_SYSPROTO_H_
struct fhopen_args {
	const struct fhandle *u_fhp;
	int flags;
};
#endif
int
fhopen(td, uap)
	struct thread *td;
	struct fhopen_args /* {
		const struct fhandle *u_fhp;
		int flags;
	} */ *uap;
{
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	int fmode, mode, error, type;
	struct file *nfp;
	int indx;

	error = suser(td);
	if (error)
		return (error);
	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	if (error)
		return(error);
	/* find the mount point */
	/* Giant is held from here until every exit path below. */
	mtx_lock(&Giant);
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL) {
		error = ESTALE;
		goto out;
	}
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
	if (error)
		goto out;
	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	/* Build the access mode to be checked from the open flags. */
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (fmode & O_APPEND)
		mode |= VAPPEND;
#ifdef MAC
	error = mac_check_vnode_open(td->td_ucred, vp, mode);
	if (error)
		goto bad;
#endif
	if (mode) {
		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		/*
		 * The vnode is unlocked around vn_start_write() and
		 * relocked afterwards.  If vn_start_write() fails the
		 * vnode is no longer locked, hence vrele() and 'out'
		 * rather than the vput() at 'bad'.
		 */
		VOP_UNLOCK(vp, 0, td);				/* XXX */
		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
			vrele(vp);
			goto out;
		}
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
#ifdef MAC
		/*
		 * We don't yet have fp->f_cred, so use td->td_ucred, which
		 * should be right.
		 */
		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
		if (error == 0) {
#endif
			/* Truncate by setting the size attribute to zero. */
			VATTR_NULL(vap);
			vap->va_size = 0;
			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
#ifdef MAC
		}
#endif
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
	if (error)
		goto bad;

	if (fmode & FWRITE)
		vp->v_writecount++;

	/*
	 * end of vn_open code
	 */

	if ((error = falloc(td, &nfp, &indx)) != 0) {
		/* Undo the writer count taken just above. */
		if (fmode & FWRITE)
			vp->v_writecount--;
		goto bad;
	}
	/* An extra reference on `nfp' has been held for us by falloc(). */
	fp = nfp;

	/* Attach the vnode to the new file. */
	nfp->f_vnode = vp;
	nfp->f_data = vp;
	nfp->f_flag = fmode & FMASK;
	nfp->f_ops = &vnops;
	nfp->f_type = DTYPE_VNODE;
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file lock: write lock for O_EXLOCK, else read lock. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode lock is dropped across the advisory lock call. */
		VOP_UNLOCK(vp, 0, td);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0) {
			/*
			 * The lock request failed.  Normally close the
			 * descriptor but handle the case where someone might
			 * have dup()d or close()d it when we weren't looking.
			 */
			fdclose(fdp, fp, indx, td);

			/*
			 * release our private reference
			 */
			fdrop(fp, td);
			/*
			 * NOTE(review): vp is unlocked here and is not
			 * released directly; presumably the file teardown
			 * above drops the vnode reference -- confirm.
			 */
			goto out;
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		fp->f_flag |= FHASLOCK;
	}

	/* Success: unlock the vnode and drop the extra falloc() reference. */
	VOP_UNLOCK(vp, 0, td);
	fdrop(fp, td);
	mtx_unlock(&Giant);
	td->td_retval[0] = indx;
	return (0);

bad:
	/* Error with the vnode still locked and referenced. */
	vput(vp);
out:
	mtx_unlock(&Giant);
	return (error);
}
4222 
4223 /*
4224  * Stat an (NFS) file handle.
4225  *
4226  * MP SAFE
4227  */
4228 #ifndef _SYS_SYSPROTO_H_
4229 struct fhstat_args {
4230 	struct fhandle *u_fhp;
4231 	struct stat *sb;
4232 };
4233 #endif
4234 int
4235 fhstat(td, uap)
4236 	struct thread *td;
4237 	register struct fhstat_args /* {
4238 		struct fhandle *u_fhp;
4239 		struct stat *sb;
4240 	} */ *uap;
4241 {
4242 	struct stat sb;
4243 	fhandle_t fh;
4244 	struct mount *mp;
4245 	struct vnode *vp;
4246 	int error;
4247 
4248 	error = suser(td);
4249 	if (error)
4250 		return (error);
4251 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4252 	if (error)
4253 		return (error);
4254 	mtx_lock(&Giant);
4255 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
4256 		mtx_unlock(&Giant);
4257 		return (ESTALE);
4258 	}
4259 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4260 		mtx_unlock(&Giant);
4261 		return (error);
4262 	}
4263 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4264 	vput(vp);
4265 	mtx_unlock(&Giant);
4266 	if (error)
4267 		return (error);
4268 	error = copyout(&sb, uap->sb, sizeof(sb));
4269 	return (error);
4270 }
4271 
4272 /*
4273  * Implement fstatfs() for (NFS) file handles.
4274  *
4275  * MP SAFE
4276  */
4277 #ifndef _SYS_SYSPROTO_H_
4278 struct fhstatfs_args {
4279 	struct fhandle *u_fhp;
4280 	struct statfs *buf;
4281 };
4282 #endif
4283 int
4284 fhstatfs(td, uap)
4285 	struct thread *td;
4286 	struct fhstatfs_args /* {
4287 		struct fhandle *u_fhp;
4288 		struct statfs *buf;
4289 	} */ *uap;
4290 {
4291 	struct statfs sf;
4292 	fhandle_t fh;
4293 	int error;
4294 
4295 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4296 	if (error)
4297 		return (error);
4298 	error = kern_fhstatfs(td, fh, &sf);
4299 	if (error)
4300 		return (error);
4301 	return (copyout(&sf, uap->buf, sizeof(sf)));
4302 }
4303 
4304 int
4305 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4306 {
4307 	struct statfs *sp;
4308 	struct mount *mp;
4309 	struct vnode *vp;
4310 	int error;
4311 
4312 	error = suser(td);
4313 	if (error)
4314 		return (error);
4315 	mtx_lock(&Giant);
4316 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
4317 		mtx_unlock(&Giant);
4318 		return (ESTALE);
4319 	}
4320 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4321 	if (error) {
4322 		mtx_unlock(&Giant);
4323 		return (error);
4324 	}
4325 	mp = vp->v_mount;
4326 	if (mp)
4327 		vfs_ref(mp);
4328 	vput(vp);
4329 	if (mp == NULL)
4330 		return (EBADF);
4331 	error = prison_canseemount(td->td_ucred, mp);
4332 	if (error) {
4333 		vfs_rel(mp);
4334 		return (error);
4335 	}
4336 #ifdef MAC
4337 	error = mac_check_mount_stat(td->td_ucred, mp);
4338 	if (error) {
4339 		vfs_rel(mp);
4340 		mtx_unlock(&Giant);
4341 		return (error);
4342 	}
4343 #endif
4344 	/*
4345 	 * Set these in case the underlying filesystem fails to do so.
4346 	 */
4347 	sp = &mp->mnt_stat;
4348 	sp->f_version = STATFS_VERSION;
4349 	sp->f_namemax = NAME_MAX;
4350 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4351 	error = VFS_STATFS(mp, sp, td);
4352 	vfs_rel(mp);
4353 	mtx_unlock(&Giant);
4354 	if (error)
4355 		return (error);
4356 	*buf = *sp;
4357 	return (0);
4358 }
4359 
4360 /*
4361  * Syscall to push extended attribute configuration information into the
4362  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4363  * a command (int cmd), and attribute name and misc data.  For now, the
4364  * attribute name is left in userspace for consumption by the VFS_op.
4365  * It will probably be changed to be copied into sysspace by the
4366  * syscall in the future, once issues with various consumers of the
4367  * attribute code have raised their hands.
4368  *
4369  * Currently this is used only by UFS Extended Attributes.
4370  */
4371 int
4372 extattrctl(td, uap)
4373 	struct thread *td;
4374 	struct extattrctl_args /* {
4375 		const char *path;
4376 		int cmd;
4377 		const char *filename;
4378 		int attrnamespace;
4379 		const char *attrname;
4380 	} */ *uap;
4381 {
4382 	struct vnode *filename_vp;
4383 	struct nameidata nd;
4384 	struct mount *mp, *mp_writable;
4385 	char attrname[EXTATTR_MAXNAMELEN];
4386 	int vfslocked, fnvfslocked, error;
4387 
4388 	/*
4389 	 * uap->attrname is not always defined.  We check again later when we
4390 	 * invoke the VFS call so as to pass in NULL there if needed.
4391 	 */
4392 	if (uap->attrname != NULL) {
4393 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4394 		    NULL);
4395 		if (error)
4396 			return (error);
4397 	}
4398 
4399 	vfslocked = fnvfslocked = 0;
4400 	/*
4401 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4402 	 * which VFS_EXTATTRCTL() will later release.
4403 	 */
4404 	filename_vp = NULL;
4405 	if (uap->filename != NULL) {
4406 		NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF |
4407 		    AUDITVNODE2, UIO_USERSPACE, uap->filename, td);
4408 		error = namei(&nd);
4409 		if (error)
4410 			return (error);
4411 		fnvfslocked = NDHASGIANT(&nd);
4412 		filename_vp = nd.ni_vp;
4413 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4414 	}
4415 
4416 	/* uap->path is always defined. */
4417 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4418 	    uap->path, td);
4419 	error = namei(&nd);
4420 	if (error) {
4421 		if (filename_vp != NULL)
4422 			vput(filename_vp);
4423 		goto out;
4424 	}
4425 	vfslocked = NDHASGIANT(&nd);
4426 	mp = nd.ni_vp->v_mount;
4427 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4428 	NDFREE(&nd, 0);
4429 	if (error) {
4430 		if (filename_vp != NULL)
4431 			vput(filename_vp);
4432 		goto out;
4433 	}
4434 
4435 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
4436 	    uap->attrname != NULL ? attrname : NULL, td);
4437 
4438 	vn_finished_write(mp_writable);
4439 	/*
4440 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4441 	 * filename_vp, so vrele it if it is defined.
4442 	 */
4443 	if (filename_vp != NULL)
4444 		vrele(filename_vp);
4445 out:
4446 	VFS_UNLOCK_GIANT(fnvfslocked);
4447 	VFS_UNLOCK_GIANT(vfslocked);
4448 	return (error);
4449 }
4450 
4451 /*-
4452  * Set a named extended attribute on a file or directory
4453  *
4454  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4455  *            kernelspace string pointer "attrname", userspace buffer
4456  *            pointer "data", buffer length "nbytes", thread "td".
4457  * Returns: 0 on success, an error number otherwise
4458  * Locks: none
4459  * References: vp must be a valid reference for the duration of the call
4460  */
4461 static int
4462 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4463     void *data, size_t nbytes, struct thread *td)
4464 {
4465 	struct mount *mp;
4466 	struct uio auio;
4467 	struct iovec aiov;
4468 	ssize_t cnt;
4469 	int error;
4470 
4471 	VFS_ASSERT_GIANT(vp->v_mount);
4472 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4473 	if (error)
4474 		return (error);
4475 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4476 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4477 
4478 	aiov.iov_base = data;
4479 	aiov.iov_len = nbytes;
4480 	auio.uio_iov = &aiov;
4481 	auio.uio_iovcnt = 1;
4482 	auio.uio_offset = 0;
4483 	if (nbytes > INT_MAX) {
4484 		error = EINVAL;
4485 		goto done;
4486 	}
4487 	auio.uio_resid = nbytes;
4488 	auio.uio_rw = UIO_WRITE;
4489 	auio.uio_segflg = UIO_USERSPACE;
4490 	auio.uio_td = td;
4491 	cnt = nbytes;
4492 
4493 #ifdef MAC
4494 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4495 	    attrname, &auio);
4496 	if (error)
4497 		goto done;
4498 #endif
4499 
4500 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4501 	    td->td_ucred, td);
4502 	cnt -= auio.uio_resid;
4503 	td->td_retval[0] = cnt;
4504 
4505 done:
4506 	VOP_UNLOCK(vp, 0, td);
4507 	vn_finished_write(mp);
4508 	return (error);
4509 }
4510 
4511 int
4512 extattr_set_fd(td, uap)
4513 	struct thread *td;
4514 	struct extattr_set_fd_args /* {
4515 		int fd;
4516 		int attrnamespace;
4517 		const char *attrname;
4518 		void *data;
4519 		size_t nbytes;
4520 	} */ *uap;
4521 {
4522 	struct file *fp;
4523 	char attrname[EXTATTR_MAXNAMELEN];
4524 	int vfslocked, error;
4525 
4526 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4527 	if (error)
4528 		return (error);
4529 
4530 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4531 	if (error)
4532 		return (error);
4533 
4534 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4535 	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
4536 	    attrname, uap->data, uap->nbytes, td);
4537 	fdrop(fp, td);
4538 	VFS_UNLOCK_GIANT(vfslocked);
4539 
4540 	return (error);
4541 }
4542 
4543 int
4544 extattr_set_file(td, uap)
4545 	struct thread *td;
4546 	struct extattr_set_file_args /* {
4547 		const char *path;
4548 		int attrnamespace;
4549 		const char *attrname;
4550 		void *data;
4551 		size_t nbytes;
4552 	} */ *uap;
4553 {
4554 	struct nameidata nd;
4555 	char attrname[EXTATTR_MAXNAMELEN];
4556 	int vfslocked, error;
4557 
4558 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4559 	if (error)
4560 		return (error);
4561 
4562 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4563 	    uap->path, td);
4564 	error = namei(&nd);
4565 	if (error)
4566 		return (error);
4567 	NDFREE(&nd, NDF_ONLY_PNBUF);
4568 
4569 	vfslocked = NDHASGIANT(&nd);
4570 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4571 	    uap->data, uap->nbytes, td);
4572 
4573 	vrele(nd.ni_vp);
4574 	VFS_UNLOCK_GIANT(vfslocked);
4575 	return (error);
4576 }
4577 
4578 int
4579 extattr_set_link(td, uap)
4580 	struct thread *td;
4581 	struct extattr_set_link_args /* {
4582 		const char *path;
4583 		int attrnamespace;
4584 		const char *attrname;
4585 		void *data;
4586 		size_t nbytes;
4587 	} */ *uap;
4588 {
4589 	struct nameidata nd;
4590 	char attrname[EXTATTR_MAXNAMELEN];
4591 	int vfslocked, error;
4592 
4593 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4594 	if (error)
4595 		return (error);
4596 
4597 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4598 	    uap->path, td);
4599 	error = namei(&nd);
4600 	if (error)
4601 		return (error);
4602 	NDFREE(&nd, NDF_ONLY_PNBUF);
4603 
4604 	vfslocked = NDHASGIANT(&nd);
4605 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4606 	    uap->data, uap->nbytes, td);
4607 
4608 	vrele(nd.ni_vp);
4609 	VFS_UNLOCK_GIANT(vfslocked);
4610 	return (error);
4611 }
4612 
4613 /*-
4614  * Get a named extended attribute on a file or directory
4615  *
4616  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4617  *            kernelspace string pointer "attrname", userspace buffer
4618  *            pointer "data", buffer length "nbytes", thread "td".
4619  * Returns: 0 on success, an error number otherwise
4620  * Locks: none
4621  * References: vp must be a valid reference for the duration of the call
4622  */
4623 static int
4624 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4625     void *data, size_t nbytes, struct thread *td)
4626 {
4627 	struct uio auio, *auiop;
4628 	struct iovec aiov;
4629 	ssize_t cnt;
4630 	size_t size, *sizep;
4631 	int error;
4632 
4633 	VFS_ASSERT_GIANT(vp->v_mount);
4634 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4635 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4636 
4637 	/*
4638 	 * Slightly unusual semantics: if the user provides a NULL data
4639 	 * pointer, they don't want to receive the data, just the
4640 	 * maximum read length.
4641 	 */
4642 	auiop = NULL;
4643 	sizep = NULL;
4644 	cnt = 0;
4645 	if (data != NULL) {
4646 		aiov.iov_base = data;
4647 		aiov.iov_len = nbytes;
4648 		auio.uio_iov = &aiov;
4649 		auio.uio_iovcnt = 1;
4650 		auio.uio_offset = 0;
4651 		if (nbytes > INT_MAX) {
4652 			error = EINVAL;
4653 			goto done;
4654 		}
4655 		auio.uio_resid = nbytes;
4656 		auio.uio_rw = UIO_READ;
4657 		auio.uio_segflg = UIO_USERSPACE;
4658 		auio.uio_td = td;
4659 		auiop = &auio;
4660 		cnt = nbytes;
4661 	} else
4662 		sizep = &size;
4663 
4664 #ifdef MAC
4665 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4666 	    attrname, &auio);
4667 	if (error)
4668 		goto done;
4669 #endif
4670 
4671 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4672 	    td->td_ucred, td);
4673 
4674 	if (auiop != NULL) {
4675 		cnt -= auio.uio_resid;
4676 		td->td_retval[0] = cnt;
4677 	} else
4678 		td->td_retval[0] = size;
4679 
4680 done:
4681 	VOP_UNLOCK(vp, 0, td);
4682 	return (error);
4683 }
4684 
4685 int
4686 extattr_get_fd(td, uap)
4687 	struct thread *td;
4688 	struct extattr_get_fd_args /* {
4689 		int fd;
4690 		int attrnamespace;
4691 		const char *attrname;
4692 		void *data;
4693 		size_t nbytes;
4694 	} */ *uap;
4695 {
4696 	struct file *fp;
4697 	char attrname[EXTATTR_MAXNAMELEN];
4698 	int vfslocked, error;
4699 
4700 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4701 	if (error)
4702 		return (error);
4703 
4704 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4705 	if (error)
4706 		return (error);
4707 
4708 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4709 	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4710 	    attrname, uap->data, uap->nbytes, td);
4711 
4712 	fdrop(fp, td);
4713 	VFS_UNLOCK_GIANT(vfslocked);
4714 	return (error);
4715 }
4716 
4717 int
4718 extattr_get_file(td, uap)
4719 	struct thread *td;
4720 	struct extattr_get_file_args /* {
4721 		const char *path;
4722 		int attrnamespace;
4723 		const char *attrname;
4724 		void *data;
4725 		size_t nbytes;
4726 	} */ *uap;
4727 {
4728 	struct nameidata nd;
4729 	char attrname[EXTATTR_MAXNAMELEN];
4730 	int vfslocked, error;
4731 
4732 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4733 	if (error)
4734 		return (error);
4735 
4736 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4737 	    uap->path, td);
4738 	error = namei(&nd);
4739 	if (error)
4740 		return (error);
4741 	NDFREE(&nd, NDF_ONLY_PNBUF);
4742 
4743 	vfslocked = NDHASGIANT(&nd);
4744 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4745 	    uap->data, uap->nbytes, td);
4746 
4747 	vrele(nd.ni_vp);
4748 	VFS_UNLOCK_GIANT(vfslocked);
4749 	return (error);
4750 }
4751 
4752 int
4753 extattr_get_link(td, uap)
4754 	struct thread *td;
4755 	struct extattr_get_link_args /* {
4756 		const char *path;
4757 		int attrnamespace;
4758 		const char *attrname;
4759 		void *data;
4760 		size_t nbytes;
4761 	} */ *uap;
4762 {
4763 	struct nameidata nd;
4764 	char attrname[EXTATTR_MAXNAMELEN];
4765 	int vfslocked, error;
4766 
4767 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4768 	if (error)
4769 		return (error);
4770 
4771 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4772 	    uap->path, td);
4773 	error = namei(&nd);
4774 	if (error)
4775 		return (error);
4776 	NDFREE(&nd, NDF_ONLY_PNBUF);
4777 
4778 	vfslocked = NDHASGIANT(&nd);
4779 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4780 	    uap->data, uap->nbytes, td);
4781 
4782 	vrele(nd.ni_vp);
4783 	VFS_UNLOCK_GIANT(vfslocked);
4784 	return (error);
4785 }
4786 
4787 /*
4788  * extattr_delete_vp(): Delete a named extended attribute on a file or
4789  *                      directory
4790  *
4791  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4792  *            kernelspace string pointer "attrname", proc "p"
4793  * Returns: 0 on success, an error number otherwise
4794  * Locks: none
4795  * References: vp must be a valid reference for the duration of the call
4796  */
4797 static int
4798 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4799     struct thread *td)
4800 {
4801 	struct mount *mp;
4802 	int error;
4803 
4804 	VFS_ASSERT_GIANT(vp->v_mount);
4805 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4806 	if (error)
4807 		return (error);
4808 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4809 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4810 
4811 #ifdef MAC
4812 	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4813 	    attrname);
4814 	if (error)
4815 		goto done;
4816 #endif
4817 
4818 	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4819 	    td);
4820 	if (error == EOPNOTSUPP)
4821 		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4822 		    td->td_ucred, td);
4823 #ifdef MAC
4824 done:
4825 #endif
4826 	VOP_UNLOCK(vp, 0, td);
4827 	vn_finished_write(mp);
4828 	return (error);
4829 }
4830 
4831 int
4832 extattr_delete_fd(td, uap)
4833 	struct thread *td;
4834 	struct extattr_delete_fd_args /* {
4835 		int fd;
4836 		int attrnamespace;
4837 		const char *attrname;
4838 	} */ *uap;
4839 {
4840 	struct file *fp;
4841 	char attrname[EXTATTR_MAXNAMELEN];
4842 	int vfslocked, error;
4843 
4844 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4845 	if (error)
4846 		return (error);
4847 
4848 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4849 	if (error)
4850 		return (error);
4851 
4852 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4853 	error = extattr_delete_vp(fp->f_vnode, uap->attrnamespace,
4854 	    attrname, td);
4855 	fdrop(fp, td);
4856 	VFS_UNLOCK_GIANT(vfslocked);
4857 	return (error);
4858 }
4859 
4860 int
4861 extattr_delete_file(td, uap)
4862 	struct thread *td;
4863 	struct extattr_delete_file_args /* {
4864 		const char *path;
4865 		int attrnamespace;
4866 		const char *attrname;
4867 	} */ *uap;
4868 {
4869 	struct nameidata nd;
4870 	char attrname[EXTATTR_MAXNAMELEN];
4871 	int vfslocked, error;
4872 
4873 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4874 	if (error)
4875 		return(error);
4876 
4877 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4878 	    uap->path, td);
4879 	error = namei(&nd);
4880 	if (error)
4881 		return(error);
4882 	NDFREE(&nd, NDF_ONLY_PNBUF);
4883 
4884 	vfslocked = NDHASGIANT(&nd);
4885 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4886 	vrele(nd.ni_vp);
4887 	VFS_UNLOCK_GIANT(vfslocked);
4888 	return(error);
4889 }
4890 
4891 int
4892 extattr_delete_link(td, uap)
4893 	struct thread *td;
4894 	struct extattr_delete_link_args /* {
4895 		const char *path;
4896 		int attrnamespace;
4897 		const char *attrname;
4898 	} */ *uap;
4899 {
4900 	struct nameidata nd;
4901 	char attrname[EXTATTR_MAXNAMELEN];
4902 	int vfslocked, error;
4903 
4904 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4905 	if (error)
4906 		return(error);
4907 
4908 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4909 	    uap->path, td);
4910 	error = namei(&nd);
4911 	if (error)
4912 		return(error);
4913 	NDFREE(&nd, NDF_ONLY_PNBUF);
4914 
4915 	vfslocked = NDHASGIANT(&nd);
4916 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4917 	vrele(nd.ni_vp);
4918 	VFS_UNLOCK_GIANT(vfslocked);
4919 	return(error);
4920 }
4921 
4922 /*-
4923  * Retrieve a list of extended attributes on a file or directory.
4924  *
4925  * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4926  *            userspace buffer pointer "data", buffer length "nbytes",
4927  *            thread "td".
4928  * Returns: 0 on success, an error number otherwise
4929  * Locks: none
4930  * References: vp must be a valid reference for the duration of the call
4931  */
4932 static int
4933 extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4934     size_t nbytes, struct thread *td)
4935 {
4936 	struct uio auio, *auiop;
4937 	size_t size, *sizep;
4938 	struct iovec aiov;
4939 	ssize_t cnt;
4940 	int error;
4941 
4942 	VFS_ASSERT_GIANT(vp->v_mount);
4943 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4944 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4945 
4946 	auiop = NULL;
4947 	sizep = NULL;
4948 	cnt = 0;
4949 	if (data != NULL) {
4950 		aiov.iov_base = data;
4951 		aiov.iov_len = nbytes;
4952 		auio.uio_iov = &aiov;
4953 		auio.uio_iovcnt = 1;
4954 		auio.uio_offset = 0;
4955 		if (nbytes > INT_MAX) {
4956 			error = EINVAL;
4957 			goto done;
4958 		}
4959 		auio.uio_resid = nbytes;
4960 		auio.uio_rw = UIO_READ;
4961 		auio.uio_segflg = UIO_USERSPACE;
4962 		auio.uio_td = td;
4963 		auiop = &auio;
4964 		cnt = nbytes;
4965 	} else
4966 		sizep = &size;
4967 
4968 #ifdef MAC
4969 	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4970 	if (error)
4971 		goto done;
4972 #endif
4973 
4974 	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4975 	    td->td_ucred, td);
4976 
4977 	if (auiop != NULL) {
4978 		cnt -= auio.uio_resid;
4979 		td->td_retval[0] = cnt;
4980 	} else
4981 		td->td_retval[0] = size;
4982 
4983 done:
4984 	VOP_UNLOCK(vp, 0, td);
4985 	return (error);
4986 }
4987 
4988 
4989 int
4990 extattr_list_fd(td, uap)
4991 	struct thread *td;
4992 	struct extattr_list_fd_args /* {
4993 		int fd;
4994 		int attrnamespace;
4995 		void *data;
4996 		size_t nbytes;
4997 	} */ *uap;
4998 {
4999 	struct file *fp;
5000 	int vfslocked, error;
5001 
5002 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
5003 	if (error)
5004 		return (error);
5005 
5006 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
5007 	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
5008 	    uap->nbytes, td);
5009 
5010 	fdrop(fp, td);
5011 	VFS_UNLOCK_GIANT(vfslocked);
5012 	return (error);
5013 }
5014 
5015 int
5016 extattr_list_file(td, uap)
5017 	struct thread*td;
5018 	struct extattr_list_file_args /* {
5019 		const char *path;
5020 		int attrnamespace;
5021 		void *data;
5022 		size_t nbytes;
5023 	} */ *uap;
5024 {
5025 	struct nameidata nd;
5026 	int vfslocked, error;
5027 
5028 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
5029 	    uap->path, td);
5030 	error = namei(&nd);
5031 	if (error)
5032 		return (error);
5033 	NDFREE(&nd, NDF_ONLY_PNBUF);
5034 
5035 	vfslocked = NDHASGIANT(&nd);
5036 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
5037 	    uap->nbytes, td);
5038 
5039 	vrele(nd.ni_vp);
5040 	VFS_UNLOCK_GIANT(vfslocked);
5041 	return (error);
5042 }
5043 
5044 int
5045 extattr_list_link(td, uap)
5046 	struct thread*td;
5047 	struct extattr_list_link_args /* {
5048 		const char *path;
5049 		int attrnamespace;
5050 		void *data;
5051 		size_t nbytes;
5052 	} */ *uap;
5053 {
5054 	struct nameidata nd;
5055 	int vfslocked, error;
5056 
5057 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
5058 	    uap->path, td);
5059 	error = namei(&nd);
5060 	if (error)
5061 		return (error);
5062 	NDFREE(&nd, NDF_ONLY_PNBUF);
5063 
5064 	vfslocked = NDHASGIANT(&nd);
5065 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
5066 	    uap->nbytes, td);
5067 
5068 	vrele(nd.ni_vp);
5069 	VFS_UNLOCK_GIANT(vfslocked);
5070 	return (error);
5071 }
5072