xref: /freebsd/sys/kern/vfs_extattr.c (revision 91c878a6935c5c2e99866eb267e5bc3028bf6d2f)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_compat.h"
41 #include "opt_mac.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/sysent.h>
48 #include <sys/malloc.h>
49 #include <sys/mount.h>
50 #include <sys/mutex.h>
51 #include <sys/sysproto.h>
52 #include <sys/namei.h>
53 #include <sys/filedesc.h>
54 #include <sys/kernel.h>
55 #include <sys/fcntl.h>
56 #include <sys/file.h>
57 #include <sys/limits.h>
58 #include <sys/linker.h>
59 #include <sys/stat.h>
60 #include <sys/sx.h>
61 #include <sys/unistd.h>
62 #include <sys/vnode.h>
63 #include <sys/proc.h>
64 #include <sys/dirent.h>
65 #include <sys/extattr.h>
66 #include <sys/jail.h>
67 #include <sys/syscallsubr.h>
68 #include <sys/sysctl.h>
69 
70 #include <machine/stdarg.h>
71 
72 #include <security/audit/audit.h>
73 #include <security/mac/mac_framework.h>
74 
75 #include <vm/vm.h>
76 #include <vm/vm_object.h>
77 #include <vm/vm_page.h>
78 #include <vm/uma.h>
79 
80 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
81 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
82 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
83 static int setfmode(struct thread *td, struct vnode *, int);
84 static int setfflags(struct thread *td, struct vnode *, int);
85 static int setutimes(struct thread *td, struct vnode *,
86     const struct timespec *, int, int);
87 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
88     struct thread *td);
89 
90 static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
91     size_t nbytes, struct thread *td);
92 
93 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
94 
95 /*
96  * The module initialization routine for POSIX asynchronous I/O will
97  * set this to the version of AIO that it implements.  (Zero means
98  * that it is not implemented.)  This value is used here by pathconf()
99  * and in kern_descrip.c by fpathconf().
100  */
101 int async_io_version;
102 
103 /*
104  * Sync each mounted filesystem.
105  */
106 #ifndef _SYS_SYSPROTO_H_
107 struct sync_args {
108 	int     dummy;
109 };
110 #endif
111 
112 #ifdef DEBUG
113 static int syncprt = 0;
114 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
115 #endif
116 
117 /* ARGSUSED */
118 int
119 sync(td, uap)
120 	struct thread *td;
121 	struct sync_args *uap;
122 {
123 	struct mount *mp, *nmp;
124 	int vfslocked;
125 
126 	mtx_lock(&mountlist_mtx);
127 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
128 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
129 			nmp = TAILQ_NEXT(mp, mnt_list);
130 			continue;
131 		}
132 		vfslocked = VFS_LOCK_GIANT(mp);
133 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
134 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
135 			MNT_ILOCK(mp);
136 			mp->mnt_noasync++;
137 			mp->mnt_kern_flag &= ~MNTK_ASYNC;
138 			MNT_IUNLOCK(mp);
139 			vfs_msync(mp, MNT_NOWAIT);
140 			VFS_SYNC(mp, MNT_NOWAIT, td);
141 			MNT_ILOCK(mp);
142 			mp->mnt_noasync--;
143 			if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
144 			    mp->mnt_noasync == 0)
145 				mp->mnt_kern_flag |= MNTK_ASYNC;
146 			MNT_IUNLOCK(mp);
147 			vn_finished_write(mp);
148 		}
149 		VFS_UNLOCK_GIANT(vfslocked);
150 		mtx_lock(&mountlist_mtx);
151 		nmp = TAILQ_NEXT(mp, mnt_list);
152 		vfs_unbusy(mp, td);
153 	}
154 	mtx_unlock(&mountlist_mtx);
155 	return (0);
156 }
157 
158 /* XXX PRISON: could be per prison flag */
159 static int prison_quotas;
160 #if 0
161 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
162 #endif
163 
164 /*
165  * Change filesystem quotas.
166  *
167  * MP SAFE
168  */
169 #ifndef _SYS_SYSPROTO_H_
170 struct quotactl_args {
171 	char *path;
172 	int cmd;
173 	int uid;
174 	caddr_t arg;
175 };
176 #endif
177 int
178 quotactl(td, uap)
179 	struct thread *td;
180 	register struct quotactl_args /* {
181 		char *path;
182 		int cmd;
183 		int uid;
184 		caddr_t arg;
185 	} */ *uap;
186 {
187 	struct mount *mp, *vmp;
188 	int vfslocked;
189 	int error;
190 	struct nameidata nd;
191 
192 	AUDIT_ARG(cmd, uap->cmd);
193 	AUDIT_ARG(uid, uap->uid);
194 	if (jailed(td->td_ucred) && !prison_quotas)
195 		return (EPERM);
196 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1,
197 	   UIO_USERSPACE, uap->path, td);
198 	if ((error = namei(&nd)) != 0)
199 		return (error);
200 	vfslocked = NDHASGIANT(&nd);
201 	NDFREE(&nd, NDF_ONLY_PNBUF);
202 	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
203 	mp = nd.ni_vp->v_mount;
204 	vrele(nd.ni_vp);
205 	if (error)
206 		goto out;
207 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
208 	vn_finished_write(vmp);
209 out:
210 	VFS_UNLOCK_GIANT(vfslocked);
211 	return (error);
212 }
213 
214 /*
215  * Get filesystem statistics.
216  */
217 #ifndef _SYS_SYSPROTO_H_
218 struct statfs_args {
219 	char *path;
220 	struct statfs *buf;
221 };
222 #endif
223 int
224 statfs(td, uap)
225 	struct thread *td;
226 	register struct statfs_args /* {
227 		char *path;
228 		struct statfs *buf;
229 	} */ *uap;
230 {
231 	struct statfs sf;
232 	int error;
233 
234 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
235 	if (error == 0)
236 		error = copyout(&sf, uap->buf, sizeof(sf));
237 	return (error);
238 }
239 
240 int
241 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
242     struct statfs *buf)
243 {
244 	struct mount *mp;
245 	struct statfs *sp, sb;
246 	int vfslocked;
247 	int error;
248 	struct nameidata nd;
249 
250 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
251 	    pathseg, path, td);
252 	error = namei(&nd);
253 	if (error)
254 		return (error);
255 	vfslocked = NDHASGIANT(&nd);
256 	mp = nd.ni_vp->v_mount;
257 	vfs_ref(mp);
258 	NDFREE(&nd, NDF_ONLY_PNBUF);
259 	vput(nd.ni_vp);
260 #ifdef MAC
261 	error = mac_check_mount_stat(td->td_ucred, mp);
262 	if (error)
263 		goto out;
264 #endif
265 	/*
266 	 * Set these in case the underlying filesystem fails to do so.
267 	 */
268 	sp = &mp->mnt_stat;
269 	sp->f_version = STATFS_VERSION;
270 	sp->f_namemax = NAME_MAX;
271 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
272 	error = VFS_STATFS(mp, sp, td);
273 	if (error)
274 		goto out;
275 	if (suser(td)) {
276 		bcopy(sp, &sb, sizeof(sb));
277 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
278 		prison_enforce_statfs(td->td_ucred, mp, &sb);
279 		sp = &sb;
280 	}
281 	*buf = *sp;
282 out:
283 	vfs_rel(mp);
284 	VFS_UNLOCK_GIANT(vfslocked);
285 	if (mtx_owned(&Giant))
286 		printf("statfs(%d): %s: %d\n", vfslocked, path, error);
287 	return (error);
288 }
289 
290 /*
291  * Get filesystem statistics.
292  */
293 #ifndef _SYS_SYSPROTO_H_
294 struct fstatfs_args {
295 	int fd;
296 	struct statfs *buf;
297 };
298 #endif
299 int
300 fstatfs(td, uap)
301 	struct thread *td;
302 	register struct fstatfs_args /* {
303 		int fd;
304 		struct statfs *buf;
305 	} */ *uap;
306 {
307 	struct statfs sf;
308 	int error;
309 
310 	error = kern_fstatfs(td, uap->fd, &sf);
311 	if (error == 0)
312 		error = copyout(&sf, uap->buf, sizeof(sf));
313 	return (error);
314 }
315 
316 int
317 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
318 {
319 	struct file *fp;
320 	struct mount *mp;
321 	struct statfs *sp, sb;
322 	int vfslocked;
323 	struct vnode *vp;
324 	int error;
325 
326 	AUDIT_ARG(fd, fd);
327 	error = getvnode(td->td_proc->p_fd, fd, &fp);
328 	if (error)
329 		return (error);
330 	vp = fp->f_vnode;
331 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
332 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
333 #ifdef AUDIT
334 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
335 #endif
336 	mp = vp->v_mount;
337 	if (mp)
338 		vfs_ref(mp);
339 	VOP_UNLOCK(vp, 0, td);
340 	fdrop(fp, td);
341 	if (vp->v_iflag & VI_DOOMED) {
342 		error = EBADF;
343 		goto out;
344 	}
345 #ifdef MAC
346 	error = mac_check_mount_stat(td->td_ucred, mp);
347 	if (error)
348 		goto out;
349 #endif
350 	/*
351 	 * Set these in case the underlying filesystem fails to do so.
352 	 */
353 	sp = &mp->mnt_stat;
354 	sp->f_version = STATFS_VERSION;
355 	sp->f_namemax = NAME_MAX;
356 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
357 	error = VFS_STATFS(mp, sp, td);
358 	if (error)
359 		goto out;
360 	if (suser(td)) {
361 		bcopy(sp, &sb, sizeof(sb));
362 		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
363 		prison_enforce_statfs(td->td_ucred, mp, &sb);
364 		sp = &sb;
365 	}
366 	*buf = *sp;
367 out:
368 	if (mp)
369 		vfs_rel(mp);
370 	VFS_UNLOCK_GIANT(vfslocked);
371 	return (error);
372 }
373 
374 /*
375  * Get statistics on all filesystems.
376  */
377 #ifndef _SYS_SYSPROTO_H_
378 struct getfsstat_args {
379 	struct statfs *buf;
380 	long bufsize;
381 	int flags;
382 };
383 #endif
384 int
385 getfsstat(td, uap)
386 	struct thread *td;
387 	register struct getfsstat_args /* {
388 		struct statfs *buf;
389 		long bufsize;
390 		int flags;
391 	} */ *uap;
392 {
393 
394 	return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE,
395 	    uap->flags));
396 }
397 
398 /*
399  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
400  * 	The caller is responsible for freeing memory which will be allocated
401  *	in '*buf'.
402  */
403 int
404 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
405     enum uio_seg bufseg, int flags)
406 {
407 	struct mount *mp, *nmp;
408 	struct statfs *sfsp, *sp, sb;
409 	size_t count, maxcount;
410 	int vfslocked;
411 	int error;
412 
413 	maxcount = bufsize / sizeof(struct statfs);
414 	if (bufsize == 0)
415 		sfsp = NULL;
416 	else if (bufseg == UIO_USERSPACE)
417 		sfsp = *buf;
418 	else /* if (bufseg == UIO_SYSSPACE) */ {
419 		count = 0;
420 		mtx_lock(&mountlist_mtx);
421 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
422 			count++;
423 		}
424 		mtx_unlock(&mountlist_mtx);
425 		if (maxcount > count)
426 			maxcount = count;
427 		sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP,
428 		    M_WAITOK);
429 	}
430 	count = 0;
431 	mtx_lock(&mountlist_mtx);
432 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
433 		if (prison_canseemount(td->td_ucred, mp) != 0) {
434 			nmp = TAILQ_NEXT(mp, mnt_list);
435 			continue;
436 		}
437 #ifdef MAC
438 		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
439 			nmp = TAILQ_NEXT(mp, mnt_list);
440 			continue;
441 		}
442 #endif
443 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
444 			nmp = TAILQ_NEXT(mp, mnt_list);
445 			continue;
446 		}
447 		vfslocked = VFS_LOCK_GIANT(mp);
448 		if (sfsp && count < maxcount) {
449 			sp = &mp->mnt_stat;
450 			/*
451 			 * Set these in case the underlying filesystem
452 			 * fails to do so.
453 			 */
454 			sp->f_version = STATFS_VERSION;
455 			sp->f_namemax = NAME_MAX;
456 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
457 			/*
458 			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
459 			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
460 			 * overrides MNT_WAIT.
461 			 */
462 			if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
463 			    (flags & MNT_WAIT)) &&
464 			    (error = VFS_STATFS(mp, sp, td))) {
465 				VFS_UNLOCK_GIANT(vfslocked);
466 				mtx_lock(&mountlist_mtx);
467 				nmp = TAILQ_NEXT(mp, mnt_list);
468 				vfs_unbusy(mp, td);
469 				continue;
470 			}
471 			if (suser(td)) {
472 				bcopy(sp, &sb, sizeof(sb));
473 				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
474 				prison_enforce_statfs(td->td_ucred, mp, &sb);
475 				sp = &sb;
476 			}
477 			if (bufseg == UIO_SYSSPACE)
478 				bcopy(sp, sfsp, sizeof(*sp));
479 			else /* if (bufseg == UIO_USERSPACE) */ {
480 				error = copyout(sp, sfsp, sizeof(*sp));
481 				if (error) {
482 					vfs_unbusy(mp, td);
483 					VFS_UNLOCK_GIANT(vfslocked);
484 					return (error);
485 				}
486 			}
487 			sfsp++;
488 		}
489 		VFS_UNLOCK_GIANT(vfslocked);
490 		count++;
491 		mtx_lock(&mountlist_mtx);
492 		nmp = TAILQ_NEXT(mp, mnt_list);
493 		vfs_unbusy(mp, td);
494 	}
495 	mtx_unlock(&mountlist_mtx);
496 	if (sfsp && count > maxcount)
497 		td->td_retval[0] = maxcount;
498 	else
499 		td->td_retval[0] = count;
500 	return (0);
501 }
502 
503 #ifdef COMPAT_FREEBSD4
504 /*
505  * Get old format filesystem statistics.
506  */
507 static void cvtstatfs(struct statfs *, struct ostatfs *);
508 
509 #ifndef _SYS_SYSPROTO_H_
510 struct freebsd4_statfs_args {
511 	char *path;
512 	struct ostatfs *buf;
513 };
514 #endif
515 int
516 freebsd4_statfs(td, uap)
517 	struct thread *td;
518 	struct freebsd4_statfs_args /* {
519 		char *path;
520 		struct ostatfs *buf;
521 	} */ *uap;
522 {
523 	struct ostatfs osb;
524 	struct statfs sf;
525 	int error;
526 
527 	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
528 	if (error)
529 		return (error);
530 	cvtstatfs(&sf, &osb);
531 	return (copyout(&osb, uap->buf, sizeof(osb)));
532 }
533 
534 /*
535  * Get filesystem statistics.
536  */
537 #ifndef _SYS_SYSPROTO_H_
538 struct freebsd4_fstatfs_args {
539 	int fd;
540 	struct ostatfs *buf;
541 };
542 #endif
543 int
544 freebsd4_fstatfs(td, uap)
545 	struct thread *td;
546 	struct freebsd4_fstatfs_args /* {
547 		int fd;
548 		struct ostatfs *buf;
549 	} */ *uap;
550 {
551 	struct ostatfs osb;
552 	struct statfs sf;
553 	int error;
554 
555 	error = kern_fstatfs(td, uap->fd, &sf);
556 	if (error)
557 		return (error);
558 	cvtstatfs(&sf, &osb);
559 	return (copyout(&osb, uap->buf, sizeof(osb)));
560 }
561 
562 /*
563  * Get statistics on all filesystems.
564  */
565 #ifndef _SYS_SYSPROTO_H_
566 struct freebsd4_getfsstat_args {
567 	struct ostatfs *buf;
568 	long bufsize;
569 	int flags;
570 };
571 #endif
572 int
573 freebsd4_getfsstat(td, uap)
574 	struct thread *td;
575 	register struct freebsd4_getfsstat_args /* {
576 		struct ostatfs *buf;
577 		long bufsize;
578 		int flags;
579 	} */ *uap;
580 {
581 	struct statfs *buf, *sp;
582 	struct ostatfs osb;
583 	size_t count, size;
584 	int error;
585 
586 	count = uap->bufsize / sizeof(struct ostatfs);
587 	size = count * sizeof(struct statfs);
588 	error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags);
589 	if (size > 0) {
590 		count = td->td_retval[0];
591 		sp = buf;
592 		while (count > 0 && error == 0) {
593 			cvtstatfs(sp, &osb);
594 			error = copyout(&osb, uap->buf, sizeof(osb));
595 			sp++;
596 			uap->buf++;
597 			count--;
598 		}
599 		free(buf, M_TEMP);
600 	}
601 	return (error);
602 }
603 
604 /*
605  * Implement fstatfs() for (NFS) file handles.
606  */
607 #ifndef _SYS_SYSPROTO_H_
608 struct freebsd4_fhstatfs_args {
609 	struct fhandle *u_fhp;
610 	struct ostatfs *buf;
611 };
612 #endif
613 int
614 freebsd4_fhstatfs(td, uap)
615 	struct thread *td;
616 	struct freebsd4_fhstatfs_args /* {
617 		struct fhandle *u_fhp;
618 		struct ostatfs *buf;
619 	} */ *uap;
620 {
621 	struct ostatfs osb;
622 	struct statfs sf;
623 	fhandle_t fh;
624 	int error;
625 
626 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
627 	if (error)
628 		return (error);
629 	error = kern_fhstatfs(td, fh, &sf);
630 	if (error)
631 		return (error);
632 	cvtstatfs(&sf, &osb);
633 	return (copyout(&osb, uap->buf, sizeof(osb)));
634 }
635 
636 /*
637  * Convert a new format statfs structure to an old format statfs structure.
638  */
639 static void
640 cvtstatfs(nsp, osp)
641 	struct statfs *nsp;
642 	struct ostatfs *osp;
643 {
644 
645 	bzero(osp, sizeof(*osp));
646 	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
647 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
648 	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
649 	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
650 	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
651 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
652 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
653 	osp->f_owner = nsp->f_owner;
654 	osp->f_type = nsp->f_type;
655 	osp->f_flags = nsp->f_flags;
656 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
657 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
658 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
659 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
660 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
661 	    MIN(MFSNAMELEN, OMFSNAMELEN));
662 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
663 	    MIN(MNAMELEN, OMNAMELEN));
664 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
665 	    MIN(MNAMELEN, OMNAMELEN));
666 	osp->f_fsid = nsp->f_fsid;
667 }
668 #endif /* COMPAT_FREEBSD4 */
669 
670 /*
671  * Change current working directory to a given file descriptor.
672  */
673 #ifndef _SYS_SYSPROTO_H_
674 struct fchdir_args {
675 	int	fd;
676 };
677 #endif
678 int
679 fchdir(td, uap)
680 	struct thread *td;
681 	struct fchdir_args /* {
682 		int fd;
683 	} */ *uap;
684 {
685 	register struct filedesc *fdp = td->td_proc->p_fd;
686 	struct vnode *vp, *tdp, *vpold;
687 	struct mount *mp;
688 	struct file *fp;
689 	int vfslocked;
690 	int error;
691 
692 	AUDIT_ARG(fd, uap->fd);
693 	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
694 		return (error);
695 	vp = fp->f_vnode;
696 	VREF(vp);
697 	fdrop(fp, td);
698 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
699 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
700 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
701 	error = change_dir(vp, td);
702 	while (!error && (mp = vp->v_mountedhere) != NULL) {
703 		int tvfslocked;
704 		if (vfs_busy(mp, 0, 0, td))
705 			continue;
706 		tvfslocked = VFS_LOCK_GIANT(mp);
707 		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
708 		vfs_unbusy(mp, td);
709 		if (error) {
710 			VFS_UNLOCK_GIANT(tvfslocked);
711 			break;
712 		}
713 		vput(vp);
714 		VFS_UNLOCK_GIANT(vfslocked);
715 		vp = tdp;
716 		vfslocked = tvfslocked;
717 	}
718 	if (error) {
719 		vput(vp);
720 		VFS_UNLOCK_GIANT(vfslocked);
721 		return (error);
722 	}
723 	VOP_UNLOCK(vp, 0, td);
724 	VFS_UNLOCK_GIANT(vfslocked);
725 	FILEDESC_LOCK_FAST(fdp);
726 	vpold = fdp->fd_cdir;
727 	fdp->fd_cdir = vp;
728 	FILEDESC_UNLOCK_FAST(fdp);
729 	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
730 	vrele(vpold);
731 	VFS_UNLOCK_GIANT(vfslocked);
732 	return (0);
733 }
734 
735 /*
736  * Change current working directory (``.'').
737  */
738 #ifndef _SYS_SYSPROTO_H_
739 struct chdir_args {
740 	char	*path;
741 };
742 #endif
743 int
744 chdir(td, uap)
745 	struct thread *td;
746 	struct chdir_args /* {
747 		char *path;
748 	} */ *uap;
749 {
750 
751 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
752 }
753 
754 int
755 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
756 {
757 	register struct filedesc *fdp = td->td_proc->p_fd;
758 	int error;
759 	struct nameidata nd;
760 	struct vnode *vp;
761 	int vfslocked;
762 
763 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1 | MPSAFE,
764 	    pathseg, path, td);
765 	if ((error = namei(&nd)) != 0)
766 		return (error);
767 	vfslocked = NDHASGIANT(&nd);
768 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
769 		vput(nd.ni_vp);
770 		VFS_UNLOCK_GIANT(vfslocked);
771 		NDFREE(&nd, NDF_ONLY_PNBUF);
772 		return (error);
773 	}
774 	VOP_UNLOCK(nd.ni_vp, 0, td);
775 	VFS_UNLOCK_GIANT(vfslocked);
776 	NDFREE(&nd, NDF_ONLY_PNBUF);
777 	FILEDESC_LOCK_FAST(fdp);
778 	vp = fdp->fd_cdir;
779 	fdp->fd_cdir = nd.ni_vp;
780 	FILEDESC_UNLOCK_FAST(fdp);
781 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
782 	vrele(vp);
783 	VFS_UNLOCK_GIANT(vfslocked);
784 	return (0);
785 }
786 
787 /*
788  * Helper function for raised chroot(2) security function:  Refuse if
789  * any filedescriptors are open directories.
790  */
791 static int
792 chroot_refuse_vdir_fds(fdp)
793 	struct filedesc *fdp;
794 {
795 	struct vnode *vp;
796 	struct file *fp;
797 	int fd;
798 
799 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
800 	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
801 		fp = fget_locked(fdp, fd);
802 		if (fp == NULL)
803 			continue;
804 		if (fp->f_type == DTYPE_VNODE) {
805 			vp = fp->f_vnode;
806 			if (vp->v_type == VDIR)
807 				return (EPERM);
808 		}
809 	}
810 	return (0);
811 }
812 
813 /*
814  * This sysctl determines if we will allow a process to chroot(2) if it
815  * has a directory open:
816  *	0: disallowed for all processes.
817  *	1: allowed for processes that were not already chroot(2)'ed.
818  *	2: allowed for all processes.
819  */
820 
821 static int chroot_allow_open_directories = 1;
822 
823 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
824      &chroot_allow_open_directories, 0, "");
825 
826 /*
827  * Change notion of root (``/'') directory.
828  */
829 #ifndef _SYS_SYSPROTO_H_
830 struct chroot_args {
831 	char	*path;
832 };
833 #endif
834 int
835 chroot(td, uap)
836 	struct thread *td;
837 	struct chroot_args /* {
838 		char *path;
839 	} */ *uap;
840 {
841 	int error;
842 	struct nameidata nd;
843 	int vfslocked;
844 
845 	error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
846 	if (error)
847 		return (error);
848 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
849 	    UIO_USERSPACE, uap->path, td);
850 	error = namei(&nd);
851 	if (error)
852 		goto error;
853 	vfslocked = NDHASGIANT(&nd);
854 	if ((error = change_dir(nd.ni_vp, td)) != 0)
855 		goto e_vunlock;
856 #ifdef MAC
857 	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
858 		goto e_vunlock;
859 #endif
860 	VOP_UNLOCK(nd.ni_vp, 0, td);
861 	error = change_root(nd.ni_vp, td);
862 	vrele(nd.ni_vp);
863 	VFS_UNLOCK_GIANT(vfslocked);
864 	NDFREE(&nd, NDF_ONLY_PNBUF);
865 	return (error);
866 e_vunlock:
867 	vput(nd.ni_vp);
868 	VFS_UNLOCK_GIANT(vfslocked);
869 error:
870 	NDFREE(&nd, NDF_ONLY_PNBUF);
871 	return (error);
872 }
873 
874 /*
875  * Common routine for chroot and chdir.  Callers must provide a locked vnode
876  * instance.
877  */
878 int
879 change_dir(vp, td)
880 	struct vnode *vp;
881 	struct thread *td;
882 {
883 	int error;
884 
885 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
886 	if (vp->v_type != VDIR)
887 		return (ENOTDIR);
888 #ifdef MAC
889 	error = mac_check_vnode_chdir(td->td_ucred, vp);
890 	if (error)
891 		return (error);
892 #endif
893 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
894 	return (error);
895 }
896 
897 /*
898  * Common routine for kern_chroot() and jail_attach().  The caller is
899  * responsible for invoking suser() and mac_check_chroot() to authorize this
900  * operation.
901  */
902 int
903 change_root(vp, td)
904 	struct vnode *vp;
905 	struct thread *td;
906 {
907 	struct filedesc *fdp;
908 	struct vnode *oldvp;
909 	int vfslocked;
910 	int error;
911 
912 	VFS_ASSERT_GIANT(vp->v_mount);
913 	fdp = td->td_proc->p_fd;
914 	FILEDESC_LOCK(fdp);
915 	if (chroot_allow_open_directories == 0 ||
916 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
917 		error = chroot_refuse_vdir_fds(fdp);
918 		if (error) {
919 			FILEDESC_UNLOCK(fdp);
920 			return (error);
921 		}
922 	}
923 	oldvp = fdp->fd_rdir;
924 	fdp->fd_rdir = vp;
925 	VREF(fdp->fd_rdir);
926 	if (!fdp->fd_jdir) {
927 		fdp->fd_jdir = vp;
928 		VREF(fdp->fd_jdir);
929 	}
930 	FILEDESC_UNLOCK(fdp);
931 	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
932 	vrele(oldvp);
933 	VFS_UNLOCK_GIANT(vfslocked);
934 	return (0);
935 }
936 
937 /*
938  * Check permissions, allocate an open file structure,
939  * and call the device open routine if any.
940  *
941  * MP SAFE
942  */
943 #ifndef _SYS_SYSPROTO_H_
944 struct open_args {
945 	char	*path;
946 	int	flags;
947 	int	mode;
948 };
949 #endif
950 int
951 open(td, uap)
952 	struct thread *td;
953 	register struct open_args /* {
954 		char *path;
955 		int flags;
956 		int mode;
957 	} */ *uap;
958 {
959 
960 	return kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
961 }
962 
963 int
964 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
965     int mode)
966 {
967 	struct proc *p = td->td_proc;
968 	struct filedesc *fdp = p->p_fd;
969 	struct file *fp;
970 	struct vnode *vp;
971 	struct vattr vat;
972 	struct mount *mp;
973 	int cmode;
974 	struct file *nfp;
975 	int type, indx, error;
976 	struct flock lf;
977 	struct nameidata nd;
978 	int vfslocked;
979 
980 	AUDIT_ARG(fflags, flags);
981 	AUDIT_ARG(mode, mode);
982 	if ((flags & O_ACCMODE) == O_ACCMODE)
983 		return (EINVAL);
984 	flags = FFLAGS(flags);
985 	error = falloc(td, &nfp, &indx);
986 	if (error)
987 		return (error);
988 	/* An extra reference on `nfp' has been held for us by falloc(). */
989 	fp = nfp;
990 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
991 	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td);
992 	td->td_dupfd = -1;		/* XXX check for fdopen */
993 	error = vn_open(&nd, &flags, cmode, indx);
994 	if (error) {
995 		/*
996 		 * If the vn_open replaced the method vector, something
997 		 * wonderous happened deep below and we just pass it up
998 		 * pretending we know what we do.
999 		 */
1000 		if (error == ENXIO && fp->f_ops != &badfileops) {
1001 			fdrop(fp, td);
1002 			td->td_retval[0] = indx;
1003 			return (0);
1004 		}
1005 
1006 		/*
1007 		 * release our own reference
1008 		 */
1009 		fdrop(fp, td);
1010 
1011 		/*
1012 		 * handle special fdopen() case.  bleh.  dupfdopen() is
1013 		 * responsible for dropping the old contents of ofiles[indx]
1014 		 * if it succeeds.
1015 		 */
1016 		if ((error == ENODEV || error == ENXIO) &&
1017 		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1018 		    (error =
1019 			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1020 			td->td_retval[0] = indx;
1021 			return (0);
1022 		}
1023 		/*
1024 		 * Clean up the descriptor, but only if another thread hadn't
1025 		 * replaced or closed it.
1026 		 */
1027 		fdclose(fdp, fp, indx, td);
1028 
1029 		if (error == ERESTART)
1030 			error = EINTR;
1031 		return (error);
1032 	}
1033 	td->td_dupfd = 0;
1034 	vfslocked = NDHASGIANT(&nd);
1035 	NDFREE(&nd, NDF_ONLY_PNBUF);
1036 	vp = nd.ni_vp;
1037 
1038 	/*
1039 	 * There should be 2 references on the file, one from the descriptor
1040 	 * table, and one for us.
1041 	 *
1042 	 * Handle the case where someone closed the file (via its file
1043 	 * descriptor) while we were blocked.  The end result should look
1044 	 * like opening the file succeeded but it was immediately closed.
1045 	 * We call vn_close() manually because we haven't yet hooked up
1046 	 * the various 'struct file' fields.
1047 	 */
1048 	FILEDESC_LOCK(fdp);
1049 	FILE_LOCK(fp);
1050 	if (fp->f_count == 1) {
1051 		mp = vp->v_mount;
1052 		KASSERT(fdp->fd_ofiles[indx] != fp,
1053 		    ("Open file descriptor lost all refs"));
1054 		FILE_UNLOCK(fp);
1055 		FILEDESC_UNLOCK(fdp);
1056 		VOP_UNLOCK(vp, 0, td);
1057 		vn_close(vp, flags & FMASK, fp->f_cred, td);
1058 		VFS_UNLOCK_GIANT(vfslocked);
1059 		fdrop(fp, td);
1060 		td->td_retval[0] = indx;
1061 		return (0);
1062 	}
1063 	fp->f_vnode = vp;
1064 	if (fp->f_data == NULL)
1065 		fp->f_data = vp;
1066 	fp->f_flag = flags & FMASK;
1067 	if (fp->f_ops == &badfileops)
1068 		fp->f_ops = &vnops;
1069 	fp->f_seqcount = 1;
1070 	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1071 	FILE_UNLOCK(fp);
1072 	FILEDESC_UNLOCK(fdp);
1073 
1074 	VOP_UNLOCK(vp, 0, td);
1075 	if (flags & (O_EXLOCK | O_SHLOCK)) {
1076 		lf.l_whence = SEEK_SET;
1077 		lf.l_start = 0;
1078 		lf.l_len = 0;
1079 		if (flags & O_EXLOCK)
1080 			lf.l_type = F_WRLCK;
1081 		else
1082 			lf.l_type = F_RDLCK;
1083 		type = F_FLOCK;
1084 		if ((flags & FNONBLOCK) == 0)
1085 			type |= F_WAIT;
1086 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1087 			    type)) != 0)
1088 			goto bad;
1089 		fp->f_flag |= FHASLOCK;
1090 	}
1091 	if (flags & O_TRUNC) {
1092 		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1093 			goto bad;
1094 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1095 		VATTR_NULL(&vat);
1096 		vat.va_size = 0;
1097 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1098 #ifdef MAC
1099 		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1100 		if (error == 0)
1101 #endif
1102 			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1103 		VOP_UNLOCK(vp, 0, td);
1104 		vn_finished_write(mp);
1105 		if (error)
1106 			goto bad;
1107 	}
1108 	VFS_UNLOCK_GIANT(vfslocked);
1109 	/*
1110 	 * Release our private reference, leaving the one associated with
1111 	 * the descriptor table intact.
1112 	 */
1113 	fdrop(fp, td);
1114 	td->td_retval[0] = indx;
1115 	return (0);
1116 bad:
1117 	VFS_UNLOCK_GIANT(vfslocked);
1118 	fdclose(fdp, fp, indx, td);
1119 	fdrop(fp, td);
1120 	return (error);
1121 }
1122 
1123 #ifdef COMPAT_43
1124 /*
1125  * Create a file.
1126  *
1127  * MP SAFE
1128  */
1129 #ifndef _SYS_SYSPROTO_H_
1130 struct ocreat_args {
1131 	char	*path;
1132 	int	mode;
1133 };
1134 #endif
1135 int
1136 ocreat(td, uap)
1137 	struct thread *td;
1138 	register struct ocreat_args /* {
1139 		char *path;
1140 		int mode;
1141 	} */ *uap;
1142 {
1143 
1144 	return (kern_open(td, uap->path, UIO_USERSPACE,
1145 	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
1146 }
1147 #endif /* COMPAT_43 */
1148 
1149 /*
1150  * Create a special file.
1151  */
1152 #ifndef _SYS_SYSPROTO_H_
1153 struct mknod_args {
1154 	char	*path;
1155 	int	mode;
1156 	int	dev;
1157 };
1158 #endif
1159 int
1160 mknod(td, uap)
1161 	struct thread *td;
1162 	register struct mknod_args /* {
1163 		char *path;
1164 		int mode;
1165 		int dev;
1166 	} */ *uap;
1167 {
1168 
1169 	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1170 }
1171 
1172 int
1173 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1174     int dev)
1175 {
1176 	struct vnode *vp;
1177 	struct mount *mp;
1178 	struct vattr vattr;
1179 	int error;
1180 	int whiteout = 0;
1181 	struct nameidata nd;
1182 	int vfslocked;
1183 
1184 	AUDIT_ARG(mode, mode);
1185 	AUDIT_ARG(dev, dev);
1186 	switch (mode & S_IFMT) {
1187 	case S_IFCHR:
1188 	case S_IFBLK:
1189 		error = suser(td);
1190 		break;
1191 	default:
1192 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
1193 		break;
1194 	}
1195 	if (error)
1196 		return (error);
1197 restart:
1198 	bwillwrite();
1199 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1200 	    pathseg, path, td);
1201 	if ((error = namei(&nd)) != 0)
1202 		return (error);
1203 	vfslocked = NDHASGIANT(&nd);
1204 	vp = nd.ni_vp;
1205 	if (vp != NULL) {
1206 		NDFREE(&nd, NDF_ONLY_PNBUF);
1207 		if (vp == nd.ni_dvp)
1208 			vrele(nd.ni_dvp);
1209 		else
1210 			vput(nd.ni_dvp);
1211 		vrele(vp);
1212 		VFS_UNLOCK_GIANT(vfslocked);
1213 		return (EEXIST);
1214 	} else {
1215 		VATTR_NULL(&vattr);
1216 		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1217 		vattr.va_mode = (mode & ALLPERMS) &
1218 		    ~td->td_proc->p_fd->fd_cmask;
1219 		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1220 		vattr.va_rdev = dev;
1221 		whiteout = 0;
1222 
1223 		switch (mode & S_IFMT) {
1224 		case S_IFMT:	/* used by badsect to flag bad sectors */
1225 			vattr.va_type = VBAD;
1226 			break;
1227 		case S_IFCHR:
1228 			vattr.va_type = VCHR;
1229 			break;
1230 		case S_IFBLK:
1231 			vattr.va_type = VBLK;
1232 			break;
1233 		case S_IFWHT:
1234 			whiteout = 1;
1235 			break;
1236 		default:
1237 			error = EINVAL;
1238 			break;
1239 		}
1240 	}
1241 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1242 		NDFREE(&nd, NDF_ONLY_PNBUF);
1243 		vput(nd.ni_dvp);
1244 		VFS_UNLOCK_GIANT(vfslocked);
1245 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1246 			return (error);
1247 		goto restart;
1248 	}
1249 #ifdef MAC
1250 	if (error == 0 && !whiteout)
1251 		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1252 		    &nd.ni_cnd, &vattr);
1253 #endif
1254 	if (!error) {
1255 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1256 		if (whiteout)
1257 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1258 		else {
1259 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1260 						&nd.ni_cnd, &vattr);
1261 			if (error == 0)
1262 				vput(nd.ni_vp);
1263 		}
1264 	}
1265 	NDFREE(&nd, NDF_ONLY_PNBUF);
1266 	vput(nd.ni_dvp);
1267 	vn_finished_write(mp);
1268 	VFS_UNLOCK_GIANT(vfslocked);
1269 	return (error);
1270 }
1271 
1272 /*
1273  * Create a named pipe.
1274  */
1275 #ifndef _SYS_SYSPROTO_H_
1276 struct mkfifo_args {
1277 	char	*path;
1278 	int	mode;
1279 };
1280 #endif
1281 int
1282 mkfifo(td, uap)
1283 	struct thread *td;
1284 	register struct mkfifo_args /* {
1285 		char *path;
1286 		int mode;
1287 	} */ *uap;
1288 {
1289 
1290 	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1291 }
1292 
1293 int
1294 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1295 {
1296 	struct mount *mp;
1297 	struct vattr vattr;
1298 	int error;
1299 	struct nameidata nd;
1300 	int vfslocked;
1301 
1302 	AUDIT_ARG(mode, mode);
1303 restart:
1304 	bwillwrite();
1305 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1306 	    pathseg, path, td);
1307 	if ((error = namei(&nd)) != 0)
1308 		return (error);
1309 	vfslocked = NDHASGIANT(&nd);
1310 	if (nd.ni_vp != NULL) {
1311 		NDFREE(&nd, NDF_ONLY_PNBUF);
1312 		if (nd.ni_vp == nd.ni_dvp)
1313 			vrele(nd.ni_dvp);
1314 		else
1315 			vput(nd.ni_dvp);
1316 		vrele(nd.ni_vp);
1317 		VFS_UNLOCK_GIANT(vfslocked);
1318 		return (EEXIST);
1319 	}
1320 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1321 		NDFREE(&nd, NDF_ONLY_PNBUF);
1322 		vput(nd.ni_dvp);
1323 		VFS_UNLOCK_GIANT(vfslocked);
1324 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1325 			return (error);
1326 		goto restart;
1327 	}
1328 	VATTR_NULL(&vattr);
1329 	vattr.va_type = VFIFO;
1330 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1331 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1332 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1333 #ifdef MAC
1334 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1335 	    &vattr);
1336 	if (error)
1337 		goto out;
1338 #endif
1339 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1340 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1341 	if (error == 0)
1342 		vput(nd.ni_vp);
1343 #ifdef MAC
1344 out:
1345 #endif
1346 	vput(nd.ni_dvp);
1347 	vn_finished_write(mp);
1348 	VFS_UNLOCK_GIANT(vfslocked);
1349 	NDFREE(&nd, NDF_ONLY_PNBUF);
1350 	return (error);
1351 }
1352 
1353 /*
1354  * Make a hard file link.
1355  */
1356 #ifndef _SYS_SYSPROTO_H_
1357 struct link_args {
1358 	char	*path;
1359 	char	*link;
1360 };
1361 #endif
1362 int
1363 link(td, uap)
1364 	struct thread *td;
1365 	register struct link_args /* {
1366 		char *path;
1367 		char *link;
1368 	} */ *uap;
1369 {
1370 	int error;
1371 
1372 	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1373 	return (error);
1374 }
1375 
1376 static int hardlink_check_uid = 0;
1377 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1378     &hardlink_check_uid, 0,
1379     "Unprivileged processes cannot create hard links to files owned by other "
1380     "users");
1381 static int hardlink_check_gid = 0;
1382 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1383     &hardlink_check_gid, 0,
1384     "Unprivileged processes cannot create hard links to files owned by other "
1385     "groups");
1386 
1387 static int
1388 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1389 {
1390 	struct vattr va;
1391 	int error;
1392 
1393 	if (suser_cred(cred, SUSER_ALLOWJAIL) == 0)
1394 		return (0);
1395 
1396 	if (!hardlink_check_uid && !hardlink_check_gid)
1397 		return (0);
1398 
1399 	error = VOP_GETATTR(vp, &va, cred, td);
1400 	if (error != 0)
1401 		return (error);
1402 
1403 	if (hardlink_check_uid) {
1404 		if (cred->cr_uid != va.va_uid)
1405 			return (EPERM);
1406 	}
1407 
1408 	if (hardlink_check_gid) {
1409 		if (!groupmember(va.va_gid, cred))
1410 			return (EPERM);
1411 	}
1412 
1413 	return (0);
1414 }
1415 
1416 int
1417 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1418 {
1419 	struct vnode *vp;
1420 	struct mount *mp;
1421 	struct nameidata nd;
1422 	int vfslocked;
1423 	int lvfslocked;
1424 	int error;
1425 
1426 	bwillwrite();
1427 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, segflg, path, td);
1428 	if ((error = namei(&nd)) != 0)
1429 		return (error);
1430 	vfslocked = NDHASGIANT(&nd);
1431 	NDFREE(&nd, NDF_ONLY_PNBUF);
1432 	vp = nd.ni_vp;
1433 	if (vp->v_type == VDIR) {
1434 		vrele(vp);
1435 		VFS_UNLOCK_GIANT(vfslocked);
1436 		return (EPERM);		/* POSIX */
1437 	}
1438 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1439 		vrele(vp);
1440 		VFS_UNLOCK_GIANT(vfslocked);
1441 		return (error);
1442 	}
1443 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE2,
1444 	    segflg, link, td);
1445 	if ((error = namei(&nd)) == 0) {
1446 		lvfslocked = NDHASGIANT(&nd);
1447 		if (nd.ni_vp != NULL) {
1448 			if (nd.ni_dvp == nd.ni_vp)
1449 				vrele(nd.ni_dvp);
1450 			else
1451 				vput(nd.ni_dvp);
1452 			vrele(nd.ni_vp);
1453 			error = EEXIST;
1454 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1455 		    == 0) {
1456 			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1457 			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1458 			error = can_hardlink(vp, td, td->td_ucred);
1459 			if (error == 0)
1460 #ifdef MAC
1461 				error = mac_check_vnode_link(td->td_ucred,
1462 				    nd.ni_dvp, vp, &nd.ni_cnd);
1463 			if (error == 0)
1464 #endif
1465 				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1466 			VOP_UNLOCK(vp, 0, td);
1467 			vput(nd.ni_dvp);
1468 		}
1469 		NDFREE(&nd, NDF_ONLY_PNBUF);
1470 		VFS_UNLOCK_GIANT(lvfslocked);
1471 	}
1472 	vrele(vp);
1473 	vn_finished_write(mp);
1474 	VFS_UNLOCK_GIANT(vfslocked);
1475 	return (error);
1476 }
1477 
1478 /*
1479  * Make a symbolic link.
1480  */
1481 #ifndef _SYS_SYSPROTO_H_
1482 struct symlink_args {
1483 	char	*path;
1484 	char	*link;
1485 };
1486 #endif
1487 int
1488 symlink(td, uap)
1489 	struct thread *td;
1490 	register struct symlink_args /* {
1491 		char *path;
1492 		char *link;
1493 	} */ *uap;
1494 {
1495 
1496 	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1497 }
1498 
1499 int
1500 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1501 {
1502 	struct mount *mp;
1503 	struct vattr vattr;
1504 	char *syspath;
1505 	int error;
1506 	struct nameidata nd;
1507 	int vfslocked;
1508 
1509 	if (segflg == UIO_SYSSPACE) {
1510 		syspath = path;
1511 	} else {
1512 		syspath = uma_zalloc(namei_zone, M_WAITOK);
1513 		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1514 			goto out;
1515 	}
1516 	AUDIT_ARG(text, syspath);
1517 restart:
1518 	bwillwrite();
1519 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
1520 	    segflg, link, td);
1521 	if ((error = namei(&nd)) != 0)
1522 		goto out;
1523 	vfslocked = NDHASGIANT(&nd);
1524 	if (nd.ni_vp) {
1525 		NDFREE(&nd, NDF_ONLY_PNBUF);
1526 		if (nd.ni_vp == nd.ni_dvp)
1527 			vrele(nd.ni_dvp);
1528 		else
1529 			vput(nd.ni_dvp);
1530 		vrele(nd.ni_vp);
1531 		VFS_UNLOCK_GIANT(vfslocked);
1532 		error = EEXIST;
1533 		goto out;
1534 	}
1535 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1536 		NDFREE(&nd, NDF_ONLY_PNBUF);
1537 		vput(nd.ni_dvp);
1538 		VFS_UNLOCK_GIANT(vfslocked);
1539 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1540 			goto out;
1541 		goto restart;
1542 	}
1543 	VATTR_NULL(&vattr);
1544 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1545 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1546 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1547 #ifdef MAC
1548 	vattr.va_type = VLNK;
1549 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1550 	    &vattr);
1551 	if (error)
1552 		goto out2;
1553 #endif
1554 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1555 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1556 	if (error == 0)
1557 		vput(nd.ni_vp);
1558 #ifdef MAC
1559 out2:
1560 #endif
1561 	NDFREE(&nd, NDF_ONLY_PNBUF);
1562 	vput(nd.ni_dvp);
1563 	vn_finished_write(mp);
1564 	VFS_UNLOCK_GIANT(vfslocked);
1565 out:
1566 	if (segflg != UIO_SYSSPACE)
1567 		uma_zfree(namei_zone, syspath);
1568 	return (error);
1569 }
1570 
1571 /*
1572  * Delete a whiteout from the filesystem.
1573  */
1574 int
1575 undelete(td, uap)
1576 	struct thread *td;
1577 	register struct undelete_args /* {
1578 		char *path;
1579 	} */ *uap;
1580 {
1581 	int error;
1582 	struct mount *mp;
1583 	struct nameidata nd;
1584 	int vfslocked;
1585 
1586 restart:
1587 	bwillwrite();
1588 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE | AUDITVNODE1,
1589 	    UIO_USERSPACE, uap->path, td);
1590 	error = namei(&nd);
1591 	if (error)
1592 		return (error);
1593 	vfslocked = NDHASGIANT(&nd);
1594 
1595 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1596 		NDFREE(&nd, NDF_ONLY_PNBUF);
1597 		if (nd.ni_vp == nd.ni_dvp)
1598 			vrele(nd.ni_dvp);
1599 		else
1600 			vput(nd.ni_dvp);
1601 		if (nd.ni_vp)
1602 			vrele(nd.ni_vp);
1603 		VFS_UNLOCK_GIANT(vfslocked);
1604 		return (EEXIST);
1605 	}
1606 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1607 		NDFREE(&nd, NDF_ONLY_PNBUF);
1608 		vput(nd.ni_dvp);
1609 		VFS_UNLOCK_GIANT(vfslocked);
1610 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1611 			return (error);
1612 		goto restart;
1613 	}
1614 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1615 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1616 	NDFREE(&nd, NDF_ONLY_PNBUF);
1617 	vput(nd.ni_dvp);
1618 	vn_finished_write(mp);
1619 	VFS_UNLOCK_GIANT(vfslocked);
1620 	return (error);
1621 }
1622 
1623 /*
1624  * Delete a name from the filesystem.
1625  */
1626 #ifndef _SYS_SYSPROTO_H_
1627 struct unlink_args {
1628 	char	*path;
1629 };
1630 #endif
1631 int
1632 unlink(td, uap)
1633 	struct thread *td;
1634 	struct unlink_args /* {
1635 		char *path;
1636 	} */ *uap;
1637 {
1638 	int error;
1639 
1640 	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1641 	return (error);
1642 }
1643 
1644 int
1645 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1646 {
1647 	struct mount *mp;
1648 	struct vnode *vp;
1649 	int error;
1650 	struct nameidata nd;
1651 	int vfslocked;
1652 
1653 restart:
1654 	bwillwrite();
1655 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
1656 	    pathseg, path, td);
1657 	if ((error = namei(&nd)) != 0)
1658 		return (error == EINVAL ? EPERM : error);
1659 	vfslocked = NDHASGIANT(&nd);
1660 	vp = nd.ni_vp;
1661 	if (vp->v_type == VDIR)
1662 		error = EPERM;		/* POSIX */
1663 	else {
1664 		/*
1665 		 * The root of a mounted filesystem cannot be deleted.
1666 		 *
1667 		 * XXX: can this only be a VDIR case?
1668 		 */
1669 		if (vp->v_vflag & VV_ROOT)
1670 			error = EBUSY;
1671 	}
1672 	if (error == 0) {
1673 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1674 			NDFREE(&nd, NDF_ONLY_PNBUF);
1675 			vput(nd.ni_dvp);
1676 			if (vp == nd.ni_dvp)
1677 				vrele(vp);
1678 			else
1679 				vput(vp);
1680 			VFS_UNLOCK_GIANT(vfslocked);
1681 			if ((error = vn_start_write(NULL, &mp,
1682 			    V_XSLEEP | PCATCH)) != 0)
1683 				return (error);
1684 			goto restart;
1685 		}
1686 #ifdef MAC
1687 		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1688 		    &nd.ni_cnd);
1689 		if (error)
1690 			goto out;
1691 #endif
1692 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1693 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1694 #ifdef MAC
1695 out:
1696 #endif
1697 		vn_finished_write(mp);
1698 	}
1699 	NDFREE(&nd, NDF_ONLY_PNBUF);
1700 	vput(nd.ni_dvp);
1701 	if (vp == nd.ni_dvp)
1702 		vrele(vp);
1703 	else
1704 		vput(vp);
1705 	VFS_UNLOCK_GIANT(vfslocked);
1706 	return (error);
1707 }
1708 
1709 /*
1710  * Reposition read/write file offset.
1711  */
1712 #ifndef _SYS_SYSPROTO_H_
1713 struct lseek_args {
1714 	int	fd;
1715 	int	pad;
1716 	off_t	offset;
1717 	int	whence;
1718 };
1719 #endif
1720 int
1721 lseek(td, uap)
1722 	struct thread *td;
1723 	register struct lseek_args /* {
1724 		int fd;
1725 		int pad;
1726 		off_t offset;
1727 		int whence;
1728 	} */ *uap;
1729 {
1730 	struct ucred *cred = td->td_ucred;
1731 	struct file *fp;
1732 	struct vnode *vp;
1733 	struct vattr vattr;
1734 	off_t offset;
1735 	int error, noneg;
1736 	int vfslocked;
1737 
1738 	if ((error = fget(td, uap->fd, &fp)) != 0)
1739 		return (error);
1740 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1741 		fdrop(fp, td);
1742 		return (ESPIPE);
1743 	}
1744 	vp = fp->f_vnode;
1745 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1746 	noneg = (vp->v_type != VCHR);
1747 	offset = uap->offset;
1748 	switch (uap->whence) {
1749 	case L_INCR:
1750 		if (noneg &&
1751 		    (fp->f_offset < 0 ||
1752 		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1753 			error = EOVERFLOW;
1754 			break;
1755 		}
1756 		offset += fp->f_offset;
1757 		break;
1758 	case L_XTND:
1759 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1760 		error = VOP_GETATTR(vp, &vattr, cred, td);
1761 		VOP_UNLOCK(vp, 0, td);
1762 		if (error)
1763 			break;
1764 		if (noneg &&
1765 		    (vattr.va_size > OFF_MAX ||
1766 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1767 			error = EOVERFLOW;
1768 			break;
1769 		}
1770 		offset += vattr.va_size;
1771 		break;
1772 	case L_SET:
1773 		break;
1774 	default:
1775 		error = EINVAL;
1776 	}
1777 	if (error == 0 && noneg && offset < 0)
1778 		error = EINVAL;
1779 	if (error != 0)
1780 		goto drop;
1781 	fp->f_offset = offset;
1782 	*(off_t *)(td->td_retval) = fp->f_offset;
1783 drop:
1784 	fdrop(fp, td);
1785 	VFS_UNLOCK_GIANT(vfslocked);
1786 	return (error);
1787 }
1788 
1789 #if defined(COMPAT_43)
1790 /*
1791  * Reposition read/write file offset.
1792  */
1793 #ifndef _SYS_SYSPROTO_H_
1794 struct olseek_args {
1795 	int	fd;
1796 	long	offset;
1797 	int	whence;
1798 };
1799 #endif
1800 int
1801 olseek(td, uap)
1802 	struct thread *td;
1803 	register struct olseek_args /* {
1804 		int fd;
1805 		long offset;
1806 		int whence;
1807 	} */ *uap;
1808 {
1809 	struct lseek_args /* {
1810 		int fd;
1811 		int pad;
1812 		off_t offset;
1813 		int whence;
1814 	} */ nuap;
1815 	int error;
1816 
1817 	nuap.fd = uap->fd;
1818 	nuap.offset = uap->offset;
1819 	nuap.whence = uap->whence;
1820 	error = lseek(td, &nuap);
1821 	return (error);
1822 }
1823 #endif /* COMPAT_43 */
1824 
1825 /*
1826  * Check access permissions using passed credentials.
1827  */
1828 static int
1829 vn_access(vp, user_flags, cred, td)
1830 	struct vnode	*vp;
1831 	int		user_flags;
1832 	struct ucred	*cred;
1833 	struct thread	*td;
1834 {
1835 	int error, flags;
1836 
1837 	/* Flags == 0 means only check for existence. */
1838 	error = 0;
1839 	if (user_flags) {
1840 		flags = 0;
1841 		if (user_flags & R_OK)
1842 			flags |= VREAD;
1843 		if (user_flags & W_OK)
1844 			flags |= VWRITE;
1845 		if (user_flags & X_OK)
1846 			flags |= VEXEC;
1847 #ifdef MAC
1848 		error = mac_check_vnode_access(cred, vp, flags);
1849 		if (error)
1850 			return (error);
1851 #endif
1852 		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1853 			error = VOP_ACCESS(vp, flags, cred, td);
1854 	}
1855 	return (error);
1856 }
1857 
1858 /*
1859  * Check access permissions using "real" credentials.
1860  */
1861 #ifndef _SYS_SYSPROTO_H_
1862 struct access_args {
1863 	char	*path;
1864 	int	flags;
1865 };
1866 #endif
1867 int
1868 access(td, uap)
1869 	struct thread *td;
1870 	register struct access_args /* {
1871 		char *path;
1872 		int flags;
1873 	} */ *uap;
1874 {
1875 
1876 	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1877 }
1878 
1879 int
1880 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1881 {
1882 	struct ucred *cred, *tmpcred;
1883 	register struct vnode *vp;
1884 	struct nameidata nd;
1885 	int vfslocked;
1886 	int error;
1887 
1888 	/*
1889 	 * Create and modify a temporary credential instead of one that
1890 	 * is potentially shared.  This could also mess up socket
1891 	 * buffer accounting which can run in an interrupt context.
1892 	 */
1893 	cred = td->td_ucred;
1894 	tmpcred = crdup(cred);
1895 	tmpcred->cr_uid = cred->cr_ruid;
1896 	tmpcred->cr_groups[0] = cred->cr_rgid;
1897 	td->td_ucred = tmpcred;
1898 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1899 	    pathseg, path, td);
1900 	if ((error = namei(&nd)) != 0)
1901 		goto out1;
1902 	vfslocked = NDHASGIANT(&nd);
1903 	vp = nd.ni_vp;
1904 
1905 	error = vn_access(vp, flags, tmpcred, td);
1906 	NDFREE(&nd, NDF_ONLY_PNBUF);
1907 	vput(vp);
1908 	VFS_UNLOCK_GIANT(vfslocked);
1909 out1:
1910 	td->td_ucred = cred;
1911 	crfree(tmpcred);
1912 	return (error);
1913 }
1914 
1915 /*
1916  * Check access permissions using "effective" credentials.
1917  */
1918 #ifndef _SYS_SYSPROTO_H_
1919 struct eaccess_args {
1920 	char	*path;
1921 	int	flags;
1922 };
1923 #endif
1924 int
1925 eaccess(td, uap)
1926 	struct thread *td;
1927 	register struct eaccess_args /* {
1928 		char *path;
1929 		int flags;
1930 	} */ *uap;
1931 {
1932 
1933 	return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->flags));
1934 }
1935 
1936 int
1937 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1938 {
1939 	struct nameidata nd;
1940 	struct vnode *vp;
1941 	int vfslocked;
1942 	int error;
1943 
1944 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1945 	    pathseg, path, td);
1946 	if ((error = namei(&nd)) != 0)
1947 		return (error);
1948 	vp = nd.ni_vp;
1949 	vfslocked = NDHASGIANT(&nd);
1950 	error = vn_access(vp, flags, td->td_ucred, td);
1951 	NDFREE(&nd, NDF_ONLY_PNBUF);
1952 	vput(vp);
1953 	VFS_UNLOCK_GIANT(vfslocked);
1954 	return (error);
1955 }
1956 
1957 #if defined(COMPAT_43)
1958 /*
1959  * Get file status; this version follows links.
1960  */
1961 #ifndef _SYS_SYSPROTO_H_
1962 struct ostat_args {
1963 	char	*path;
1964 	struct ostat *ub;
1965 };
1966 #endif
1967 int
1968 ostat(td, uap)
1969 	struct thread *td;
1970 	register struct ostat_args /* {
1971 		char *path;
1972 		struct ostat *ub;
1973 	} */ *uap;
1974 {
1975 	struct stat sb;
1976 	struct ostat osb;
1977 	int error;
1978 
1979 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1980 	if (error)
1981 		return (error);
1982 	cvtstat(&sb, &osb);
1983 	error = copyout(&osb, uap->ub, sizeof (osb));
1984 	return (error);
1985 }
1986 
1987 /*
1988  * Get file status; this version does not follow links.
1989  */
1990 #ifndef _SYS_SYSPROTO_H_
1991 struct olstat_args {
1992 	char	*path;
1993 	struct ostat *ub;
1994 };
1995 #endif
1996 int
1997 olstat(td, uap)
1998 	struct thread *td;
1999 	register struct olstat_args /* {
2000 		char *path;
2001 		struct ostat *ub;
2002 	} */ *uap;
2003 {
2004 	struct stat sb;
2005 	struct ostat osb;
2006 	int error;
2007 
2008 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2009 	if (error)
2010 		return (error);
2011 	cvtstat(&sb, &osb);
2012 	error = copyout(&osb, uap->ub, sizeof (osb));
2013 	return (error);
2014 }
2015 
2016 /*
2017  * Convert from an old to a new stat structure.
2018  */
2019 void
2020 cvtstat(st, ost)
2021 	struct stat *st;
2022 	struct ostat *ost;
2023 {
2024 
2025 	ost->st_dev = st->st_dev;
2026 	ost->st_ino = st->st_ino;
2027 	ost->st_mode = st->st_mode;
2028 	ost->st_nlink = st->st_nlink;
2029 	ost->st_uid = st->st_uid;
2030 	ost->st_gid = st->st_gid;
2031 	ost->st_rdev = st->st_rdev;
2032 	if (st->st_size < (quad_t)1 << 32)
2033 		ost->st_size = st->st_size;
2034 	else
2035 		ost->st_size = -2;
2036 	ost->st_atime = st->st_atime;
2037 	ost->st_mtime = st->st_mtime;
2038 	ost->st_ctime = st->st_ctime;
2039 	ost->st_blksize = st->st_blksize;
2040 	ost->st_blocks = st->st_blocks;
2041 	ost->st_flags = st->st_flags;
2042 	ost->st_gen = st->st_gen;
2043 }
2044 #endif /* COMPAT_43 */
2045 
2046 /*
2047  * Get file status; this version follows links.
2048  */
2049 #ifndef _SYS_SYSPROTO_H_
2050 struct stat_args {
2051 	char	*path;
2052 	struct stat *ub;
2053 };
2054 #endif
2055 int
2056 stat(td, uap)
2057 	struct thread *td;
2058 	register struct stat_args /* {
2059 		char *path;
2060 		struct stat *ub;
2061 	} */ *uap;
2062 {
2063 	struct stat sb;
2064 	int error;
2065 
2066 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2067 	if (error == 0)
2068 		error = copyout(&sb, uap->ub, sizeof (sb));
2069 	return (error);
2070 }
2071 
2072 int
2073 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2074 {
2075 	struct nameidata nd;
2076 	struct stat sb;
2077 	int error, vfslocked;
2078 
2079 	NDINIT(&nd, LOOKUP,
2080 	    FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE | AUDITVNODE1,
2081 	    pathseg, path, td);
2082 	if ((error = namei(&nd)) != 0)
2083 		return (error);
2084 	vfslocked = NDHASGIANT(&nd);
2085 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2086 	NDFREE(&nd, NDF_ONLY_PNBUF);
2087 	vput(nd.ni_vp);
2088 	VFS_UNLOCK_GIANT(vfslocked);
2089 	if (mtx_owned(&Giant))
2090 		printf("stat(%d): %s\n", vfslocked, path);
2091 	if (error)
2092 		return (error);
2093 	*sbp = sb;
2094 	return (0);
2095 }
2096 
2097 /*
2098  * Get file status; this version does not follow links.
2099  */
2100 #ifndef _SYS_SYSPROTO_H_
2101 struct lstat_args {
2102 	char	*path;
2103 	struct stat *ub;
2104 };
2105 #endif
2106 int
2107 lstat(td, uap)
2108 	struct thread *td;
2109 	register struct lstat_args /* {
2110 		char *path;
2111 		struct stat *ub;
2112 	} */ *uap;
2113 {
2114 	struct stat sb;
2115 	int error;
2116 
2117 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2118 	if (error == 0)
2119 		error = copyout(&sb, uap->ub, sizeof (sb));
2120 	return (error);
2121 }
2122 
2123 int
2124 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2125 {
2126 	struct vnode *vp;
2127 	struct stat sb;
2128 	struct nameidata nd;
2129 	int error, vfslocked;
2130 
2131 	NDINIT(&nd, LOOKUP,
2132 	    NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE | AUDITVNODE1,
2133 	    pathseg, path, td);
2134 	if ((error = namei(&nd)) != 0)
2135 		return (error);
2136 	vfslocked = NDHASGIANT(&nd);
2137 	vp = nd.ni_vp;
2138 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2139 	NDFREE(&nd, NDF_ONLY_PNBUF);
2140 	vput(vp);
2141 	VFS_UNLOCK_GIANT(vfslocked);
2142 	if (error)
2143 		return (error);
2144 	*sbp = sb;
2145 	return (0);
2146 }
2147 
2148 /*
2149  * Implementation of the NetBSD [l]stat() functions.
2150  */
2151 void
2152 cvtnstat(sb, nsb)
2153 	struct stat *sb;
2154 	struct nstat *nsb;
2155 {
2156 	bzero(nsb, sizeof *nsb);
2157 	nsb->st_dev = sb->st_dev;
2158 	nsb->st_ino = sb->st_ino;
2159 	nsb->st_mode = sb->st_mode;
2160 	nsb->st_nlink = sb->st_nlink;
2161 	nsb->st_uid = sb->st_uid;
2162 	nsb->st_gid = sb->st_gid;
2163 	nsb->st_rdev = sb->st_rdev;
2164 	nsb->st_atimespec = sb->st_atimespec;
2165 	nsb->st_mtimespec = sb->st_mtimespec;
2166 	nsb->st_ctimespec = sb->st_ctimespec;
2167 	nsb->st_size = sb->st_size;
2168 	nsb->st_blocks = sb->st_blocks;
2169 	nsb->st_blksize = sb->st_blksize;
2170 	nsb->st_flags = sb->st_flags;
2171 	nsb->st_gen = sb->st_gen;
2172 	nsb->st_birthtimespec = sb->st_birthtimespec;
2173 }
2174 
2175 #ifndef _SYS_SYSPROTO_H_
2176 struct nstat_args {
2177 	char	*path;
2178 	struct nstat *ub;
2179 };
2180 #endif
2181 int
2182 nstat(td, uap)
2183 	struct thread *td;
2184 	register struct nstat_args /* {
2185 		char *path;
2186 		struct nstat *ub;
2187 	} */ *uap;
2188 {
2189 	struct stat sb;
2190 	struct nstat nsb;
2191 	int error;
2192 
2193 	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2194 	if (error)
2195 		return (error);
2196 	cvtnstat(&sb, &nsb);
2197 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2198 	return (error);
2199 }
2200 
2201 /*
2202  * NetBSD lstat.  Get file status; this version does not follow links.
2203  */
2204 #ifndef _SYS_SYSPROTO_H_
2205 struct lstat_args {
2206 	char	*path;
2207 	struct stat *ub;
2208 };
2209 #endif
2210 int
2211 nlstat(td, uap)
2212 	struct thread *td;
2213 	register struct nlstat_args /* {
2214 		char *path;
2215 		struct nstat *ub;
2216 	} */ *uap;
2217 {
2218 	struct stat sb;
2219 	struct nstat nsb;
2220 	int error;
2221 
2222 	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2223 	if (error)
2224 		return (error);
2225 	cvtnstat(&sb, &nsb);
2226 	error = copyout(&nsb, uap->ub, sizeof (nsb));
2227 	return (error);
2228 }
2229 
2230 /*
2231  * Get configurable pathname variables.
2232  */
2233 #ifndef _SYS_SYSPROTO_H_
2234 struct pathconf_args {
2235 	char	*path;
2236 	int	name;
2237 };
2238 #endif
2239 int
2240 pathconf(td, uap)
2241 	struct thread *td;
2242 	register struct pathconf_args /* {
2243 		char *path;
2244 		int name;
2245 	} */ *uap;
2246 {
2247 
2248 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2249 }
2250 
2251 int
2252 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2253 {
2254 	struct nameidata nd;
2255 	int error, vfslocked;
2256 
2257 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2258 	    pathseg, path, td);
2259 	if ((error = namei(&nd)) != 0)
2260 		return (error);
2261 	vfslocked = NDHASGIANT(&nd);
2262 	NDFREE(&nd, NDF_ONLY_PNBUF);
2263 
2264 	/* If asynchronous I/O is available, it works for all files. */
2265 	if (name == _PC_ASYNC_IO)
2266 		td->td_retval[0] = async_io_version;
2267 	else
2268 		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2269 	vput(nd.ni_vp);
2270 	VFS_UNLOCK_GIANT(vfslocked);
2271 	return (error);
2272 }
2273 
2274 /*
2275  * Return target name of a symbolic link.
2276  */
2277 #ifndef _SYS_SYSPROTO_H_
2278 struct readlink_args {
2279 	char	*path;
2280 	char	*buf;
2281 	int	count;
2282 };
2283 #endif
2284 int
2285 readlink(td, uap)
2286 	struct thread *td;
2287 	register struct readlink_args /* {
2288 		char *path;
2289 		char *buf;
2290 		int count;
2291 	} */ *uap;
2292 {
2293 
2294 	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2295 	    UIO_USERSPACE, uap->count));
2296 }
2297 
2298 int
2299 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2300     enum uio_seg bufseg, int count)
2301 {
2302 	register struct vnode *vp;
2303 	struct iovec aiov;
2304 	struct uio auio;
2305 	int error;
2306 	struct nameidata nd;
2307 	int vfslocked;
2308 
2309 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
2310 	    pathseg, path, td);
2311 	if ((error = namei(&nd)) != 0)
2312 		return (error);
2313 	NDFREE(&nd, NDF_ONLY_PNBUF);
2314 	vfslocked = NDHASGIANT(&nd);
2315 	vp = nd.ni_vp;
2316 #ifdef MAC
2317 	error = mac_check_vnode_readlink(td->td_ucred, vp);
2318 	if (error) {
2319 		vput(vp);
2320 		VFS_UNLOCK_GIANT(vfslocked);
2321 		return (error);
2322 	}
2323 #endif
2324 	if (vp->v_type != VLNK)
2325 		error = EINVAL;
2326 	else {
2327 		aiov.iov_base = buf;
2328 		aiov.iov_len = count;
2329 		auio.uio_iov = &aiov;
2330 		auio.uio_iovcnt = 1;
2331 		auio.uio_offset = 0;
2332 		auio.uio_rw = UIO_READ;
2333 		auio.uio_segflg = bufseg;
2334 		auio.uio_td = td;
2335 		auio.uio_resid = count;
2336 		error = VOP_READLINK(vp, &auio, td->td_ucred);
2337 	}
2338 	vput(vp);
2339 	VFS_UNLOCK_GIANT(vfslocked);
2340 	td->td_retval[0] = count - auio.uio_resid;
2341 	return (error);
2342 }
2343 
2344 /*
2345  * Common implementation code for chflags() and fchflags().
2346  */
2347 static int
2348 setfflags(td, vp, flags)
2349 	struct thread *td;
2350 	struct vnode *vp;
2351 	int flags;
2352 {
2353 	int error;
2354 	struct mount *mp;
2355 	struct vattr vattr;
2356 
2357 	/*
2358 	 * Prevent non-root users from setting flags on devices.  When
2359 	 * a device is reused, users can retain ownership of the device
2360 	 * if they are allowed to set flags and programs assume that
2361 	 * chown can't fail when done as root.
2362 	 */
2363 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2364 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
2365 		if (error)
2366 			return (error);
2367 	}
2368 
2369 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2370 		return (error);
2371 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2372 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2373 	VATTR_NULL(&vattr);
2374 	vattr.va_flags = flags;
2375 #ifdef MAC
2376 	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2377 	if (error == 0)
2378 #endif
2379 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2380 	VOP_UNLOCK(vp, 0, td);
2381 	vn_finished_write(mp);
2382 	return (error);
2383 }
2384 
2385 /*
2386  * Change flags of a file given a path name.
2387  */
2388 #ifndef _SYS_SYSPROTO_H_
2389 struct chflags_args {
2390 	char	*path;
2391 	int	flags;
2392 };
2393 #endif
2394 int
2395 chflags(td, uap)
2396 	struct thread *td;
2397 	register struct chflags_args /* {
2398 		char *path;
2399 		int flags;
2400 	} */ *uap;
2401 {
2402 	int error;
2403 	struct nameidata nd;
2404 	int vfslocked;
2405 
2406 	AUDIT_ARG(fflags, uap->flags);
2407 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2408 	    uap->path, td);
2409 	if ((error = namei(&nd)) != 0)
2410 		return (error);
2411 	NDFREE(&nd, NDF_ONLY_PNBUF);
2412 	vfslocked = NDHASGIANT(&nd);
2413 	error = setfflags(td, nd.ni_vp, uap->flags);
2414 	vrele(nd.ni_vp);
2415 	VFS_UNLOCK_GIANT(vfslocked);
2416 	return (error);
2417 }
2418 
2419 /*
2420  * Same as chflags() but doesn't follow symlinks.
2421  */
2422 int
2423 lchflags(td, uap)
2424 	struct thread *td;
2425 	register struct lchflags_args /* {
2426 		char *path;
2427 		int flags;
2428 	} */ *uap;
2429 {
2430 	int error;
2431 	struct nameidata nd;
2432 	int vfslocked;
2433 
2434 	AUDIT_ARG(fflags, uap->flags);
2435 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2436 	    uap->path, td);
2437 	if ((error = namei(&nd)) != 0)
2438 		return (error);
2439 	vfslocked = NDHASGIANT(&nd);
2440 	NDFREE(&nd, NDF_ONLY_PNBUF);
2441 	error = setfflags(td, nd.ni_vp, uap->flags);
2442 	vrele(nd.ni_vp);
2443 	VFS_UNLOCK_GIANT(vfslocked);
2444 	return (error);
2445 }
2446 
2447 /*
2448  * Change flags of a file given a file descriptor.
2449  */
2450 #ifndef _SYS_SYSPROTO_H_
2451 struct fchflags_args {
2452 	int	fd;
2453 	int	flags;
2454 };
2455 #endif
2456 int
2457 fchflags(td, uap)
2458 	struct thread *td;
2459 	register struct fchflags_args /* {
2460 		int fd;
2461 		int flags;
2462 	} */ *uap;
2463 {
2464 	struct file *fp;
2465 	int vfslocked;
2466 	int error;
2467 
2468 	AUDIT_ARG(fd, uap->fd);
2469 	AUDIT_ARG(fflags, uap->flags);
2470 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2471 		return (error);
2472 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2473 #ifdef AUDIT
2474 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2475 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2476 	VOP_UNLOCK(fp->f_vnode, 0, td);
2477 #endif
2478 	error = setfflags(td, fp->f_vnode, uap->flags);
2479 	VFS_UNLOCK_GIANT(vfslocked);
2480 	fdrop(fp, td);
2481 	return (error);
2482 }
2483 
2484 /*
2485  * Common implementation code for chmod(), lchmod() and fchmod().
2486  */
2487 static int
2488 setfmode(td, vp, mode)
2489 	struct thread *td;
2490 	struct vnode *vp;
2491 	int mode;
2492 {
2493 	int error;
2494 	struct mount *mp;
2495 	struct vattr vattr;
2496 
2497 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2498 		return (error);
2499 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2500 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2501 	VATTR_NULL(&vattr);
2502 	vattr.va_mode = mode & ALLPERMS;
2503 #ifdef MAC
2504 	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2505 	if (error == 0)
2506 #endif
2507 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2508 	VOP_UNLOCK(vp, 0, td);
2509 	vn_finished_write(mp);
2510 	return (error);
2511 }
2512 
2513 /*
2514  * Change mode of a file given path name.
2515  */
2516 #ifndef _SYS_SYSPROTO_H_
2517 struct chmod_args {
2518 	char	*path;
2519 	int	mode;
2520 };
2521 #endif
2522 int
2523 chmod(td, uap)
2524 	struct thread *td;
2525 	register struct chmod_args /* {
2526 		char *path;
2527 		int mode;
2528 	} */ *uap;
2529 {
2530 
2531 	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2532 }
2533 
2534 int
2535 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2536 {
2537 	int error;
2538 	struct nameidata nd;
2539 	int vfslocked;
2540 
2541 	AUDIT_ARG(mode, mode);
2542 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2543 	if ((error = namei(&nd)) != 0)
2544 		return (error);
2545 	vfslocked = NDHASGIANT(&nd);
2546 	NDFREE(&nd, NDF_ONLY_PNBUF);
2547 	error = setfmode(td, nd.ni_vp, mode);
2548 	vrele(nd.ni_vp);
2549 	VFS_UNLOCK_GIANT(vfslocked);
2550 	return (error);
2551 }
2552 
2553 /*
2554  * Change mode of a file given path name (don't follow links.)
2555  */
2556 #ifndef _SYS_SYSPROTO_H_
2557 struct lchmod_args {
2558 	char	*path;
2559 	int	mode;
2560 };
2561 #endif
2562 int
2563 lchmod(td, uap)
2564 	struct thread *td;
2565 	register struct lchmod_args /* {
2566 		char *path;
2567 		int mode;
2568 	} */ *uap;
2569 {
2570 	int error;
2571 	struct nameidata nd;
2572 	int vfslocked;
2573 
2574 	AUDIT_ARG(mode, (mode_t)uap->mode);
2575 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, UIO_USERSPACE,
2576 	    uap->path, td);
2577 	if ((error = namei(&nd)) != 0)
2578 		return (error);
2579 	vfslocked = NDHASGIANT(&nd);
2580 	NDFREE(&nd, NDF_ONLY_PNBUF);
2581 	error = setfmode(td, nd.ni_vp, uap->mode);
2582 	vrele(nd.ni_vp);
2583 	VFS_UNLOCK_GIANT(vfslocked);
2584 	return (error);
2585 }
2586 
2587 /*
2588  * Change mode of a file given a file descriptor.
2589  */
2590 #ifndef _SYS_SYSPROTO_H_
2591 struct fchmod_args {
2592 	int	fd;
2593 	int	mode;
2594 };
2595 #endif
2596 int
2597 fchmod(td, uap)
2598 	struct thread *td;
2599 	register struct fchmod_args /* {
2600 		int fd;
2601 		int mode;
2602 	} */ *uap;
2603 {
2604 	struct file *fp;
2605 	int vfslocked;
2606 	int error;
2607 
2608 	AUDIT_ARG(fd, uap->fd);
2609 	AUDIT_ARG(mode, uap->mode);
2610 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2611 		return (error);
2612 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2613 #ifdef AUDIT
2614 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2615 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2616 	VOP_UNLOCK(fp->f_vnode, 0, td);
2617 #endif
2618 	error = setfmode(td, fp->f_vnode, uap->mode);
2619 	VFS_UNLOCK_GIANT(vfslocked);
2620 	fdrop(fp, td);
2621 	return (error);
2622 }
2623 
2624 /*
2625  * Common implementation for chown(), lchown(), and fchown()
2626  */
2627 static int
2628 setfown(td, vp, uid, gid)
2629 	struct thread *td;
2630 	struct vnode *vp;
2631 	uid_t uid;
2632 	gid_t gid;
2633 {
2634 	int error;
2635 	struct mount *mp;
2636 	struct vattr vattr;
2637 
2638 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2639 		return (error);
2640 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2641 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2642 	VATTR_NULL(&vattr);
2643 	vattr.va_uid = uid;
2644 	vattr.va_gid = gid;
2645 #ifdef MAC
2646 	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2647 	    vattr.va_gid);
2648 	if (error == 0)
2649 #endif
2650 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2651 	VOP_UNLOCK(vp, 0, td);
2652 	vn_finished_write(mp);
2653 	return (error);
2654 }
2655 
2656 /*
2657  * Set ownership given a path name.
2658  */
2659 #ifndef _SYS_SYSPROTO_H_
2660 struct chown_args {
2661 	char	*path;
2662 	int	uid;
2663 	int	gid;
2664 };
2665 #endif
2666 int
2667 chown(td, uap)
2668 	struct thread *td;
2669 	register struct chown_args /* {
2670 		char *path;
2671 		int uid;
2672 		int gid;
2673 	} */ *uap;
2674 {
2675 
2676 	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2677 }
2678 
2679 int
2680 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2681     int gid)
2682 {
2683 	int error;
2684 	struct nameidata nd;
2685 	int vfslocked;
2686 
2687 	AUDIT_ARG(owner, uid, gid);
2688 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2689 	if ((error = namei(&nd)) != 0)
2690 		return (error);
2691 	vfslocked = NDHASGIANT(&nd);
2692 	NDFREE(&nd, NDF_ONLY_PNBUF);
2693 	error = setfown(td, nd.ni_vp, uid, gid);
2694 	vrele(nd.ni_vp);
2695 	VFS_UNLOCK_GIANT(vfslocked);
2696 	return (error);
2697 }
2698 
2699 /*
2700  * Set ownership given a path name, do not cross symlinks.
2701  */
2702 #ifndef _SYS_SYSPROTO_H_
2703 struct lchown_args {
2704 	char	*path;
2705 	int	uid;
2706 	int	gid;
2707 };
2708 #endif
2709 int
2710 lchown(td, uap)
2711 	struct thread *td;
2712 	register struct lchown_args /* {
2713 		char *path;
2714 		int uid;
2715 		int gid;
2716 	} */ *uap;
2717 {
2718 
2719 	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2720 }
2721 
2722 int
2723 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2724     int gid)
2725 {
2726 	int error;
2727 	struct nameidata nd;
2728 	int vfslocked;
2729 
2730 	AUDIT_ARG(owner, uid, gid);
2731 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2732 	if ((error = namei(&nd)) != 0)
2733 		return (error);
2734 	vfslocked = NDHASGIANT(&nd);
2735 	NDFREE(&nd, NDF_ONLY_PNBUF);
2736 	error = setfown(td, nd.ni_vp, uid, gid);
2737 	vrele(nd.ni_vp);
2738 	VFS_UNLOCK_GIANT(vfslocked);
2739 	return (error);
2740 }
2741 
2742 /*
2743  * Set ownership given a file descriptor.
2744  */
2745 #ifndef _SYS_SYSPROTO_H_
2746 struct fchown_args {
2747 	int	fd;
2748 	int	uid;
2749 	int	gid;
2750 };
2751 #endif
2752 int
2753 fchown(td, uap)
2754 	struct thread *td;
2755 	register struct fchown_args /* {
2756 		int fd;
2757 		int uid;
2758 		int gid;
2759 	} */ *uap;
2760 {
2761 	struct file *fp;
2762 	int vfslocked;
2763 	int error;
2764 
2765 	AUDIT_ARG(fd, uap->fd);
2766 	AUDIT_ARG(owner, uap->uid, uap->gid);
2767 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2768 		return (error);
2769 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2770 #ifdef AUDIT
2771 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2772 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2773 	VOP_UNLOCK(fp->f_vnode, 0, td);
2774 #endif
2775 	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2776 	VFS_UNLOCK_GIANT(vfslocked);
2777 	fdrop(fp, td);
2778 	return (error);
2779 }
2780 
2781 /*
2782  * Common implementation code for utimes(), lutimes(), and futimes().
2783  */
2784 static int
2785 getutimes(usrtvp, tvpseg, tsp)
2786 	const struct timeval *usrtvp;
2787 	enum uio_seg tvpseg;
2788 	struct timespec *tsp;
2789 {
2790 	struct timeval tv[2];
2791 	const struct timeval *tvp;
2792 	int error;
2793 
2794 	if (usrtvp == NULL) {
2795 		microtime(&tv[0]);
2796 		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2797 		tsp[1] = tsp[0];
2798 	} else {
2799 		if (tvpseg == UIO_SYSSPACE) {
2800 			tvp = usrtvp;
2801 		} else {
2802 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2803 				return (error);
2804 			tvp = tv;
2805 		}
2806 
2807 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
2808 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
2809 			return (EINVAL);
2810 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2811 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2812 	}
2813 	return (0);
2814 }
2815 
2816 /*
2817  * Common implementation code for utimes(), lutimes(), and futimes().
2818  */
2819 static int
2820 setutimes(td, vp, ts, numtimes, nullflag)
2821 	struct thread *td;
2822 	struct vnode *vp;
2823 	const struct timespec *ts;
2824 	int numtimes;
2825 	int nullflag;
2826 {
2827 	int error, setbirthtime;
2828 	struct mount *mp;
2829 	struct vattr vattr;
2830 
2831 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2832 		return (error);
2833 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2834 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2835 	setbirthtime = 0;
2836 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2837 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2838 		setbirthtime = 1;
2839 	VATTR_NULL(&vattr);
2840 	vattr.va_atime = ts[0];
2841 	vattr.va_mtime = ts[1];
2842 	if (setbirthtime)
2843 		vattr.va_birthtime = ts[1];
2844 	if (numtimes > 2)
2845 		vattr.va_birthtime = ts[2];
2846 	if (nullflag)
2847 		vattr.va_vaflags |= VA_UTIMES_NULL;
2848 #ifdef MAC
2849 	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2850 	    vattr.va_mtime);
2851 #endif
2852 	if (error == 0)
2853 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2854 	VOP_UNLOCK(vp, 0, td);
2855 	vn_finished_write(mp);
2856 	return (error);
2857 }
2858 
2859 /*
2860  * Set the access and modification times of a file.
2861  */
2862 #ifndef _SYS_SYSPROTO_H_
2863 struct utimes_args {
2864 	char	*path;
2865 	struct	timeval *tptr;
2866 };
2867 #endif
2868 int
2869 utimes(td, uap)
2870 	struct thread *td;
2871 	register struct utimes_args /* {
2872 		char *path;
2873 		struct timeval *tptr;
2874 	} */ *uap;
2875 {
2876 
2877 	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2878 	    UIO_USERSPACE));
2879 }
2880 
2881 int
2882 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2883     struct timeval *tptr, enum uio_seg tptrseg)
2884 {
2885 	struct timespec ts[2];
2886 	int error;
2887 	struct nameidata nd;
2888 	int vfslocked;
2889 
2890 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2891 		return (error);
2892 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2893 	if ((error = namei(&nd)) != 0)
2894 		return (error);
2895 	vfslocked = NDHASGIANT(&nd);
2896 	NDFREE(&nd, NDF_ONLY_PNBUF);
2897 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2898 	vrele(nd.ni_vp);
2899 	VFS_UNLOCK_GIANT(vfslocked);
2900 	return (error);
2901 }
2902 
2903 /*
2904  * Set the access and modification times of a file.
2905  */
2906 #ifndef _SYS_SYSPROTO_H_
2907 struct lutimes_args {
2908 	char	*path;
2909 	struct	timeval *tptr;
2910 };
2911 #endif
2912 int
2913 lutimes(td, uap)
2914 	struct thread *td;
2915 	register struct lutimes_args /* {
2916 		char *path;
2917 		struct timeval *tptr;
2918 	} */ *uap;
2919 {
2920 
2921 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2922 	    UIO_USERSPACE));
2923 }
2924 
2925 int
2926 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2927     struct timeval *tptr, enum uio_seg tptrseg)
2928 {
2929 	struct timespec ts[2];
2930 	int error;
2931 	struct nameidata nd;
2932 	int vfslocked;
2933 
2934 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2935 		return (error);
2936 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
2937 	if ((error = namei(&nd)) != 0)
2938 		return (error);
2939 	vfslocked = NDHASGIANT(&nd);
2940 	NDFREE(&nd, NDF_ONLY_PNBUF);
2941 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2942 	vrele(nd.ni_vp);
2943 	VFS_UNLOCK_GIANT(vfslocked);
2944 	return (error);
2945 }
2946 
2947 /*
2948  * Set the access and modification times of a file.
2949  */
2950 #ifndef _SYS_SYSPROTO_H_
2951 struct futimes_args {
2952 	int	fd;
2953 	struct	timeval *tptr;
2954 };
2955 #endif
2956 int
2957 futimes(td, uap)
2958 	struct thread *td;
2959 	register struct futimes_args /* {
2960 		int  fd;
2961 		struct timeval *tptr;
2962 	} */ *uap;
2963 {
2964 
2965 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2966 }
2967 
2968 int
2969 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2970     enum uio_seg tptrseg)
2971 {
2972 	struct timespec ts[2];
2973 	struct file *fp;
2974 	int vfslocked;
2975 	int error;
2976 
2977 	AUDIT_ARG(fd, fd);
2978 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2979 		return (error);
2980 	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2981 		return (error);
2982 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2983 #ifdef AUDIT
2984 	vn_lock(fp->f_vnode, LK_EXCLUSIVE | LK_RETRY, td);
2985 	AUDIT_ARG(vnode, fp->f_vnode, ARG_VNODE1);
2986 	VOP_UNLOCK(fp->f_vnode, 0, td);
2987 #endif
2988 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2989 	VFS_UNLOCK_GIANT(vfslocked);
2990 	fdrop(fp, td);
2991 	return (error);
2992 }
2993 
2994 /*
2995  * Truncate a file given its path name.
2996  */
2997 #ifndef _SYS_SYSPROTO_H_
2998 struct truncate_args {
2999 	char	*path;
3000 	int	pad;
3001 	off_t	length;
3002 };
3003 #endif
3004 int
3005 truncate(td, uap)
3006 	struct thread *td;
3007 	register struct truncate_args /* {
3008 		char *path;
3009 		int pad;
3010 		off_t length;
3011 	} */ *uap;
3012 {
3013 
3014 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
3015 }
3016 
3017 int
3018 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
3019 {
3020 	struct mount *mp;
3021 	struct vnode *vp;
3022 	struct vattr vattr;
3023 	int error;
3024 	struct nameidata nd;
3025 	int vfslocked;
3026 
3027 	if (length < 0)
3028 		return(EINVAL);
3029 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path, td);
3030 	if ((error = namei(&nd)) != 0)
3031 		return (error);
3032 	vfslocked = NDHASGIANT(&nd);
3033 	vp = nd.ni_vp;
3034 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3035 		vrele(vp);
3036 		VFS_UNLOCK_GIANT(vfslocked);
3037 		return (error);
3038 	}
3039 	NDFREE(&nd, NDF_ONLY_PNBUF);
3040 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3041 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3042 	if (vp->v_type == VDIR)
3043 		error = EISDIR;
3044 #ifdef MAC
3045 	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
3046 	}
3047 #endif
3048 	else if ((error = vn_writechk(vp)) == 0 &&
3049 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3050 		VATTR_NULL(&vattr);
3051 		vattr.va_size = length;
3052 		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3053 	}
3054 	vput(vp);
3055 	vn_finished_write(mp);
3056 	VFS_UNLOCK_GIANT(vfslocked);
3057 	return (error);
3058 }
3059 
3060 /*
3061  * Truncate a file given a file descriptor.
3062  */
3063 #ifndef _SYS_SYSPROTO_H_
3064 struct ftruncate_args {
3065 	int	fd;
3066 	int	pad;
3067 	off_t	length;
3068 };
3069 #endif
3070 int
3071 ftruncate(td, uap)
3072 	struct thread *td;
3073 	register struct ftruncate_args /* {
3074 		int fd;
3075 		int pad;
3076 		off_t length;
3077 	} */ *uap;
3078 {
3079 	struct mount *mp;
3080 	struct vattr vattr;
3081 	struct vnode *vp;
3082 	struct file *fp;
3083 	int vfslocked;
3084 	int error;
3085 
3086 	AUDIT_ARG(fd, uap->fd);
3087 	if (uap->length < 0)
3088 		return(EINVAL);
3089 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3090 		return (error);
3091 	if ((fp->f_flag & FWRITE) == 0) {
3092 		fdrop(fp, td);
3093 		return (EINVAL);
3094 	}
3095 	vp = fp->f_vnode;
3096 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3097 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3098 		goto drop;
3099 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3100 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3101 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3102 	if (vp->v_type == VDIR)
3103 		error = EISDIR;
3104 #ifdef MAC
3105 	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3106 	    vp))) {
3107 	}
3108 #endif
3109 	else if ((error = vn_writechk(vp)) == 0) {
3110 		VATTR_NULL(&vattr);
3111 		vattr.va_size = uap->length;
3112 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3113 	}
3114 	VOP_UNLOCK(vp, 0, td);
3115 	vn_finished_write(mp);
3116 drop:
3117 	VFS_UNLOCK_GIANT(vfslocked);
3118 	fdrop(fp, td);
3119 	return (error);
3120 }
3121 
3122 #if defined(COMPAT_43)
3123 /*
3124  * Truncate a file given its path name.
3125  */
3126 #ifndef _SYS_SYSPROTO_H_
3127 struct otruncate_args {
3128 	char	*path;
3129 	long	length;
3130 };
3131 #endif
3132 int
3133 otruncate(td, uap)
3134 	struct thread *td;
3135 	register struct otruncate_args /* {
3136 		char *path;
3137 		long length;
3138 	} */ *uap;
3139 {
3140 	struct truncate_args /* {
3141 		char *path;
3142 		int pad;
3143 		off_t length;
3144 	} */ nuap;
3145 
3146 	nuap.path = uap->path;
3147 	nuap.length = uap->length;
3148 	return (truncate(td, &nuap));
3149 }
3150 
3151 /*
3152  * Truncate a file given a file descriptor.
3153  */
3154 #ifndef _SYS_SYSPROTO_H_
3155 struct oftruncate_args {
3156 	int	fd;
3157 	long	length;
3158 };
3159 #endif
3160 int
3161 oftruncate(td, uap)
3162 	struct thread *td;
3163 	register struct oftruncate_args /* {
3164 		int fd;
3165 		long length;
3166 	} */ *uap;
3167 {
3168 	struct ftruncate_args /* {
3169 		int fd;
3170 		int pad;
3171 		off_t length;
3172 	} */ nuap;
3173 
3174 	nuap.fd = uap->fd;
3175 	nuap.length = uap->length;
3176 	return (ftruncate(td, &nuap));
3177 }
3178 #endif /* COMPAT_43 */
3179 
3180 /*
3181  * Sync an open file.
3182  */
3183 #ifndef _SYS_SYSPROTO_H_
3184 struct fsync_args {
3185 	int	fd;
3186 };
3187 #endif
3188 int
3189 fsync(td, uap)
3190 	struct thread *td;
3191 	struct fsync_args /* {
3192 		int fd;
3193 	} */ *uap;
3194 {
3195 	struct vnode *vp;
3196 	struct mount *mp;
3197 	struct file *fp;
3198 	int vfslocked;
3199 	int error;
3200 
3201 	AUDIT_ARG(fd, uap->fd);
3202 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3203 		return (error);
3204 	vp = fp->f_vnode;
3205 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3206 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3207 		goto drop;
3208 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3209 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3210 	if (vp->v_object != NULL) {
3211 		VM_OBJECT_LOCK(vp->v_object);
3212 		vm_object_page_clean(vp->v_object, 0, 0, 0);
3213 		VM_OBJECT_UNLOCK(vp->v_object);
3214 	}
3215 	error = VOP_FSYNC(vp, MNT_WAIT, td);
3216 
3217 	VOP_UNLOCK(vp, 0, td);
3218 	vn_finished_write(mp);
3219 drop:
3220 	VFS_UNLOCK_GIANT(vfslocked);
3221 	fdrop(fp, td);
3222 	return (error);
3223 }
3224 
3225 /*
3226  * Rename files.  Source and destination must either both be directories,
3227  * or both not be directories.  If target is a directory, it must be empty.
3228  */
3229 #ifndef _SYS_SYSPROTO_H_
3230 struct rename_args {
3231 	char	*from;
3232 	char	*to;
3233 };
3234 #endif
3235 int
3236 rename(td, uap)
3237 	struct thread *td;
3238 	register struct rename_args /* {
3239 		char *from;
3240 		char *to;
3241 	} */ *uap;
3242 {
3243 
3244 	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3245 }
3246 
3247 int
3248 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3249 {
3250 	struct mount *mp = NULL;
3251 	struct vnode *tvp, *fvp, *tdvp;
3252 	struct nameidata fromnd, tond;
3253 	int tvfslocked;
3254 	int fvfslocked;
3255 	int error;
3256 
3257 	bwillwrite();
3258 #ifdef MAC
3259 	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
3260 	    AUDITVNODE1, pathseg, from, td);
3261 #else
3262 	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
3263 	    AUDITVNODE1, pathseg, from, td);
3264 #endif
3265 	if ((error = namei(&fromnd)) != 0)
3266 		return (error);
3267 	fvfslocked = NDHASGIANT(&fromnd);
3268 	tvfslocked = 0;
3269 #ifdef MAC
3270 	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3271 	    fromnd.ni_vp, &fromnd.ni_cnd);
3272 	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3273 	if (fromnd.ni_dvp != fromnd.ni_vp)
3274 		VOP_UNLOCK(fromnd.ni_vp, 0, td);
3275 #endif
3276 	fvp = fromnd.ni_vp;
3277 	if (error == 0)
3278 		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3279 	if (error != 0) {
3280 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3281 		vrele(fromnd.ni_dvp);
3282 		vrele(fvp);
3283 		goto out1;
3284 	}
3285 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3286 	    MPSAFE | AUDITVNODE2, pathseg, to, td);
3287 	if (fromnd.ni_vp->v_type == VDIR)
3288 		tond.ni_cnd.cn_flags |= WILLBEDIR;
3289 	if ((error = namei(&tond)) != 0) {
3290 		/* Translate error code for rename("dir1", "dir2/."). */
3291 		if (error == EISDIR && fvp->v_type == VDIR)
3292 			error = EINVAL;
3293 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3294 		vrele(fromnd.ni_dvp);
3295 		vrele(fvp);
3296 		vn_finished_write(mp);
3297 		goto out1;
3298 	}
3299 	tvfslocked = NDHASGIANT(&tond);
3300 	tdvp = tond.ni_dvp;
3301 	tvp = tond.ni_vp;
3302 	if (tvp != NULL) {
3303 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3304 			error = ENOTDIR;
3305 			goto out;
3306 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3307 			error = EISDIR;
3308 			goto out;
3309 		}
3310 	}
3311 	if (fvp == tdvp)
3312 		error = EINVAL;
3313 	/*
3314 	 * If the source is the same as the destination (that is, if they
3315 	 * are links to the same vnode), then there is nothing to do.
3316 	 */
3317 	if (fvp == tvp)
3318 		error = -1;
3319 #ifdef MAC
3320 	else
3321 		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3322 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3323 #endif
3324 out:
3325 	if (!error) {
3326 		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3327 		if (fromnd.ni_dvp != tdvp) {
3328 			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3329 		}
3330 		if (tvp) {
3331 			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3332 		}
3333 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3334 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3335 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3336 		NDFREE(&tond, NDF_ONLY_PNBUF);
3337 	} else {
3338 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3339 		NDFREE(&tond, NDF_ONLY_PNBUF);
3340 		if (tvp)
3341 			vput(tvp);
3342 		if (tdvp == tvp)
3343 			vrele(tdvp);
3344 		else
3345 			vput(tdvp);
3346 		vrele(fromnd.ni_dvp);
3347 		vrele(fvp);
3348 	}
3349 	vrele(tond.ni_startdir);
3350 	vn_finished_write(mp);
3351 out1:
3352 	if (fromnd.ni_startdir)
3353 		vrele(fromnd.ni_startdir);
3354 	VFS_UNLOCK_GIANT(fvfslocked);
3355 	VFS_UNLOCK_GIANT(tvfslocked);
3356 	if (error == -1)
3357 		return (0);
3358 	return (error);
3359 }
3360 
3361 /*
3362  * Make a directory file.
3363  */
3364 #ifndef _SYS_SYSPROTO_H_
3365 struct mkdir_args {
3366 	char	*path;
3367 	int	mode;
3368 };
3369 #endif
3370 int
3371 mkdir(td, uap)
3372 	struct thread *td;
3373 	register struct mkdir_args /* {
3374 		char *path;
3375 		int mode;
3376 	} */ *uap;
3377 {
3378 
3379 	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3380 }
3381 
3382 int
3383 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3384 {
3385 	struct mount *mp;
3386 	struct vnode *vp;
3387 	struct vattr vattr;
3388 	int error;
3389 	struct nameidata nd;
3390 	int vfslocked;
3391 
3392 	AUDIT_ARG(mode, mode);
3393 restart:
3394 	bwillwrite();
3395 	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
3396 	    segflg, path, td);
3397 	nd.ni_cnd.cn_flags |= WILLBEDIR;
3398 	if ((error = namei(&nd)) != 0)
3399 		return (error);
3400 	vfslocked = NDHASGIANT(&nd);
3401 	vp = nd.ni_vp;
3402 	if (vp != NULL) {
3403 		NDFREE(&nd, NDF_ONLY_PNBUF);
3404 		/*
3405 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3406 		 * the strange behaviour of leaving the vnode unlocked
3407 		 * if the target is the same vnode as the parent.
3408 		 */
3409 		if (vp == nd.ni_dvp)
3410 			vrele(nd.ni_dvp);
3411 		else
3412 			vput(nd.ni_dvp);
3413 		vrele(vp);
3414 		VFS_UNLOCK_GIANT(vfslocked);
3415 		return (EEXIST);
3416 	}
3417 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3418 		NDFREE(&nd, NDF_ONLY_PNBUF);
3419 		vput(nd.ni_dvp);
3420 		VFS_UNLOCK_GIANT(vfslocked);
3421 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3422 			return (error);
3423 		goto restart;
3424 	}
3425 	VATTR_NULL(&vattr);
3426 	vattr.va_type = VDIR;
3427 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3428 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3429 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3430 #ifdef MAC
3431 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3432 	    &vattr);
3433 	if (error)
3434 		goto out;
3435 #endif
3436 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3437 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3438 #ifdef MAC
3439 out:
3440 #endif
3441 	NDFREE(&nd, NDF_ONLY_PNBUF);
3442 	vput(nd.ni_dvp);
3443 	if (!error)
3444 		vput(nd.ni_vp);
3445 	vn_finished_write(mp);
3446 	VFS_UNLOCK_GIANT(vfslocked);
3447 	return (error);
3448 }
3449 
3450 /*
3451  * Remove a directory file.
3452  */
3453 #ifndef _SYS_SYSPROTO_H_
3454 struct rmdir_args {
3455 	char	*path;
3456 };
3457 #endif
3458 int
3459 rmdir(td, uap)
3460 	struct thread *td;
3461 	struct rmdir_args /* {
3462 		char *path;
3463 	} */ *uap;
3464 {
3465 
3466 	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3467 }
3468 
3469 int
3470 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3471 {
3472 	struct mount *mp;
3473 	struct vnode *vp;
3474 	int error;
3475 	struct nameidata nd;
3476 	int vfslocked;
3477 
3478 restart:
3479 	bwillwrite();
3480 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
3481 	    pathseg, path, td);
3482 	if ((error = namei(&nd)) != 0)
3483 		return (error);
3484 	vfslocked = NDHASGIANT(&nd);
3485 	vp = nd.ni_vp;
3486 	if (vp->v_type != VDIR) {
3487 		error = ENOTDIR;
3488 		goto out;
3489 	}
3490 	/*
3491 	 * No rmdir "." please.
3492 	 */
3493 	if (nd.ni_dvp == vp) {
3494 		error = EINVAL;
3495 		goto out;
3496 	}
3497 	/*
3498 	 * The root of a mounted filesystem cannot be deleted.
3499 	 */
3500 	if (vp->v_vflag & VV_ROOT) {
3501 		error = EBUSY;
3502 		goto out;
3503 	}
3504 #ifdef MAC
3505 	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3506 	    &nd.ni_cnd);
3507 	if (error)
3508 		goto out;
3509 #endif
3510 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3511 		NDFREE(&nd, NDF_ONLY_PNBUF);
3512 		vput(vp);
3513 		if (nd.ni_dvp == vp)
3514 			vrele(nd.ni_dvp);
3515 		else
3516 			vput(nd.ni_dvp);
3517 		VFS_UNLOCK_GIANT(vfslocked);
3518 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3519 			return (error);
3520 		goto restart;
3521 	}
3522 	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3523 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3524 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3525 	vn_finished_write(mp);
3526 out:
3527 	NDFREE(&nd, NDF_ONLY_PNBUF);
3528 	vput(vp);
3529 	if (nd.ni_dvp == vp)
3530 		vrele(nd.ni_dvp);
3531 	else
3532 		vput(nd.ni_dvp);
3533 	VFS_UNLOCK_GIANT(vfslocked);
3534 	return (error);
3535 }
3536 
3537 #ifdef COMPAT_43
3538 /*
3539  * Read a block of directory entries in a filesystem independent format.
 *
 * COMPAT_43 variant.  Up to uap->count bytes (at most 64K) of entries are
 * read from the directory open on uap->fd into uap->buf.  Except in the
 * big-endian direct-read case below, entries are first read into a kernel
 * buffer and their d_type/d_namlen fields are rewritten before the copy
 * out to user space.
 *
 * On success the number of bytes transferred is stored in
 * td->td_retval[0] and the file offset at which the read started is
 * copied out to uap->basep.  Returns EINVAL for an oversized count or a
 * non-directory, EBADF when the descriptor is not open for reading, EIO
 * for an entry with a zero d_reclen, or another errno value.
3540  */
3541 #ifndef _SYS_SYSPROTO_H_
3542 struct ogetdirentries_args {
3543 	int	fd;
3544 	char	*buf;
3545 	u_int	count;
3546 	long	*basep;
3547 };
3548 #endif
3549 int
3550 ogetdirentries(td, uap)
3551 	struct thread *td;
3552 	register struct ogetdirentries_args /* {
3553 		int fd;
3554 		char *buf;
3555 		u_int count;
3556 		long *basep;
3557 	} */ *uap;
3558 {
3559 	struct vnode *vp;
3560 	struct file *fp;
3561 	struct uio auio, kuio;
3562 	struct iovec aiov, kiov;
3563 	struct dirent *dp, *edp;
3564 	caddr_t dirbuf;
3565 	int error, eofflag, readcnt, vfslocked;
3566 	long loff;
3567 
3568 	/* XXX arbitrary sanity limit on `count'. */
3569 	if (uap->count > 64 * 1024)
3570 		return (EINVAL);
3571 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3572 		return (error);
3573 	if ((fp->f_flag & FREAD) == 0) {
3574 		fdrop(fp, td);
3575 		return (EBADF);
3576 	}
3577 	vp = fp->f_vnode;
	/*
	 * Re-entered, with Giant dropped, whenever the union handling at
	 * the bottom of the function switches to a different vnode.
	 */
3578 unionread:
3579 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3580 	if (vp->v_type != VDIR) {
3581 		VFS_UNLOCK_GIANT(vfslocked);
3582 		fdrop(fp, td);
3583 		return (EINVAL);
3584 	}
	/* auio describes the caller's buffer in user space. */
3585 	aiov.iov_base = uap->buf;
3586 	aiov.iov_len = uap->count;
3587 	auio.uio_iov = &aiov;
3588 	auio.uio_iovcnt = 1;
3589 	auio.uio_rw = UIO_READ;
3590 	auio.uio_segflg = UIO_USERSPACE;
3591 	auio.uio_td = td;
3592 	auio.uio_resid = uap->count;
3593 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember the starting offset; it is reported through basep. */
3594 	loff = auio.uio_offset = fp->f_offset;
3595 #ifdef MAC
3596 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3597 	if (error) {
3598 		VOP_UNLOCK(vp, 0, td);
3599 		VFS_UNLOCK_GIANT(vfslocked);
3600 		fdrop(fp, td);
3601 		return (error);
3602 	}
3603 #endif
	/*
	 * On non-little-endian machines, a mount whose mnt_maxsymlinklen is
	 * <= 0 is read straight into the user buffer with no rewriting.
	 * NOTE(review): presumably such mounts already use the old entry
	 * layout -- confirm.
	 */
3604 #	if (BYTE_ORDER != LITTLE_ENDIAN)
3605 		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3606 			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3607 			    NULL, NULL);
3608 			fp->f_offset = auio.uio_offset;
3609 		} else
3610 #	endif
3611 	{
		/*
		 * kuio is a copy of auio redirected at a temporary kernel
		 * buffer of uap->count bytes.
		 */
3612 		kuio = auio;
3613 		kuio.uio_iov = &kiov;
3614 		kuio.uio_segflg = UIO_SYSSPACE;
3615 		kiov.iov_len = uap->count;
3616 		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3617 		kiov.iov_base = dirbuf;
3618 		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3619 			    NULL, NULL);
3620 		fp->f_offset = kuio.uio_offset;
3621 		if (error == 0) {
			/* Walk the readcnt bytes that were actually read. */
3622 			readcnt = uap->count - kuio.uio_resid;
3623 			edp = (struct dirent *)&dirbuf[readcnt];
3624 			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3625 #				if (BYTE_ORDER == LITTLE_ENDIAN)
3626 					/*
3627 					 * The expected low byte of
3628 					 * dp->d_namlen is our dp->d_type.
3629 					 * The high MBZ byte of dp->d_namlen
3630 					 * is our dp->d_namlen.
3631 					 */
3632 					dp->d_type = dp->d_namlen;
3633 					dp->d_namlen = 0;
3634 #				else
3635 					/*
3636 					 * The dp->d_type is the high byte
3637 					 * of the expected dp->d_namlen,
3638 					 * so must be zero'ed.
3639 					 */
3640 					dp->d_type = 0;
3641 #				endif
				/* A zero d_reclen would never advance dp. */
3642 				if (dp->d_reclen > 0) {
3643 					dp = (struct dirent *)
3644 					    ((char *)dp + dp->d_reclen);
3645 				} else {
3646 					error = EIO;
3647 					break;
3648 				}
3649 			}
			/* Copy out only if the walk reached the end. */
3650 			if (dp >= edp)
3651 				error = uiomove(dirbuf, readcnt, &auio);
3652 		}
3653 		FREE(dirbuf, M_TEMP);
3654 	}
3655 	VOP_UNLOCK(vp, 0, td);
3656 	if (error) {
3657 		VFS_UNLOCK_GIANT(vfslocked);
3658 		fdrop(fp, td);
3659 		return (error);
3660 	}
	/*
	 * Nothing was transferred: see whether a union layer below this
	 * directory should be read instead.
	 */
3661 	if (uap->count == auio.uio_resid) {
3662 		if (union_dircheckp) {
3663 			error = union_dircheckp(td, &vp, fp);
			/* -1 asks for the read to be redone on the new vp. */
3664 			if (error == -1) {
3665 				VFS_UNLOCK_GIANT(vfslocked);
3666 				goto unionread;
3667 			}
3668 			if (error) {
3669 				VFS_UNLOCK_GIANT(vfslocked);
3670 				fdrop(fp, td);
3671 				return (error);
3672 			}
3673 		}
3674 		/*
3675 		 * XXX We could delay dropping the lock above but
3676 		 * union_dircheckp complicates things.
3677 		 */
3678 		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
		/*
		 * For the root of a MNT_UNION mount, switch the open file
		 * over to the covered vnode, reset its offset and read
		 * again.
		 */
3679 		if ((vp->v_vflag & VV_ROOT) &&
3680 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3681 			struct vnode *tvp = vp;
3682 			vp = vp->v_mount->mnt_vnodecovered;
3683 			VREF(vp);
3684 			fp->f_vnode = vp;
3685 			fp->f_data = vp;
3686 			fp->f_offset = 0;
3687 			vput(tvp);
3688 			VFS_UNLOCK_GIANT(vfslocked);
3689 			goto unionread;
3690 		}
3691 		VOP_UNLOCK(vp, 0, td);
3692 	}
3693 	VFS_UNLOCK_GIANT(vfslocked);
	/* The byte count is stored even when the copyout of loff fails. */
3694 	error = copyout(&loff, uap->basep, sizeof(long));
3695 	fdrop(fp, td);
3696 	td->td_retval[0] = uap->count - auio.uio_resid;
3697 	return (error);
3698 }
3699 #endif /* COMPAT_43 */
3700 
3701 /*
3702  * Read a block of directory entries in a filesystem independent format.
3703  */
3704 #ifndef _SYS_SYSPROTO_H_
3705 struct getdirentries_args {
3706 	int	fd;
3707 	char	*buf;
3708 	u_int	count;
3709 	long	*basep;
3710 };
3711 #endif
3712 int
3713 getdirentries(td, uap)
3714 	struct thread *td;
3715 	register struct getdirentries_args /* {
3716 		int fd;
3717 		char *buf;
3718 		u_int count;
3719 		long *basep;
3720 	} */ *uap;
3721 {
3722 	struct vnode *vp;
3723 	struct file *fp;
3724 	struct uio auio;
3725 	struct iovec aiov;
3726 	int vfslocked;
3727 	long loff;
3728 	int error, eofflag;
3729 
3730 	AUDIT_ARG(fd, uap->fd);
3731 	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3732 		return (error);
3733 	if ((fp->f_flag & FREAD) == 0) {
3734 		fdrop(fp, td);
3735 		return (EBADF);
3736 	}
3737 	vp = fp->f_vnode;
3738 unionread:
3739 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3740 	if (vp->v_type != VDIR) {
3741 		error = EINVAL;
3742 		goto fail;
3743 	}
3744 	aiov.iov_base = uap->buf;
3745 	aiov.iov_len = uap->count;
3746 	auio.uio_iov = &aiov;
3747 	auio.uio_iovcnt = 1;
3748 	auio.uio_rw = UIO_READ;
3749 	auio.uio_segflg = UIO_USERSPACE;
3750 	auio.uio_td = td;
3751 	auio.uio_resid = uap->count;
3752 	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3753 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3754 	AUDIT_ARG(vnode, vp, ARG_VNODE1);
3755 	loff = auio.uio_offset = fp->f_offset;
3756 #ifdef MAC
3757 	error = mac_check_vnode_readdir(td->td_ucred, vp);
3758 	if (error == 0)
3759 #endif
3760 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3761 		    NULL);
3762 	fp->f_offset = auio.uio_offset;
3763 	VOP_UNLOCK(vp, 0, td);
3764 	if (error)
3765 		goto fail;
3766 	if (uap->count == auio.uio_resid) {
3767 		if (union_dircheckp) {
3768 			error = union_dircheckp(td, &vp, fp);
3769 			if (error == -1) {
3770 				VFS_UNLOCK_GIANT(vfslocked);
3771 				goto unionread;
3772 			}
3773 			if (error)
3774 				goto fail;
3775 		}
3776 		/*
3777 		 * XXX We could delay dropping the lock above but
3778 		 * union_dircheckp complicates things.
3779 		 */
3780 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3781 		if ((vp->v_vflag & VV_ROOT) &&
3782 		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3783 			struct vnode *tvp = vp;
3784 			vp = vp->v_mount->mnt_vnodecovered;
3785 			VREF(vp);
3786 			fp->f_vnode = vp;
3787 			fp->f_data = vp;
3788 			fp->f_offset = 0;
3789 			vput(tvp);
3790 			VFS_UNLOCK_GIANT(vfslocked);
3791 			goto unionread;
3792 		}
3793 		VOP_UNLOCK(vp, 0, td);
3794 	}
3795 	if (uap->basep != NULL) {
3796 		error = copyout(&loff, uap->basep, sizeof(long));
3797 	}
3798 	td->td_retval[0] = uap->count - auio.uio_resid;
3799 fail:
3800 	VFS_UNLOCK_GIANT(vfslocked);
3801 	fdrop(fp, td);
3802 	return (error);
3803 }
3804 #ifndef _SYS_SYSPROTO_H_
3805 struct getdents_args {
3806 	int fd;
3807 	char *buf;
3808 	size_t count;
3809 };
3810 #endif
3811 int
3812 getdents(td, uap)
3813 	struct thread *td;
3814 	register struct getdents_args /* {
3815 		int fd;
3816 		char *buf;
3817 		u_int count;
3818 	} */ *uap;
3819 {
3820 	struct getdirentries_args ap;
3821 	ap.fd = uap->fd;
3822 	ap.buf = uap->buf;
3823 	ap.count = uap->count;
3824 	ap.basep = NULL;
3825 	return (getdirentries(td, &ap));
3826 }
3827 
3828 /*
3829  * Set the mode mask for creation of filesystem nodes.
3830  *
3831  * MP SAFE
3832  */
3833 #ifndef _SYS_SYSPROTO_H_
3834 struct umask_args {
3835 	int	newmask;
3836 };
3837 #endif
3838 int
3839 umask(td, uap)
3840 	struct thread *td;
3841 	struct umask_args /* {
3842 		int newmask;
3843 	} */ *uap;
3844 {
3845 	register struct filedesc *fdp;
3846 
3847 	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3848 	fdp = td->td_proc->p_fd;
3849 	td->td_retval[0] = fdp->fd_cmask;
3850 	fdp->fd_cmask = uap->newmask & ALLPERMS;
3851 	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3852 	return (0);
3853 }
3854 
3855 /*
3856  * Void all references to file by ripping underlying filesystem
3857  * away from vnode.
3858  */
3859 #ifndef _SYS_SYSPROTO_H_
3860 struct revoke_args {
3861 	char	*path;
3862 };
3863 #endif
3864 int
3865 revoke(td, uap)
3866 	struct thread *td;
3867 	register struct revoke_args /* {
3868 		char *path;
3869 	} */ *uap;
3870 {
3871 	struct vnode *vp;
3872 	struct vattr vattr;
3873 	int error;
3874 	struct nameidata nd;
3875 	int vfslocked;
3876 
3877 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3878 	    UIO_USERSPACE, uap->path, td);
3879 	if ((error = namei(&nd)) != 0)
3880 		return (error);
3881 	vfslocked = NDHASGIANT(&nd);
3882 	vp = nd.ni_vp;
3883 	NDFREE(&nd, NDF_ONLY_PNBUF);
3884 	if (vp->v_type != VCHR) {
3885 		error = EINVAL;
3886 		goto out;
3887 	}
3888 #ifdef MAC
3889 	error = mac_check_vnode_revoke(td->td_ucred, vp);
3890 	if (error)
3891 		goto out;
3892 #endif
3893 	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3894 	if (error)
3895 		goto out;
3896 	if (td->td_ucred->cr_uid != vattr.va_uid) {
3897 		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
3898 		if (error)
3899 			goto out;
3900 	}
3901 	if (vcount(vp) > 1)
3902 		VOP_REVOKE(vp, REVOKEALL);
3903 out:
3904 	vput(vp);
3905 	VFS_UNLOCK_GIANT(vfslocked);
3906 	return (error);
3907 }
3908 
3909 /*
3910  * Convert a user file descriptor to a kernel file entry.
3911  * A reference on the file entry is held upon returning.
3912  */
3913 int
3914 getvnode(fdp, fd, fpp)
3915 	struct filedesc *fdp;
3916 	int fd;
3917 	struct file **fpp;
3918 {
3919 	int error;
3920 	struct file *fp;
3921 
3922 	fp = NULL;
3923 	if (fdp == NULL)
3924 		error = EBADF;
3925 	else {
3926 		FILEDESC_LOCK(fdp);
3927 		if ((u_int)fd >= fdp->fd_nfiles ||
3928 		    (fp = fdp->fd_ofiles[fd]) == NULL)
3929 			error = EBADF;
3930 		else if (fp->f_vnode == NULL) {
3931 			fp = NULL;
3932 			error = EINVAL;
3933 		} else {
3934 			fhold(fp);
3935 			error = 0;
3936 		}
3937 		FILEDESC_UNLOCK(fdp);
3938 	}
3939 	*fpp = fp;
3940 	return (error);
3941 }
3942 
3943 /*
3944  * Get (NFS) file handle
3945  */
3946 #ifndef _SYS_SYSPROTO_H_
3947 struct lgetfh_args {
3948 	char	*fname;
3949 	fhandle_t *fhp;
3950 };
3951 #endif
3952 int
3953 lgetfh(td, uap)
3954 	struct thread *td;
3955 	register struct lgetfh_args *uap;
3956 {
3957 	struct nameidata nd;
3958 	fhandle_t fh;
3959 	register struct vnode *vp;
3960 	int vfslocked;
3961 	int error;
3962 
3963 	error = suser(td);
3964 	if (error)
3965 		return (error);
3966 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
3967 	    UIO_USERSPACE, uap->fname, td);
3968 	error = namei(&nd);
3969 	if (error)
3970 		return (error);
3971 	vfslocked = NDHASGIANT(&nd);
3972 	NDFREE(&nd, NDF_ONLY_PNBUF);
3973 	vp = nd.ni_vp;
3974 	bzero(&fh, sizeof(fh));
3975 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3976 	error = VFS_VPTOFH(vp, &fh.fh_fid);
3977 	vput(vp);
3978 	VFS_UNLOCK_GIANT(vfslocked);
3979 	if (error)
3980 		return (error);
3981 	error = copyout(&fh, uap->fhp, sizeof (fh));
3982 	return (error);
3983 }
3984 
3985 #ifndef _SYS_SYSPROTO_H_
3986 struct getfh_args {
3987 	char	*fname;
3988 	fhandle_t *fhp;
3989 };
3990 #endif
3991 int
3992 getfh(td, uap)
3993 	struct thread *td;
3994 	register struct getfh_args *uap;
3995 {
3996 	struct nameidata nd;
3997 	fhandle_t fh;
3998 	register struct vnode *vp;
3999 	int vfslocked;
4000 	int error;
4001 
4002 	error = suser(td);
4003 	if (error)
4004 		return (error);
4005 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
4006 	    UIO_USERSPACE, uap->fname, td);
4007 	error = namei(&nd);
4008 	if (error)
4009 		return (error);
4010 	vfslocked = NDHASGIANT(&nd);
4011 	NDFREE(&nd, NDF_ONLY_PNBUF);
4012 	vp = nd.ni_vp;
4013 	bzero(&fh, sizeof(fh));
4014 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4015 	error = VFS_VPTOFH(vp, &fh.fh_fid);
4016 	vput(vp);
4017 	VFS_UNLOCK_GIANT(vfslocked);
4018 	if (error)
4019 		return (error);
4020 	error = copyout(&fh, uap->fhp, sizeof (fh));
4021 	return (error);
4022 }
4023 
4024 /*
4025  * syscall for the rpc.lockd to use to translate a NFS file handle into
4026  * an open descriptor.
4027  *
4028  * warning: do not remove the suser() call or this becomes one giant
4029  * security hole.
4030  *
4031  * MP SAFE
4032  */
4033 #ifndef _SYS_SYSPROTO_H_
4034 struct fhopen_args {
4035 	const struct fhandle *u_fhp;
4036 	int flags;
4037 };
4038 #endif
4039 int
4040 fhopen(td, uap)
4041 	struct thread *td;
4042 	struct fhopen_args /* {
4043 		const struct fhandle *u_fhp;
4044 		int flags;
4045 	} */ *uap;
4046 {
4047 	struct proc *p = td->td_proc;
4048 	struct mount *mp;
4049 	struct vnode *vp;
4050 	struct fhandle fhp;
4051 	struct vattr vat;
4052 	struct vattr *vap = &vat;
4053 	struct flock lf;
4054 	struct file *fp;
4055 	register struct filedesc *fdp = p->p_fd;
4056 	int fmode, mode, error, type;
4057 	struct file *nfp;
4058 	int vfslocked;
4059 	int indx;
4060 
4061 	error = suser(td);
4062 	if (error)
4063 		return (error);
4064 	fmode = FFLAGS(uap->flags);
4065 	/* why not allow a non-read/write open for our lockd? */
4066 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4067 		return (EINVAL);
4068 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4069 	if (error)
4070 		return(error);
4071 	/* find the mount point */
4072 	mp = vfs_getvfs(&fhp.fh_fsid);
4073 	if (mp == NULL)
4074 		return (ESTALE);
4075 	vfslocked = VFS_LOCK_GIANT(mp);
4076 	/* now give me my vnode, it gets returned to me locked */
4077 	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4078 	if (error)
4079 		goto out;
4080 	/*
4081 	 * from now on we have to make sure not
4082 	 * to forget about the vnode
4083 	 * any error that causes an abort must vput(vp)
4084 	 * just set error = err and 'goto bad;'.
4085 	 */
4086 
4087 	/*
4088 	 * from vn_open
4089 	 */
4090 	if (vp->v_type == VLNK) {
4091 		error = EMLINK;
4092 		goto bad;
4093 	}
4094 	if (vp->v_type == VSOCK) {
4095 		error = EOPNOTSUPP;
4096 		goto bad;
4097 	}
4098 	mode = 0;
4099 	if (fmode & (FWRITE | O_TRUNC)) {
4100 		if (vp->v_type == VDIR) {
4101 			error = EISDIR;
4102 			goto bad;
4103 		}
4104 		error = vn_writechk(vp);
4105 		if (error)
4106 			goto bad;
4107 		mode |= VWRITE;
4108 	}
4109 	if (fmode & FREAD)
4110 		mode |= VREAD;
4111 	if (fmode & O_APPEND)
4112 		mode |= VAPPEND;
4113 #ifdef MAC
4114 	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4115 	if (error)
4116 		goto bad;
4117 #endif
4118 	if (mode) {
4119 		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4120 		if (error)
4121 			goto bad;
4122 	}
4123 	if (fmode & O_TRUNC) {
4124 		VOP_UNLOCK(vp, 0, td);				/* XXX */
4125 		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4126 			vrele(vp);
4127 			goto out;
4128 		}
4129 		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4130 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4131 #ifdef MAC
4132 		/*
4133 		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4134 		 * should be right.
4135 		 */
4136 		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4137 		if (error == 0) {
4138 #endif
4139 			VATTR_NULL(vap);
4140 			vap->va_size = 0;
4141 			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4142 #ifdef MAC
4143 		}
4144 #endif
4145 		vn_finished_write(mp);
4146 		if (error)
4147 			goto bad;
4148 	}
4149 	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
4150 	if (error)
4151 		goto bad;
4152 
4153 	if (fmode & FWRITE)
4154 		vp->v_writecount++;
4155 
4156 	/*
4157 	 * end of vn_open code
4158 	 */
4159 
4160 	if ((error = falloc(td, &nfp, &indx)) != 0) {
4161 		if (fmode & FWRITE)
4162 			vp->v_writecount--;
4163 		goto bad;
4164 	}
4165 	/* An extra reference on `nfp' has been held for us by falloc(). */
4166 	fp = nfp;
4167 
4168 	nfp->f_vnode = vp;
4169 	nfp->f_data = vp;
4170 	nfp->f_flag = fmode & FMASK;
4171 	nfp->f_ops = &vnops;
4172 	nfp->f_type = DTYPE_VNODE;
4173 	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4174 		lf.l_whence = SEEK_SET;
4175 		lf.l_start = 0;
4176 		lf.l_len = 0;
4177 		if (fmode & O_EXLOCK)
4178 			lf.l_type = F_WRLCK;
4179 		else
4180 			lf.l_type = F_RDLCK;
4181 		type = F_FLOCK;
4182 		if ((fmode & FNONBLOCK) == 0)
4183 			type |= F_WAIT;
4184 		VOP_UNLOCK(vp, 0, td);
4185 		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4186 			    type)) != 0) {
4187 			/*
4188 			 * The lock request failed.  Normally close the
4189 			 * descriptor but handle the case where someone might
4190 			 * have dup()d or close()d it when we weren't looking.
4191 			 */
4192 			fdclose(fdp, fp, indx, td);
4193 
4194 			/*
4195 			 * release our private reference
4196 			 */
4197 			fdrop(fp, td);
4198 			goto out;
4199 		}
4200 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4201 		fp->f_flag |= FHASLOCK;
4202 	}
4203 
4204 	VOP_UNLOCK(vp, 0, td);
4205 	fdrop(fp, td);
4206 	vfs_rel(mp);
4207 	VFS_UNLOCK_GIANT(vfslocked);
4208 	td->td_retval[0] = indx;
4209 	return (0);
4210 
4211 bad:
4212 	vput(vp);
4213 out:
4214 	vfs_rel(mp);
4215 	VFS_UNLOCK_GIANT(vfslocked);
4216 	return (error);
4217 }
4218 
4219 /*
4220  * Stat an (NFS) file handle.
4221  *
4222  * MP SAFE
4223  */
4224 #ifndef _SYS_SYSPROTO_H_
4225 struct fhstat_args {
4226 	struct fhandle *u_fhp;
4227 	struct stat *sb;
4228 };
4229 #endif
4230 int
4231 fhstat(td, uap)
4232 	struct thread *td;
4233 	register struct fhstat_args /* {
4234 		struct fhandle *u_fhp;
4235 		struct stat *sb;
4236 	} */ *uap;
4237 {
4238 	struct stat sb;
4239 	fhandle_t fh;
4240 	struct mount *mp;
4241 	struct vnode *vp;
4242 	int vfslocked;
4243 	int error;
4244 
4245 	error = suser(td);
4246 	if (error)
4247 		return (error);
4248 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4249 	if (error)
4250 		return (error);
4251 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4252 		return (ESTALE);
4253 	vfslocked = VFS_LOCK_GIANT(mp);
4254 	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) {
4255 		vfs_rel(mp);
4256 		VFS_UNLOCK_GIANT(vfslocked);
4257 		return (error);
4258 	}
4259 	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4260 	vput(vp);
4261 	vfs_rel(mp);
4262 	VFS_UNLOCK_GIANT(vfslocked);
4263 	if (error)
4264 		return (error);
4265 	error = copyout(&sb, uap->sb, sizeof(sb));
4266 	return (error);
4267 }
4268 
4269 /*
4270  * Implement fstatfs() for (NFS) file handles.
4271  *
4272  * MP SAFE
4273  */
4274 #ifndef _SYS_SYSPROTO_H_
4275 struct fhstatfs_args {
4276 	struct fhandle *u_fhp;
4277 	struct statfs *buf;
4278 };
4279 #endif
4280 int
4281 fhstatfs(td, uap)
4282 	struct thread *td;
4283 	struct fhstatfs_args /* {
4284 		struct fhandle *u_fhp;
4285 		struct statfs *buf;
4286 	} */ *uap;
4287 {
4288 	struct statfs sf;
4289 	fhandle_t fh;
4290 	int error;
4291 
4292 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4293 	if (error)
4294 		return (error);
4295 	error = kern_fhstatfs(td, fh, &sf);
4296 	if (error)
4297 		return (error);
4298 	return (copyout(&sf, uap->buf, sizeof(sf)));
4299 }
4300 
4301 int
4302 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4303 {
4304 	struct statfs *sp;
4305 	struct mount *mp;
4306 	struct vnode *vp;
4307 	int vfslocked;
4308 	int error;
4309 
4310 	error = suser(td);
4311 	if (error)
4312 		return (error);
4313 	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4314 		return (ESTALE);
4315 	vfslocked = VFS_LOCK_GIANT(mp);
4316 	error = VFS_FHTOVP(mp, &fh.fh_fid, &vp);
4317 	if (error) {
4318 		VFS_UNLOCK_GIANT(vfslocked);
4319 		vfs_rel(mp);
4320 		return (error);
4321 	}
4322 	vput(vp);
4323 	error = prison_canseemount(td->td_ucred, mp);
4324 	if (error)
4325 		goto out;
4326 #ifdef MAC
4327 	error = mac_check_mount_stat(td->td_ucred, mp);
4328 	if (error)
4329 		goto out;
4330 #endif
4331 	/*
4332 	 * Set these in case the underlying filesystem fails to do so.
4333 	 */
4334 	sp = &mp->mnt_stat;
4335 	sp->f_version = STATFS_VERSION;
4336 	sp->f_namemax = NAME_MAX;
4337 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4338 	error = VFS_STATFS(mp, sp, td);
4339 	if (error == 0)
4340 		*buf = *sp;
4341 out:
4342 	vfs_rel(mp);
4343 	VFS_UNLOCK_GIANT(vfslocked);
4344 	return (error);
4345 }
4346 
4347 /*
4348  * Syscall to push extended attribute configuration information into the
4349  * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4350  * a command (int cmd), and attribute name and misc data.  For now, the
4351  * attribute name is left in userspace for consumption by the VFS_op.
4352  * It will probably be changed to be copied into sysspace by the
4353  * syscall in the future, once issues with various consumers of the
4354  * attribute code have raised their hands.
4355  *
4356  * Currently this is used only by UFS Extended Attributes.
4357  */
4358 int
4359 extattrctl(td, uap)
4360 	struct thread *td;
4361 	struct extattrctl_args /* {
4362 		const char *path;
4363 		int cmd;
4364 		const char *filename;
4365 		int attrnamespace;
4366 		const char *attrname;
4367 	} */ *uap;
4368 {
4369 	struct vnode *filename_vp;
4370 	struct nameidata nd;
4371 	struct mount *mp, *mp_writable;
4372 	char attrname[EXTATTR_MAXNAMELEN];
4373 	int vfslocked, fnvfslocked, error;
4374 
4375 	AUDIT_ARG(cmd, uap->cmd);
4376 	AUDIT_ARG(value, uap->attrnamespace);
4377 	/*
4378 	 * uap->attrname is not always defined.  We check again later when we
4379 	 * invoke the VFS call so as to pass in NULL there if needed.
4380 	 */
4381 	if (uap->attrname != NULL) {
4382 		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4383 		    NULL);
4384 		if (error)
4385 			return (error);
4386 	}
4387 	AUDIT_ARG(text, attrname);
4388 
4389 	vfslocked = fnvfslocked = 0;
4390 	/*
4391 	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4392 	 * which VFS_EXTATTRCTL() will later release.
4393 	 */
4394 	filename_vp = NULL;
4395 	if (uap->filename != NULL) {
4396 		NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF |
4397 		    AUDITVNODE2, UIO_USERSPACE, uap->filename, td);
4398 		error = namei(&nd);
4399 		if (error)
4400 			return (error);
4401 		fnvfslocked = NDHASGIANT(&nd);
4402 		filename_vp = nd.ni_vp;
4403 		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4404 	}
4405 
4406 	/* uap->path is always defined. */
4407 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4408 	    uap->path, td);
4409 	error = namei(&nd);
4410 	if (error) {
4411 		if (filename_vp != NULL)
4412 			vput(filename_vp);
4413 		goto out;
4414 	}
4415 	vfslocked = NDHASGIANT(&nd);
4416 	mp = nd.ni_vp->v_mount;
4417 	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4418 	NDFREE(&nd, 0);
4419 	if (error) {
4420 		if (filename_vp != NULL)
4421 			vput(filename_vp);
4422 		goto out;
4423 	}
4424 
4425 	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
4426 	    uap->attrname != NULL ? attrname : NULL, td);
4427 
4428 	vn_finished_write(mp_writable);
4429 	/*
4430 	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4431 	 * filename_vp, so vrele it if it is defined.
4432 	 */
4433 	if (filename_vp != NULL)
4434 		vrele(filename_vp);
4435 out:
4436 	VFS_UNLOCK_GIANT(fnvfslocked);
4437 	VFS_UNLOCK_GIANT(vfslocked);
4438 	return (error);
4439 }
4440 
4441 /*-
4442  * Set a named extended attribute on a file or directory
4443  *
4444  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4445  *            kernelspace string pointer "attrname", userspace buffer
4446  *            pointer "data", buffer length "nbytes", thread "td".
4447  * Returns: 0 on success, an error number otherwise
4448  * Locks: none
4449  * References: vp must be a valid reference for the duration of the call
4450  */
4451 static int
4452 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4453     void *data, size_t nbytes, struct thread *td)
4454 {
4455 	struct mount *mp;
4456 	struct uio auio;
4457 	struct iovec aiov;
4458 	ssize_t cnt;
4459 	int error;
4460 
4461 	VFS_ASSERT_GIANT(vp->v_mount);
4462 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4463 	if (error)
4464 		return (error);
4465 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4466 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4467 
4468 	aiov.iov_base = data;
4469 	aiov.iov_len = nbytes;
4470 	auio.uio_iov = &aiov;
4471 	auio.uio_iovcnt = 1;
4472 	auio.uio_offset = 0;
4473 	if (nbytes > INT_MAX) {
4474 		error = EINVAL;
4475 		goto done;
4476 	}
4477 	auio.uio_resid = nbytes;
4478 	auio.uio_rw = UIO_WRITE;
4479 	auio.uio_segflg = UIO_USERSPACE;
4480 	auio.uio_td = td;
4481 	cnt = nbytes;
4482 
4483 #ifdef MAC
4484 	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4485 	    attrname, &auio);
4486 	if (error)
4487 		goto done;
4488 #endif
4489 
4490 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4491 	    td->td_ucred, td);
4492 	cnt -= auio.uio_resid;
4493 	td->td_retval[0] = cnt;
4494 
4495 done:
4496 	VOP_UNLOCK(vp, 0, td);
4497 	vn_finished_write(mp);
4498 	return (error);
4499 }
4500 
4501 int
4502 extattr_set_fd(td, uap)
4503 	struct thread *td;
4504 	struct extattr_set_fd_args /* {
4505 		int fd;
4506 		int attrnamespace;
4507 		const char *attrname;
4508 		void *data;
4509 		size_t nbytes;
4510 	} */ *uap;
4511 {
4512 	struct file *fp;
4513 	char attrname[EXTATTR_MAXNAMELEN];
4514 	int vfslocked, error;
4515 
4516 	AUDIT_ARG(fd, uap->fd);
4517 	AUDIT_ARG(value, uap->attrnamespace);
4518 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4519 	if (error)
4520 		return (error);
4521 	AUDIT_ARG(text, attrname);
4522 
4523 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4524 	if (error)
4525 		return (error);
4526 
4527 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4528 	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
4529 	    attrname, uap->data, uap->nbytes, td);
4530 	fdrop(fp, td);
4531 	VFS_UNLOCK_GIANT(vfslocked);
4532 
4533 	return (error);
4534 }
4535 
4536 int
4537 extattr_set_file(td, uap)
4538 	struct thread *td;
4539 	struct extattr_set_file_args /* {
4540 		const char *path;
4541 		int attrnamespace;
4542 		const char *attrname;
4543 		void *data;
4544 		size_t nbytes;
4545 	} */ *uap;
4546 {
4547 	struct nameidata nd;
4548 	char attrname[EXTATTR_MAXNAMELEN];
4549 	int vfslocked, error;
4550 
4551 	AUDIT_ARG(value, uap->attrnamespace);
4552 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4553 	if (error)
4554 		return (error);
4555 	AUDIT_ARG(text, attrname);
4556 
4557 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4558 	    uap->path, td);
4559 	error = namei(&nd);
4560 	if (error)
4561 		return (error);
4562 	NDFREE(&nd, NDF_ONLY_PNBUF);
4563 
4564 	vfslocked = NDHASGIANT(&nd);
4565 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4566 	    uap->data, uap->nbytes, td);
4567 
4568 	vrele(nd.ni_vp);
4569 	VFS_UNLOCK_GIANT(vfslocked);
4570 	return (error);
4571 }
4572 
4573 int
4574 extattr_set_link(td, uap)
4575 	struct thread *td;
4576 	struct extattr_set_link_args /* {
4577 		const char *path;
4578 		int attrnamespace;
4579 		const char *attrname;
4580 		void *data;
4581 		size_t nbytes;
4582 	} */ *uap;
4583 {
4584 	struct nameidata nd;
4585 	char attrname[EXTATTR_MAXNAMELEN];
4586 	int vfslocked, error;
4587 
4588 	AUDIT_ARG(value, uap->attrnamespace);
4589 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4590 	if (error)
4591 		return (error);
4592 	AUDIT_ARG(text, attrname);
4593 
4594 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4595 	    uap->path, td);
4596 	error = namei(&nd);
4597 	if (error)
4598 		return (error);
4599 	NDFREE(&nd, NDF_ONLY_PNBUF);
4600 
4601 	vfslocked = NDHASGIANT(&nd);
4602 	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4603 	    uap->data, uap->nbytes, td);
4604 
4605 	vrele(nd.ni_vp);
4606 	VFS_UNLOCK_GIANT(vfslocked);
4607 	return (error);
4608 }
4609 
4610 /*-
4611  * Get a named extended attribute on a file or directory
4612  *
4613  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4614  *            kernelspace string pointer "attrname", userspace buffer
4615  *            pointer "data", buffer length "nbytes", thread "td".
4616  * Returns: 0 on success, an error number otherwise
4617  * Locks: none
4618  * References: vp must be a valid reference for the duration of the call
4619  */
4620 static int
4621 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4622     void *data, size_t nbytes, struct thread *td)
4623 {
4624 	struct uio auio, *auiop;
4625 	struct iovec aiov;
4626 	ssize_t cnt;
4627 	size_t size, *sizep;
4628 	int error;
4629 
4630 	VFS_ASSERT_GIANT(vp->v_mount);
4631 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4632 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4633 
4634 	/*
4635 	 * Slightly unusual semantics: if the user provides a NULL data
4636 	 * pointer, they don't want to receive the data, just the
4637 	 * maximum read length.
4638 	 */
4639 	auiop = NULL;
4640 	sizep = NULL;
4641 	cnt = 0;
4642 	if (data != NULL) {
4643 		aiov.iov_base = data;
4644 		aiov.iov_len = nbytes;
4645 		auio.uio_iov = &aiov;
4646 		auio.uio_iovcnt = 1;
4647 		auio.uio_offset = 0;
4648 		if (nbytes > INT_MAX) {
4649 			error = EINVAL;
4650 			goto done;
4651 		}
4652 		auio.uio_resid = nbytes;
4653 		auio.uio_rw = UIO_READ;
4654 		auio.uio_segflg = UIO_USERSPACE;
4655 		auio.uio_td = td;
4656 		auiop = &auio;
4657 		cnt = nbytes;
4658 	} else
4659 		sizep = &size;
4660 
4661 #ifdef MAC
4662 	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4663 	    attrname, &auio);
4664 	if (error)
4665 		goto done;
4666 #endif
4667 
4668 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4669 	    td->td_ucred, td);
4670 
4671 	if (auiop != NULL) {
4672 		cnt -= auio.uio_resid;
4673 		td->td_retval[0] = cnt;
4674 	} else
4675 		td->td_retval[0] = size;
4676 
4677 done:
4678 	VOP_UNLOCK(vp, 0, td);
4679 	return (error);
4680 }
4681 
4682 int
4683 extattr_get_fd(td, uap)
4684 	struct thread *td;
4685 	struct extattr_get_fd_args /* {
4686 		int fd;
4687 		int attrnamespace;
4688 		const char *attrname;
4689 		void *data;
4690 		size_t nbytes;
4691 	} */ *uap;
4692 {
4693 	struct file *fp;
4694 	char attrname[EXTATTR_MAXNAMELEN];
4695 	int vfslocked, error;
4696 
4697 	AUDIT_ARG(fd, uap->fd);
4698 	AUDIT_ARG(value, uap->attrnamespace);
4699 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4700 	if (error)
4701 		return (error);
4702 	AUDIT_ARG(text, attrname);
4703 
4704 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4705 	if (error)
4706 		return (error);
4707 
4708 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4709 	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4710 	    attrname, uap->data, uap->nbytes, td);
4711 
4712 	fdrop(fp, td);
4713 	VFS_UNLOCK_GIANT(vfslocked);
4714 	return (error);
4715 }
4716 
4717 int
4718 extattr_get_file(td, uap)
4719 	struct thread *td;
4720 	struct extattr_get_file_args /* {
4721 		const char *path;
4722 		int attrnamespace;
4723 		const char *attrname;
4724 		void *data;
4725 		size_t nbytes;
4726 	} */ *uap;
4727 {
4728 	struct nameidata nd;
4729 	char attrname[EXTATTR_MAXNAMELEN];
4730 	int vfslocked, error;
4731 
4732 	AUDIT_ARG(value, uap->attrnamespace);
4733 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4734 	if (error)
4735 		return (error);
4736 	AUDIT_ARG(text, attrname);
4737 
4738 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4739 	    uap->path, td);
4740 	error = namei(&nd);
4741 	if (error)
4742 		return (error);
4743 	NDFREE(&nd, NDF_ONLY_PNBUF);
4744 
4745 	vfslocked = NDHASGIANT(&nd);
4746 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4747 	    uap->data, uap->nbytes, td);
4748 
4749 	vrele(nd.ni_vp);
4750 	VFS_UNLOCK_GIANT(vfslocked);
4751 	return (error);
4752 }
4753 
4754 int
4755 extattr_get_link(td, uap)
4756 	struct thread *td;
4757 	struct extattr_get_link_args /* {
4758 		const char *path;
4759 		int attrnamespace;
4760 		const char *attrname;
4761 		void *data;
4762 		size_t nbytes;
4763 	} */ *uap;
4764 {
4765 	struct nameidata nd;
4766 	char attrname[EXTATTR_MAXNAMELEN];
4767 	int vfslocked, error;
4768 
4769 	AUDIT_ARG(value, uap->attrnamespace);
4770 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4771 	if (error)
4772 		return (error);
4773 	AUDIT_ARG(text, attrname);
4774 
4775 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4776 	    uap->path, td);
4777 	error = namei(&nd);
4778 	if (error)
4779 		return (error);
4780 	NDFREE(&nd, NDF_ONLY_PNBUF);
4781 
4782 	vfslocked = NDHASGIANT(&nd);
4783 	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4784 	    uap->data, uap->nbytes, td);
4785 
4786 	vrele(nd.ni_vp);
4787 	VFS_UNLOCK_GIANT(vfslocked);
4788 	return (error);
4789 }
4790 
4791 /*
4792  * extattr_delete_vp(): Delete a named extended attribute on a file or
4793  *                      directory
4794  *
4795  * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4796  *            kernelspace string pointer "attrname", proc "p"
4797  * Returns: 0 on success, an error number otherwise
4798  * Locks: none
4799  * References: vp must be a valid reference for the duration of the call
4800  */
4801 static int
4802 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4803     struct thread *td)
4804 {
4805 	struct mount *mp;
4806 	int error;
4807 
4808 	VFS_ASSERT_GIANT(vp->v_mount);
4809 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4810 	if (error)
4811 		return (error);
4812 	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4813 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4814 
4815 #ifdef MAC
4816 	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4817 	    attrname);
4818 	if (error)
4819 		goto done;
4820 #endif
4821 
4822 	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4823 	    td);
4824 	if (error == EOPNOTSUPP)
4825 		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4826 		    td->td_ucred, td);
4827 #ifdef MAC
4828 done:
4829 #endif
4830 	VOP_UNLOCK(vp, 0, td);
4831 	vn_finished_write(mp);
4832 	return (error);
4833 }
4834 
4835 int
4836 extattr_delete_fd(td, uap)
4837 	struct thread *td;
4838 	struct extattr_delete_fd_args /* {
4839 		int fd;
4840 		int attrnamespace;
4841 		const char *attrname;
4842 	} */ *uap;
4843 {
4844 	struct file *fp;
4845 	char attrname[EXTATTR_MAXNAMELEN];
4846 	int vfslocked, error;
4847 
4848 	AUDIT_ARG(fd, uap->fd);
4849 	AUDIT_ARG(value, uap->attrnamespace);
4850 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4851 	if (error)
4852 		return (error);
4853 	AUDIT_ARG(text, attrname);
4854 
4855 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4856 	if (error)
4857 		return (error);
4858 
4859 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
4860 	error = extattr_delete_vp(fp->f_vnode, uap->attrnamespace,
4861 	    attrname, td);
4862 	fdrop(fp, td);
4863 	VFS_UNLOCK_GIANT(vfslocked);
4864 	return (error);
4865 }
4866 
4867 int
4868 extattr_delete_file(td, uap)
4869 	struct thread *td;
4870 	struct extattr_delete_file_args /* {
4871 		const char *path;
4872 		int attrnamespace;
4873 		const char *attrname;
4874 	} */ *uap;
4875 {
4876 	struct nameidata nd;
4877 	char attrname[EXTATTR_MAXNAMELEN];
4878 	int vfslocked, error;
4879 
4880 	AUDIT_ARG(value, uap->attrnamespace);
4881 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4882 	if (error)
4883 		return(error);
4884 	AUDIT_ARG(text, attrname);
4885 
4886 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
4887 	    uap->path, td);
4888 	error = namei(&nd);
4889 	if (error)
4890 		return(error);
4891 	NDFREE(&nd, NDF_ONLY_PNBUF);
4892 
4893 	vfslocked = NDHASGIANT(&nd);
4894 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4895 	vrele(nd.ni_vp);
4896 	VFS_UNLOCK_GIANT(vfslocked);
4897 	return(error);
4898 }
4899 
4900 int
4901 extattr_delete_link(td, uap)
4902 	struct thread *td;
4903 	struct extattr_delete_link_args /* {
4904 		const char *path;
4905 		int attrnamespace;
4906 		const char *attrname;
4907 	} */ *uap;
4908 {
4909 	struct nameidata nd;
4910 	char attrname[EXTATTR_MAXNAMELEN];
4911 	int vfslocked, error;
4912 
4913 	AUDIT_ARG(value, uap->attrnamespace);
4914 	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4915 	if (error)
4916 		return(error);
4917 	AUDIT_ARG(text, attrname);
4918 
4919 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
4920 	    uap->path, td);
4921 	error = namei(&nd);
4922 	if (error)
4923 		return(error);
4924 	NDFREE(&nd, NDF_ONLY_PNBUF);
4925 
4926 	vfslocked = NDHASGIANT(&nd);
4927 	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4928 	vrele(nd.ni_vp);
4929 	VFS_UNLOCK_GIANT(vfslocked);
4930 	return(error);
4931 }
4932 
4933 /*-
4934  * Retrieve a list of extended attributes on a file or directory.
4935  *
4936  * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4937  *            userspace buffer pointer "data", buffer length "nbytes",
4938  *            thread "td".
4939  * Returns: 0 on success, an error number otherwise
4940  * Locks: none
4941  * References: vp must be a valid reference for the duration of the call
4942  */
4943 static int
4944 extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4945     size_t nbytes, struct thread *td)
4946 {
4947 	struct uio auio, *auiop;
4948 	size_t size, *sizep;
4949 	struct iovec aiov;
4950 	ssize_t cnt;
4951 	int error;
4952 
4953 	VFS_ASSERT_GIANT(vp->v_mount);
4954 	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4955 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4956 
4957 	auiop = NULL;
4958 	sizep = NULL;
4959 	cnt = 0;
4960 	if (data != NULL) {
4961 		aiov.iov_base = data;
4962 		aiov.iov_len = nbytes;
4963 		auio.uio_iov = &aiov;
4964 		auio.uio_iovcnt = 1;
4965 		auio.uio_offset = 0;
4966 		if (nbytes > INT_MAX) {
4967 			error = EINVAL;
4968 			goto done;
4969 		}
4970 		auio.uio_resid = nbytes;
4971 		auio.uio_rw = UIO_READ;
4972 		auio.uio_segflg = UIO_USERSPACE;
4973 		auio.uio_td = td;
4974 		auiop = &auio;
4975 		cnt = nbytes;
4976 	} else
4977 		sizep = &size;
4978 
4979 #ifdef MAC
4980 	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4981 	if (error)
4982 		goto done;
4983 #endif
4984 
4985 	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4986 	    td->td_ucred, td);
4987 
4988 	if (auiop != NULL) {
4989 		cnt -= auio.uio_resid;
4990 		td->td_retval[0] = cnt;
4991 	} else
4992 		td->td_retval[0] = size;
4993 
4994 done:
4995 	VOP_UNLOCK(vp, 0, td);
4996 	return (error);
4997 }
4998 
4999 
5000 int
5001 extattr_list_fd(td, uap)
5002 	struct thread *td;
5003 	struct extattr_list_fd_args /* {
5004 		int fd;
5005 		int attrnamespace;
5006 		void *data;
5007 		size_t nbytes;
5008 	} */ *uap;
5009 {
5010 	struct file *fp;
5011 	int vfslocked, error;
5012 
5013 	AUDIT_ARG(fd, uap->fd);
5014 	AUDIT_ARG(value, uap->attrnamespace);
5015 	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
5016 	if (error)
5017 		return (error);
5018 
5019 	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
5020 	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
5021 	    uap->nbytes, td);
5022 
5023 	fdrop(fp, td);
5024 	VFS_UNLOCK_GIANT(vfslocked);
5025 	return (error);
5026 }
5027 
5028 int
5029 extattr_list_file(td, uap)
5030 	struct thread*td;
5031 	struct extattr_list_file_args /* {
5032 		const char *path;
5033 		int attrnamespace;
5034 		void *data;
5035 		size_t nbytes;
5036 	} */ *uap;
5037 {
5038 	struct nameidata nd;
5039 	int vfslocked, error;
5040 
5041 	AUDIT_ARG(value, uap->attrnamespace);
5042 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
5043 	    uap->path, td);
5044 	error = namei(&nd);
5045 	if (error)
5046 		return (error);
5047 	NDFREE(&nd, NDF_ONLY_PNBUF);
5048 
5049 	vfslocked = NDHASGIANT(&nd);
5050 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
5051 	    uap->nbytes, td);
5052 
5053 	vrele(nd.ni_vp);
5054 	VFS_UNLOCK_GIANT(vfslocked);
5055 	return (error);
5056 }
5057 
5058 int
5059 extattr_list_link(td, uap)
5060 	struct thread*td;
5061 	struct extattr_list_link_args /* {
5062 		const char *path;
5063 		int attrnamespace;
5064 		void *data;
5065 		size_t nbytes;
5066 	} */ *uap;
5067 {
5068 	struct nameidata nd;
5069 	int vfslocked, error;
5070 
5071 	AUDIT_ARG(value, uap->attrnamespace);
5072 	NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW | AUDITVNODE1, UIO_USERSPACE,
5073 	    uap->path, td);
5074 	error = namei(&nd);
5075 	if (error)
5076 		return (error);
5077 	NDFREE(&nd, NDF_ONLY_PNBUF);
5078 
5079 	vfslocked = NDHASGIANT(&nd);
5080 	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
5081 	    uap->nbytes, td);
5082 
5083 	vrele(nd.ni_vp);
5084 	VFS_UNLOCK_GIANT(vfslocked);
5085 	return (error);
5086 }
5087